In [42]:
import sys
import operator
import re
import json
import nltk  # needed for
# nltk.download('punkt') #getting libraries
# nltk.download('stopwords')
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer

outfile="output.json"
queriesFile="queries.txt"
IndexFile="input_corpus.txt"

ps = PorterStemmer()
stopwords = set(stopwords.words('english'))


def word_tokenize(line):
    return re.split(r'[\s-]+', line)


def ExtractLine(line):
    data = line.split('\t')
    if len(data) < 1:
        return None
    return data[0], data[1]


def CleanLine(line):
    return re.sub(r'[^a-z\s0-9-]', ' ', line.strip().lower())


def StemWords(words):
    stemmed = []
    for word in words:
        stem = ps.stem(word)
        if stem in stemmed:
            continue
        stemmed.append(stem)
    return stemmed


def RemoveStopWords(words):
    return [x for x in words if x not in stopwords]


def ReadInput(filename):
    file1 = open(filename, 'r+', encoding="utf8")
    Lines = file1.readlines()
    file1.close()
    return Lines


def main():
    linked_list = ProcessLibrary()
    #  print(linked_list.traverse_list())
    # print(GetIndexesOfstemmedWord("etiopathogenesi", linked_list))
    SearchWords = ProcessSearch()
    Searches = ConvertSearchesIntoIndex(SearchWords, linked_list)
    with open(outfile, 'w') as outfilePointer:
        json.dump(Searches, outfilePointer)
    #print(json.dumps(Searches))


def ProcessSingleSearch(SearchWordList, linked_list):
    MaxDocId = -1
    MinDocId = -1
    Indexes = []
    for word in SearchWordList:
        Stimpack = GetIndexesOfstemmedWord(word, linked_list)
        Indexes.append([word, Stimpack])
        if MinDocId == -1:
            MinDocId = Stimpack[0]
        elif MinDocId > Stimpack[0]:
            MinDocId = Stimpack[0]
        if MaxDocId == -1:
            MaxDocId = Stimpack[-1]
        elif Stimpack[-1] < MaxDocId:
            MaxDocId = Stimpack[-1]
    for IndexList in Indexes:
        IndexList[1] = set(filter(lambda e: e >= MinDocId and e <= MaxDocId, IndexList[1]))
    return Indexes


def ConvertSearchesIntoIndex(SearchWords, linked_list):
    jsonData = dict()
    jsonData["postingsList"] = dict()
    jsonData["daatAnd"] = dict()
    for SearchWordListItems in SearchWords:
        SearchWordList = SearchWordListItems[0]
        Indexes = ProcessSingleSearch(SearchWordList, linked_list)
        for Index in Indexes:
            res = list(Index[1])
            res.sort()
            jsonData["postingsList"][Index[0]] = res

        JustIndexes = map(lambda x: x[1], Indexes)
        Compares = 0
        for i in Indexes:
            Compares += len(i[1])
        InterSects = list(set.intersection(*JustIndexes))
        InterSects.sort()

        SearchPhrase = SearchWordListItems[1]
        jsonData["daatAnd"][SearchPhrase] = dict()
        jsonData["daatAnd"][SearchPhrase]["results"] = InterSects
        jsonData["daatAnd"][SearchPhrase]["num_docs"] = len(InterSects)
        jsonData["daatAnd"][SearchPhrase]["num_comparisons"] = Compares
    return jsonData

def ProcessSearch():
    Lines = ReadInput(queriesFile)
    Data = []
    for line in Lines:
        print("Input query -> ",line)
        WordString = CleanLine(line)
        print("Query Post Processing -> ",WordString)
        cleaned_line = CleanLine(WordString)
        tokens = word_tokenize(cleaned_line)
        print("Doc tokens post whitespace tokenizing -> ",tokens)
        t = RemoveStopWords(tokens)
        print("Doc tokens post stopword removal -> ",t)
        stemmed = StemWords(t)
        print("Doc tokens post stemming -> ",stemmed,"\n")
        Data.append([stemmed, line.strip()])
    return Data


def ProcessLibrary():
    linked_list = LinkedList()
    Lines = ReadInput(IndexFile)
    Data = []
    maps = dict()
    for line in Lines:
        print("Input -> ",line)
        docId, WordString = ExtractLine(line)
        print ("Doc id -> "+docId)
        print("Doc text -> "+WordString+"\n")
        if docId is None:
            continue
        cleaned_line = CleanLine(WordString)
        print("Doc text post processing -> ",cleaned_line,"\n") 
        tokens = word_tokenize(cleaned_line)
        print("Doc tokens post whitespace tokenizing -> ",tokens)
        t = RemoveStopWords(tokens)
        print("Doc tokens post stopword removal -> ",t)
        stemmed = StemWords(t)
        print("Doc tokens post stemming -> ",stemmed,"\n")
        Data.append([int(docId), stemmed])
    sorted_list = sorted(Data, key=operator.itemgetter(0))
    for doc in sorted_list:
        DocID = doc[0]
        Words = doc[1]
        for word in Words:
            if word not in maps.keys():
                maps[word] = []
            if DocID not in maps[word]:
                maps[word].append(DocID)
    for word in maps.keys():
        linked_list.insert_at_end([word, maps[word]])
    return linked_list


def GetIndexesOfstemmedWord(word, linkedList):
    start = linkedList.start_node
    while start is not None:
        if start.value[0] == word:
            return start.value[1]
        start = start.next
    return None

class Node:
    def __init__(self, value = None, next = None):
        self.value = value
        self.next = next

class LinkedList:

    def __init__(self, index=0, mode="simple"):
        self.start_node = None # Head pointer
        self.end_node = None # Tail pointer
        # Additional attributes
        self.index = index
        self.mode = "simple"

    # Method to traverse a created linked list
    def traverse_list(self):
        traversal = []
        if self.start_node is None:
            print("List has no element")
            return
        else:
            n = self.start_node
            # Start traversal from head, and go on till you reach None
            while n is not None:
                traversal.append(n.value)
                n = n.next
            return traversal

    # Method to insert elements in the linked list
    def insert_at_end(self, value):
        # determine data type of the value
        if 'list' in str(type(value)):
            self.mode = "list"

        # Initialze a linked list element of type "Node"
        new_node = Node(value)
        n = self.start_node # Head pointer

        # If linked list is empty, insert element at head
        if self.start_node is None:
            self.start_node = new_node
            self.end_node = new_node
            return "Inserted"

        elif self.mode == "list":
            if self.start_node.value[self.index] >= value[self.index]:
                self.start_node = new_node
                self.start_node.next = n
                return "Inserted"

            elif self.end_node.value[self.index] <= value[self.index]:
                self.end_node.next = new_node
                self.end_node = new_node
                return "Inserted"

            else:
                while value[self.index] > n.value[self.index] and value[self.index] < self.end_node.value[self.index] and n.next is not None:
                    n = n.next

                m = self.start_node
                while m.next != n and m.next is not None:
                    m = m.next
                m.next = new_node
                new_node.next = n
                return "Inserted"
        else:
            # If element to be inserted has lower value than head, insert new element at head
            if self.start_node.value >= value:
                self.start_node = new_node
                self.start_node.next = n
                return "Inserted"

            # If element to be inserted has higher value than tail, insert new element at tail
            elif self.end_node.value <= value:
                self.end_node.next = new_node
                self.end_node = new_node
                return "Inserted"

            # If element to be inserted lies between head & tail, find the appropriate position to insert it
            else:
                while value > n.value and value < self.end_node.value and n.next is not None:
                    n = n.next

                m = self.start_node
                while m.next != n and m.next is not None:
                    m = m.next
                m.next = new_node
                new_node.next = n
                return "Inserted"

if __name__ == '__main__':
    main()

Input ->  144285	Epidemiological and clinical characteristics of 136 cases of COVID-19 in main district of Chongqing

Doc id -> 144285
Doc text -> Epidemiological and clinical characteristics of 136 cases of COVID-19 in main district of Chongqing


Doc text post processing ->  epidemiological and clinical characteristics of 136 cases of covid-19 in main district of chongqing 

Doc tokens post whitespace tokenizing ->  ['epidemiological', 'and', 'clinical', 'characteristics', 'of', '136', 'cases', 'of', 'covid', '19', 'in', 'main', 'district', 'of', 'chongqing']
Doc tokens post stopword removal ->  ['epidemiological', 'clinical', 'characteristics', '136', 'cases', 'covid', '19', 'main', 'district', 'chongqing']
Doc tokens post stemming ->  ['epidemiolog', 'clinic', 'characterist', '136', 'case', 'covid', '19', 'main', 'district', 'chongq'] 

Input ->  74698	Late‐onset Pneumocystis jirovecii pneumonia post–fludarabine, cyclophosphamide and rituximab: implications for prophylaxis

Doc id 

Doc text post processing ->  ethical challenges arising in the covid-19 pandemic  an overview from the association of bioethics program directors  abpd  task force 

Doc tokens post whitespace tokenizing ->  ['ethical', 'challenges', 'arising', 'in', 'the', 'covid', '19', 'pandemic', 'an', 'overview', 'from', 'the', 'association', 'of', 'bioethics', 'program', 'directors', 'abpd', 'task', 'force']
Doc tokens post stopword removal ->  ['ethical', 'challenges', 'arising', 'covid', '19', 'pandemic', 'overview', 'association', 'bioethics', 'program', 'directors', 'abpd', 'task', 'force']
Doc tokens post stemming ->  ['ethic', 'challeng', 'aris', 'covid', '19', 'pandem', 'overview', 'associ', 'bioethic', 'program', 'director', 'abpd', 'task', 'forc'] 

Input ->  48538	Fever Screening and Detection of Febrile Arrivals at an International Airport in Korea: Association among Self-reported Fever, Infrared Thermal Camera Scanning, and Tympanic Temperature

Doc id -> 48538
Doc text -> Fever Scree

Doc tokens post stemming ->  ['analysi', 'hospit', 'acut', 'respiratori', 'diseas', 'recruit', 'immun', 'adenoviru', 'type', '4', '7', 'vaccin', '1', ''] 

Input ->  59615	Small Animal Literature Index

Doc id -> 59615
Doc text -> Small Animal Literature Index


Doc text post processing ->  small animal literature index 

Doc tokens post whitespace tokenizing ->  ['small', 'animal', 'literature', 'index']
Doc tokens post stopword removal ->  ['small', 'animal', 'literature', 'index']
Doc tokens post stemming ->  ['small', 'anim', 'literatur', 'index'] 

Input ->  62090	Emergency tracheal intubation: techniques and outcomes.

Doc id -> 62090
Doc text -> Emergency tracheal intubation: techniques and outcomes.


Doc text post processing ->  emergency tracheal intubation  techniques and outcomes  

Doc tokens post whitespace tokenizing ->  ['emergency', 'tracheal', 'intubation', 'techniques', 'and', 'outcomes', '']
Doc tokens post stopword removal ->  ['emergency', 'tracheal', 'intubation'

Doc text post processing ->  kardiologie 

Doc tokens post whitespace tokenizing ->  ['kardiologie']
Doc tokens post stopword removal ->  ['kardiologie']
Doc tokens post stemming ->  ['kardiologi'] 

Input ->  712	Estimating the risk of 2019 Novel Coronavirus death during the course of the outbreak in China, 2020

Doc id -> 712
Doc text -> Estimating the risk of 2019 Novel Coronavirus death during the course of the outbreak in China, 2020


Doc text post processing ->  estimating the risk of 2019 novel coronavirus death during the course of the outbreak in china  2020 

Doc tokens post whitespace tokenizing ->  ['estimating', 'the', 'risk', 'of', '2019', 'novel', 'coronavirus', 'death', 'during', 'the', 'course', 'of', 'the', 'outbreak', 'in', 'china', '2020']
Doc tokens post stopword removal ->  ['estimating', 'risk', '2019', 'novel', 'coronavirus', 'death', 'course', 'outbreak', 'china', '2020']
Doc tokens post stemming ->  ['estim', 'risk', '2019', 'novel', 'coronaviru', 'death', 'c


Doc text post processing ->  covid-19  review of epidemiology and potential treatments against 2019 novel coronavirus 

Doc tokens post whitespace tokenizing ->  ['covid', '19', 'review', 'of', 'epidemiology', 'and', 'potential', 'treatments', 'against', '2019', 'novel', 'coronavirus']
Doc tokens post stopword removal ->  ['covid', '19', 'review', 'epidemiology', 'potential', 'treatments', '2019', 'novel', 'coronavirus']
Doc tokens post stemming ->  ['covid', '19', 'review', 'epidemiolog', 'potenti', 'treatment', '2019', 'novel', 'coronaviru'] 

Input ->  84515	Impact of Non-cardiac Comorbidities in Adults with Congenital Heart Disease: Management of Multisystem Complications

Doc id -> 84515
Doc text -> Impact of Non-cardiac Comorbidities in Adults with Congenital Heart Disease: Management of Multisystem Complications


Doc text post processing ->  impact of non-cardiac comorbidities in adults with congenital heart disease  management of multisystem complications 

Doc tokens post wh

Input ->  5511	The usefulness of endotracheal tube twisting in facilitating tube delivery to glottis opening during GlideScope intubation in infants: randomized trial.

Doc id -> 5511
Doc text -> The usefulness of endotracheal tube twisting in facilitating tube delivery to glottis opening during GlideScope intubation in infants: randomized trial.


Doc text post processing ->  the usefulness of endotracheal tube twisting in facilitating tube delivery to glottis opening during glidescope intubation in infants  randomized trial  

Doc tokens post whitespace tokenizing ->  ['the', 'usefulness', 'of', 'endotracheal', 'tube', 'twisting', 'in', 'facilitating', 'tube', 'delivery', 'to', 'glottis', 'opening', 'during', 'glidescope', 'intubation', 'in', 'infants', 'randomized', 'trial', '']
Doc tokens post stopword removal ->  ['usefulness', 'endotracheal', 'tube', 'twisting', 'facilitating', 'tube', 'delivery', 'glottis', 'opening', 'glidescope', 'intubation', 'infants', 'randomized', 'trial',

Doc tokens post stemming ->  ['simpl', 'econom', 'solut', 'person', 'protect', 'equip', 'face', 'mask', 'shield', 'health', 'care', 'staff', 'covid', '19'] 

Input ->  129216	"How many infections of COVID-19 there will be in the""Diamond Princess""-Predicted by a virus transmission model based on the simulation of crowd flow"

Doc id -> 129216
Doc text -> "How many infections of COVID-19 there will be in the""Diamond Princess""-Predicted by a virus transmission model based on the simulation of crowd flow"


Doc text post processing ->   how many infections of covid-19 there will be in the  diamond princess  -predicted by a virus transmission model based on the simulation of crowd flow  

Doc tokens post whitespace tokenizing ->  ['', 'how', 'many', 'infections', 'of', 'covid', '19', 'there', 'will', 'be', 'in', 'the', 'diamond', 'princess', 'predicted', 'by', 'a', 'virus', 'transmission', 'model', 'based', 'on', 'the', 'simulation', 'of', 'crowd', 'flow', '']
Doc tokens post stopword r

Input ->  72568	The posterior bundle of the elbow medial collateral ligament: biomechanical study and proposal for a new reconstruction surgical technique.

Doc id -> 72568
Doc text -> The posterior bundle of the elbow medial collateral ligament: biomechanical study and proposal for a new reconstruction surgical technique.


Doc text post processing ->  the posterior bundle of the elbow medial collateral ligament  biomechanical study and proposal for a new reconstruction surgical technique  

Doc tokens post whitespace tokenizing ->  ['the', 'posterior', 'bundle', 'of', 'the', 'elbow', 'medial', 'collateral', 'ligament', 'biomechanical', 'study', 'and', 'proposal', 'for', 'a', 'new', 'reconstruction', 'surgical', 'technique', '']
Doc tokens post stopword removal ->  ['posterior', 'bundle', 'elbow', 'medial', 'collateral', 'ligament', 'biomechanical', 'study', 'proposal', 'new', 'reconstruction', 'surgical', 'technique', '']
Doc tokens post stemming ->  ['posterior', 'bundl', 'elbow', '

Doc tokens post stemming ->  ['parainfluenza', 'viru', 'type', '3', 'pneumonia', 'bone', 'marrow', 'transplant', 'recipi', 'multipl', 'small', 'nodul', 'high', 'resolut', 'lung', 'comput', 'tomographi', 'scan', 'provid', 'radiolog', 'clue', 'diagnosi'] 

Input ->  54096	Allergic respiratory disease care in the COVID-19 era: a EUFOREA statement

Doc id -> 54096
Doc text -> Allergic respiratory disease care in the COVID-19 era: a EUFOREA statement


Doc text post processing ->  allergic respiratory disease care in the covid-19 era  a euforea statement 

Doc tokens post whitespace tokenizing ->  ['allergic', 'respiratory', 'disease', 'care', 'in', 'the', 'covid', '19', 'era', 'a', 'euforea', 'statement']
Doc tokens post stopword removal ->  ['allergic', 'respiratory', 'disease', 'care', 'covid', '19', 'era', 'euforea', 'statement']
Doc tokens post stemming ->  ['allerg', 'respiratori', 'diseas', 'care', 'covid', '19', 'era', 'euforea', 'statement'] 

Input ->  124724	Prognostic Value of P

Doc tokens post stopword removal ->  ['impact', 'nutrition', 'covid', '19', 'susceptibility', 'long', 'term', 'consequences']
Doc tokens post stemming ->  ['impact', 'nutrit', 'covid', '19', 'suscept', 'long', 'term', 'consequ'] 

Input ->  70299	COVID-19 and Acute Pulmonary Embolism in Postpartum Patient

Doc id -> 70299
Doc text -> COVID-19 and Acute Pulmonary Embolism in Postpartum Patient


Doc text post processing ->  covid-19 and acute pulmonary embolism in postpartum patient 

Doc tokens post whitespace tokenizing ->  ['covid', '19', 'and', 'acute', 'pulmonary', 'embolism', 'in', 'postpartum', 'patient']
Doc tokens post stopword removal ->  ['covid', '19', 'acute', 'pulmonary', 'embolism', 'postpartum', 'patient']
Doc tokens post stemming ->  ['covid', '19', 'acut', 'pulmonari', 'embol', 'postpartum', 'patient'] 

Input ->  78160	Synthesis of 5-isoxazol-5-yl-2′-deoxyuridines exhibiting antiviral activity against HSV and several RNA viruses

Doc id -> 78160
Doc text -> Synthesis 

Doc tokens post stopword removal ->  ['ubiquitin', 'proteasome', 'system', 'spongiform', 'degenerative', 'disorders']
Doc tokens post stemming ->  ['ubiquitin', 'proteasom', 'system', 'spongiform', 'degen', 'disord'] 

Input ->  12319	VID-19 pandemic: the 3R’s (reduce, refine, and replace) of personal protective equipment (PPE) sustainability

Doc id -> 12319
Doc text -> VID-19 pandemic: the 3R’s (reduce, refine, and replace) of personal protective equipment (PPE) sustainability


Doc text post processing ->  vid-19 pandemic  the 3r s  reduce  refine  and replace  of personal protective equipment  ppe  sustainability 

Doc tokens post whitespace tokenizing ->  ['vid', '19', 'pandemic', 'the', '3r', 's', 'reduce', 'refine', 'and', 'replace', 'of', 'personal', 'protective', 'equipment', 'ppe', 'sustainability']
Doc tokens post stopword removal ->  ['vid', '19', 'pandemic', '3r', 'reduce', 'refine', 'replace', 'personal', 'protective', 'equipment', 'ppe', 'sustainability']
Doc tokens post

Doc tokens post whitespace tokenizing ->  ['radioisotope', 'cisternography', 'a', 'potentially', 'useful', 'tool', 'for', 'headache', 'diagnosis', 'in', 'patients', 'with', 'an', 'idd', 'pump', '']
Doc tokens post stopword removal ->  ['radioisotope', 'cisternography', 'potentially', 'useful', 'tool', 'headache', 'diagnosis', 'patients', 'idd', 'pump', '']
Doc tokens post stemming ->  ['radioisotop', 'cisternographi', 'potenti', 'use', 'tool', 'headach', 'diagnosi', 'patient', 'idd', 'pump', ''] 

Input ->  12669	Drainage systems, an occluded source of sanitation related outbreaks

Doc id -> 12669
Doc text -> Drainage systems, an occluded source of sanitation related outbreaks


Doc text post processing ->  drainage systems  an occluded source of sanitation related outbreaks 

Doc tokens post whitespace tokenizing ->  ['drainage', 'systems', 'an', 'occluded', 'source', 'of', 'sanitation', 'related', 'outbreaks']
Doc tokens post stopword removal ->  ['drainage', 'systems', 'occluded', '

Doc tokens post whitespace tokenizing ->  ['chapter', '7', 'ifn', 'inducibility', 'of', 'major', 'histocompatibility', 'antigens', 'in', 'tumors']
Doc tokens post stopword removal ->  ['chapter', '7', 'ifn', 'inducibility', 'major', 'histocompatibility', 'antigens', 'tumors']
Doc tokens post stemming ->  ['chapter', '7', 'ifn', 'induc', 'major', 'histocompat', 'antigen', 'tumor'] 

Input ->  1146	Serological evidence of widespread exposure of Grenada fruit bats to chikungunya virus

Doc id -> 1146
Doc text -> Serological evidence of widespread exposure of Grenada fruit bats to chikungunya virus


Doc text post processing ->  serological evidence of widespread exposure of grenada fruit bats to chikungunya virus 

Doc tokens post whitespace tokenizing ->  ['serological', 'evidence', 'of', 'widespread', 'exposure', 'of', 'grenada', 'fruit', 'bats', 'to', 'chikungunya', 'virus']
Doc tokens post stopword removal ->  ['serological', 'evidence', 'widespread', 'exposure', 'grenada', 'fruit', '

Doc tokens post whitespace tokenizing ->  ['periprocedural', 'outcomes', 'and', 'early', 'safety', 'with', 'the', 'use', 'of', 'the', 'pipeline', 'flex', 'embolization', 'device', 'with', 'shield', 'technology', 'for', 'unruptured', 'intracranial', 'aneurysms', 'preliminary', 'results', 'from', 'a', 'prospective', 'clinical', 'study', '']
Doc tokens post stopword removal ->  ['periprocedural', 'outcomes', 'early', 'safety', 'use', 'pipeline', 'flex', 'embolization', 'device', 'shield', 'technology', 'unruptured', 'intracranial', 'aneurysms', 'preliminary', 'results', 'prospective', 'clinical', 'study', '']
Doc tokens post stemming ->  ['periprocedur', 'outcom', 'earli', 'safeti', 'use', 'pipelin', 'flex', 'embol', 'devic', 'shield', 'technolog', 'unruptur', 'intracrani', 'aneurysm', 'preliminari', 'result', 'prospect', 'clinic', 'studi', ''] 

Input ->  148933	Implications for COVID-19: a systematic review of nurses’ experiences of working in acute care hospital settings during a respi

Doc tokens post stemming ->  ['da', 'mdk', 'reformgesetz', 'mit', 'allen', 'nachfolgenden', 'nderungen', 'der', 'fassung', 'de', 'zweiten', 'gesetz', 'zum', 'schutz', 'bev', 'lkerung', 'bei', 'einer', 'epidemischen', 'lage', 'von', 'national', 'tragweit', 'reform', 'act', 'subsequ', 'amend', 'second', 'protect', 'popul', 'event', 'epidem', 'situat', 'nation', 'signific', ''] 

Input ->  23505	Why the ABCs Matter More than Ever in Medical Education

Doc id -> 23505
Doc text -> Why the ABCs Matter More than Ever in Medical Education


Doc text post processing ->  why the abcs matter more than ever in medical education 

Doc tokens post whitespace tokenizing ->  ['why', 'the', 'abcs', 'matter', 'more', 'than', 'ever', 'in', 'medical', 'education']
Doc tokens post stopword removal ->  ['abcs', 'matter', 'ever', 'medical', 'education']
Doc tokens post stemming ->  ['abc', 'matter', 'ever', 'medic', 'educ'] 

Input ->  138204	Anticoagulant and antiarrhythmic effects of heparin in the treatme

Doc text post processing ->  infection of polarized epithelial cells with enteric and respiratory tract bovine coronaviruses and release of virus progeny  

Doc tokens post whitespace tokenizing ->  ['infection', 'of', 'polarized', 'epithelial', 'cells', 'with', 'enteric', 'and', 'respiratory', 'tract', 'bovine', 'coronaviruses', 'and', 'release', 'of', 'virus', 'progeny', '']
Doc tokens post stopword removal ->  ['infection', 'polarized', 'epithelial', 'cells', 'enteric', 'respiratory', 'tract', 'bovine', 'coronaviruses', 'release', 'virus', 'progeny', '']
Doc tokens post stemming ->  ['infect', 'polar', 'epitheli', 'cell', 'enter', 'respiratori', 'tract', 'bovin', 'coronavirus', 'releas', 'viru', 'progeni', ''] 

Input ->  756	Endogenous IL‐12 synthesis is not required to prevent hyperexpression of type 2 cytokine and antibody responses

Doc id -> 756
Doc text -> Endogenous IL‐12 synthesis is not required to prevent hyperexpression of type 2 cytokine and antibody responses


Doc text


Doc text post processing ->  the more involved in lead-zinc mining risk the less frightened  a psychological typhoon eye perspective 

Doc tokens post whitespace tokenizing ->  ['the', 'more', 'involved', 'in', 'lead', 'zinc', 'mining', 'risk', 'the', 'less', 'frightened', 'a', 'psychological', 'typhoon', 'eye', 'perspective']
Doc tokens post stopword removal ->  ['involved', 'lead', 'zinc', 'mining', 'risk', 'less', 'frightened', 'psychological', 'typhoon', 'eye', 'perspective']
Doc tokens post stemming ->  ['involv', 'lead', 'zinc', 'mine', 'risk', 'less', 'frighten', 'psycholog', 'typhoon', 'eye', 'perspect'] 

Input ->  79302	Evaluation of the antimicrobial efficacy and skin safety of a novel color additive in combination with chlorine disinfectants

Doc id -> 79302
Doc text -> Evaluation of the antimicrobial efficacy and skin safety of a novel color additive in combination with chlorine disinfectants


Doc text post processing ->  evaluation of the antimicrobial efficacy and skin

Doc tokens post stopword removal ->  ['pantropic', 'canine', 'coronavirus', 'genetically', 'related', 'prototype', 'isolate', 'cb', '05', '']
Doc tokens post stemming ->  ['pantrop', 'canin', 'coronaviru', 'genet', 'relat', 'prototyp', 'isol', 'cb', '05', ''] 

Input ->  122396	The changing face of pathogen discovery and surveillance

Doc id -> 122396
Doc text -> The changing face of pathogen discovery and surveillance


Doc text post processing ->  the changing face of pathogen discovery and surveillance 

Doc tokens post whitespace tokenizing ->  ['the', 'changing', 'face', 'of', 'pathogen', 'discovery', 'and', 'surveillance']
Doc tokens post stopword removal ->  ['changing', 'face', 'pathogen', 'discovery', 'surveillance']
Doc tokens post stemming ->  ['chang', 'face', 'pathogen', 'discoveri', 'surveil'] 

Input ->  6885	Severe acute respiratory syndrome and other emerging severe respiratory viral infections

Doc id -> 6885
Doc text -> Severe acute respiratory syndrome and other eme

Doc id -> 153008
Doc text -> Severe odontogenic infections with septic progress – a constant and increasing challenge: a retrospective analysis


Doc text post processing ->  severe odontogenic infections with septic progress   a constant and increasing challenge  a retrospective analysis 

Doc tokens post whitespace tokenizing ->  ['severe', 'odontogenic', 'infections', 'with', 'septic', 'progress', 'a', 'constant', 'and', 'increasing', 'challenge', 'a', 'retrospective', 'analysis']
Doc tokens post stopword removal ->  ['severe', 'odontogenic', 'infections', 'septic', 'progress', 'constant', 'increasing', 'challenge', 'retrospective', 'analysis']
Doc tokens post stemming ->  ['sever', 'odontogen', 'infect', 'septic', 'progress', 'constant', 'increas', 'challeng', 'retrospect', 'analysi'] 

Input ->  104547	COVID‐19 convalescent plasma donor recruitment: beware the Faustian bargains

Doc id -> 104547
Doc text -> COVID‐19 convalescent plasma donor recruitment: beware the Faustian bargai

Doc tokens post stemming ->  ['evalu', 'multiplex', 'pcr', 'assay', 'detect', 'respiratori', 'viral', 'pathogen', 'public', 'health', 'laboratori', 'set', ''] 

Input ->  146706	Onset of occupational hand eczema among healthcare workers during the SARS-CoV-2 pandemic - comparing a single surgical site with a COVID-19 intensive care unit

Doc id -> 146706
Doc text -> Onset of occupational hand eczema among healthcare workers during the SARS-CoV-2 pandemic - comparing a single surgical site with a COVID-19 intensive care unit


Doc text post processing ->  onset of occupational hand eczema among healthcare workers during the sars-cov-2 pandemic - comparing a single surgical site with a covid-19 intensive care unit 

Doc tokens post whitespace tokenizing ->  ['onset', 'of', 'occupational', 'hand', 'eczema', 'among', 'healthcare', 'workers', 'during', 'the', 'sars', 'cov', '2', 'pandemic', 'comparing', 'a', 'single', 'surgical', 'site', 'with', 'a', 'covid', '19', 'intensive', 'care', 'uni


Doc id -> 117933
Doc text -> Influence of ISDN, L-NAME and selenium on microcirculation, leukocyte endothelium interaction and angiogenesis after frostbite.


Doc text post processing ->  influence of isdn  l-name and selenium on microcirculation  leukocyte endothelium interaction and angiogenesis after frostbite  

Doc tokens post whitespace tokenizing ->  ['influence', 'of', 'isdn', 'l', 'name', 'and', 'selenium', 'on', 'microcirculation', 'leukocyte', 'endothelium', 'interaction', 'and', 'angiogenesis', 'after', 'frostbite', '']
Doc tokens post stopword removal ->  ['influence', 'isdn', 'l', 'name', 'selenium', 'microcirculation', 'leukocyte', 'endothelium', 'interaction', 'angiogenesis', 'frostbite', '']
Doc tokens post stemming ->  ['influenc', 'isdn', 'l', 'name', 'selenium', 'microcircul', 'leukocyt', 'endothelium', 'interact', 'angiogenesi', 'frostbit', ''] 

Input ->  101966	COVID-19 infection: Is the outcome time-dependent?

Doc id -> 101966
Doc text -> COVID-19 infection: I

Doc tokens post whitespace tokenizing ->  ['covid', '19', 'what', 'we', 'learn', 'from', 'experience', '']
Doc tokens post stopword removal ->  ['covid', '19', 'learn', 'experience', '']
Doc tokens post stemming ->  ['covid', '19', 'learn', 'experi', ''] 

Input ->  123439	Transmissible gastroenteritis virus infection induces cell apoptosis via activation of p53 signalling.

Doc id -> 123439
Doc text -> Transmissible gastroenteritis virus infection induces cell apoptosis via activation of p53 signalling.


Doc text post processing ->  transmissible gastroenteritis virus infection induces cell apoptosis via activation of p53 signalling  

Doc tokens post whitespace tokenizing ->  ['transmissible', 'gastroenteritis', 'virus', 'infection', 'induces', 'cell', 'apoptosis', 'via', 'activation', 'of', 'p53', 'signalling', '']
Doc tokens post stopword removal ->  ['transmissible', 'gastroenteritis', 'virus', 'infection', 'induces', 'cell', 'apoptosis', 'via', 'activation', 'p53', 'signalling',


Doc id -> 141548
Doc text -> Efficient spread-size approximation of opinion spreading in general social networks


Doc text post processing ->  efficient spread-size approximation of opinion spreading in general social networks 

Doc tokens post whitespace tokenizing ->  ['efficient', 'spread', 'size', 'approximation', 'of', 'opinion', 'spreading', 'in', 'general', 'social', 'networks']
Doc tokens post stopword removal ->  ['efficient', 'spread', 'size', 'approximation', 'opinion', 'spreading', 'general', 'social', 'networks']
Doc tokens post stemming ->  ['effici', 'spread', 'size', 'approxim', 'opinion', 'gener', 'social', 'network'] 

Input ->  144850	Prevention of pressure ulcers among individuals cared for in the prone position: lessons for the COVID-19 emergency

Doc id -> 144850
Doc text -> Prevention of pressure ulcers among individuals cared for in the prone position: lessons for the COVID-19 emergency


Doc text post processing ->  prevention of pressure ulcers among individ

Doc id -> 30347
Doc text -> A Time-Dependent SEIRD Model for Forecasting the COVID-19 Transmission Dynamics


Doc text post processing ->  a time-dependent seird model for forecasting the covid-19 transmission dynamics 

Doc tokens post whitespace tokenizing ->  ['a', 'time', 'dependent', 'seird', 'model', 'for', 'forecasting', 'the', 'covid', '19', 'transmission', 'dynamics']
Doc tokens post stopword removal ->  ['time', 'dependent', 'seird', 'model', 'forecasting', 'covid', '19', 'transmission', 'dynamics']
Doc tokens post stemming ->  ['time', 'depend', 'seird', 'model', 'forecast', 'covid', '19', 'transmiss', 'dynam'] 

Input ->  141156	MERS differs from SARS, say experts.

Doc id -> 141156
Doc text -> MERS differs from SARS, say experts.


Doc text post processing ->  mers differs from sars  say experts  

Doc tokens post whitespace tokenizing ->  ['mers', 'differs', 'from', 'sars', 'say', 'experts', '']
Doc tokens post stopword removal ->  ['mers', 'differs', 'sars', 'say', 'expe

Doc tokens post stopword removal ->  ['functional', 'recovery', 'peripheral', 'nerve', 'block', 'versus', 'general', 'anesthesia', 'upper', 'limb', 'surgery', 'systematic', 'review', 'protocol', '']
Doc tokens post stemming ->  ['function', 'recoveri', 'peripher', 'nerv', 'block', 'versu', 'gener', 'anesthesia', 'upper', 'limb', 'surgeri', 'systemat', 'review', 'protocol', ''] 

Input ->  13408	Middle East respiratory syndrome: new disease, old lessons

Doc id -> 13408
Doc text -> Middle East respiratory syndrome: new disease, old lessons


Doc text post processing ->  middle east respiratory syndrome  new disease  old lessons 

Doc tokens post whitespace tokenizing ->  ['middle', 'east', 'respiratory', 'syndrome', 'new', 'disease', 'old', 'lessons']
Doc tokens post stopword removal ->  ['middle', 'east', 'respiratory', 'syndrome', 'new', 'disease', 'old', 'lessons']
Doc tokens post stemming ->  ['middl', 'east', 'respiratori', 'syndrom', 'new', 'diseas', 'old', 'lesson'] 

Input ->  7

Input ->  65459	Perioperative right ventricular dysfunction.

Doc id -> 65459
Doc text -> Perioperative right ventricular dysfunction.


Doc text post processing ->  perioperative right ventricular dysfunction  

Doc tokens post whitespace tokenizing ->  ['perioperative', 'right', 'ventricular', 'dysfunction', '']
Doc tokens post stopword removal ->  ['perioperative', 'right', 'ventricular', 'dysfunction', '']
Doc tokens post stemming ->  ['periop', 'right', 'ventricular', 'dysfunct', ''] 

Input ->  51803	The relationship between the psychological stress of adolescents in school and the prevalence of chronic low back pain: a cross-sectional study in China.

Doc id -> 51803
Doc text -> The relationship between the psychological stress of adolescents in school and the prevalence of chronic low back pain: a cross-sectional study in China.


Doc text post processing ->  the relationship between the psychological stress of adolescents in school and the prevalence of chronic low back pain  

Doc text post processing ->  high influenza a virus infection rates in mallards bred for hunting in the camargue  south of france 

Doc tokens post whitespace tokenizing ->  ['high', 'influenza', 'a', 'virus', 'infection', 'rates', 'in', 'mallards', 'bred', 'for', 'hunting', 'in', 'the', 'camargue', 'south', 'of', 'france']
Doc tokens post stopword removal ->  ['high', 'influenza', 'virus', 'infection', 'rates', 'mallards', 'bred', 'hunting', 'camargue', 'south', 'france']
Doc tokens post stemming ->  ['high', 'influenza', 'viru', 'infect', 'rate', 'mallard', 'bred', 'hunt', 'camargu', 'south', 'franc'] 

Input ->  6011	ICON (Ivermectin in COvid Nineteen) study: Use of Ivermectin is Associated with Lower Mortality in Hospitalized Patients with COVID19

Doc id -> 6011
Doc text -> ICON (Ivermectin in COvid Nineteen) study: Use of Ivermectin is Associated with Lower Mortality in Hospitalized Patients with COVID19


Doc text post processing ->  icon  ivermectin in covid nineteen  study  us

Doc tokens post stopword removal ->  ['salvage', 'radiotherapy', 'patients', 'affected', 'oligorecurrent', 'pelvic', 'nodal', 'prostate', 'cancer']
Doc tokens post stemming ->  ['salvag', 'radiotherapi', 'patient', 'affect', 'oligorecurr', 'pelvic', 'nodal', 'prostat', 'cancer'] 

Input ->  6411	How to verify the death of a patient.

Doc id -> 6411
Doc text -> How to verify the death of a patient.


Doc text post processing ->  how to verify the death of a patient  

Doc tokens post whitespace tokenizing ->  ['how', 'to', 'verify', 'the', 'death', 'of', 'a', 'patient', '']
Doc tokens post stopword removal ->  ['verify', 'death', 'patient', '']
Doc tokens post stemming ->  ['verifi', 'death', 'patient', ''] 

Input ->  102068	Surgical treatment options for aldosteronomas.

Doc id -> 102068
Doc text -> Surgical treatment options for aldosteronomas.


Doc text post processing ->  surgical treatment options for aldosteronomas  

Doc tokens post whitespace tokenizing ->  ['surgical', 'treat


Doc tokens post whitespace tokenizing ->  ['editor', 's', 'choice']
Doc tokens post stopword removal ->  ['editor', 'choice']
Doc tokens post stemming ->  ['editor', 'choic'] 

Input ->  4147	Physicians Poster Abstracts: EBMT 2012

Doc id -> 4147
Doc text -> Physicians Poster Abstracts: EBMT 2012


Doc text post processing ->  physicians poster abstracts  ebmt 2012 

Doc tokens post whitespace tokenizing ->  ['physicians', 'poster', 'abstracts', 'ebmt', '2012']
Doc tokens post stopword removal ->  ['physicians', 'poster', 'abstracts', 'ebmt', '2012']
Doc tokens post stemming ->  ['physician', 'poster', 'abstract', 'ebmt', '2012'] 

Input ->  142135	A corn-based delivery system for animal vaccines: an oral transmissible gastroenteritis virus vaccine boosts lactogenic immunity in swine

Doc id -> 142135
Doc text -> A corn-based delivery system for animal vaccines: an oral transmissible gastroenteritis virus vaccine boosts lactogenic immunity in swine


Doc text post processing ->  a cor

Doc text post processing ->  challenges and opportunities for respiratory syncytial virus vaccines 

Doc tokens post whitespace tokenizing ->  ['challenges', 'and', 'opportunities', 'for', 'respiratory', 'syncytial', 'virus', 'vaccines']
Doc tokens post stopword removal ->  ['challenges', 'opportunities', 'respiratory', 'syncytial', 'virus', 'vaccines']
Doc tokens post stemming ->  ['challeng', 'opportun', 'respiratori', 'syncyti', 'viru', 'vaccin'] 

Input ->  4288	Basic Income Advocacy in Canada: Multiple Streams, Experiments and the Road Ahead

Doc id -> 4288
Doc text -> Basic Income Advocacy in Canada: Multiple Streams, Experiments and the Road Ahead


Doc text post processing ->  basic income advocacy in canada  multiple streams  experiments and the road ahead 

Doc tokens post whitespace tokenizing ->  ['basic', 'income', 'advocacy', 'in', 'canada', 'multiple', 'streams', 'experiments', 'and', 'the', 'road', 'ahead']
Doc tokens post stopword removal ->  ['basic', 'income', 'advoc

Doc tokens post stemming ->  ['transcathet', 'embol', 'manag', 'epistaxi', ''] 

Input ->  81103	[Comparison of severe acute respiratory syndrome with community-acquired pneumonia].

Doc id -> 81103
Doc text -> [Comparison of severe acute respiratory syndrome with community-acquired pneumonia].


Doc text post processing ->   comparison of severe acute respiratory syndrome with community-acquired pneumonia   

Doc tokens post whitespace tokenizing ->  ['', 'comparison', 'of', 'severe', 'acute', 'respiratory', 'syndrome', 'with', 'community', 'acquired', 'pneumonia', '']
Doc tokens post stopword removal ->  ['', 'comparison', 'severe', 'acute', 'respiratory', 'syndrome', 'community', 'acquired', 'pneumonia', '']
Doc tokens post stemming ->  ['', 'comparison', 'sever', 'acut', 'respiratori', 'syndrom', 'commun', 'acquir', 'pneumonia'] 

Input ->  57050	Mouse hepatitis virus neurovirulence: evidence of a linkage between S glycoprotein expression and immunopathology

Doc id -> 57050
Doc te

Doc tokens post stemming ->  ['clinic', 'ct', 'imag', 'featur', '2019', 'novel', 'coronaviru', 'diseas', 'covid', '19', ''] 

Input ->  85447	Packing of charged chains on toroidal geometries.

Doc id -> 85447
Doc text -> Packing of charged chains on toroidal geometries.


Doc text post processing ->  packing of charged chains on toroidal geometries  

Doc tokens post whitespace tokenizing ->  ['packing', 'of', 'charged', 'chains', 'on', 'toroidal', 'geometries', '']
Doc tokens post stopword removal ->  ['packing', 'charged', 'chains', 'toroidal', 'geometries', '']
Doc tokens post stemming ->  ['pack', 'charg', 'chain', 'toroid', 'geometri', ''] 

Input ->  87285	ANTIQUE: A Non-factoid Question Answering Benchmark

Doc id -> 87285
Doc text -> ANTIQUE: A Non-factoid Question Answering Benchmark


Doc text post processing ->  antique  a non-factoid question answering benchmark 

Doc tokens post whitespace tokenizing ->  ['antique', 'a', 'non', 'factoid', 'question', 'answering', 'benchmar


Input ->  61787	Design, synthesis, antiviral and cytotoxic evaluation of novel acyclic phosphonate nucleotide analogues with a 5,6-dihydro-1H-[1,2,3]triazolo[4,5-d]pyridazine-4,7-dione system

Doc id -> 61787
Doc text -> Design, synthesis, antiviral and cytotoxic evaluation of novel acyclic phosphonate nucleotide analogues with a 5,6-dihydro-1H-[1,2,3]triazolo[4,5-d]pyridazine-4,7-dione system


Doc text post processing ->  design  synthesis  antiviral and cytotoxic evaluation of novel acyclic phosphonate nucleotide analogues with a 5 6-dihydro-1h- 1 2 3 triazolo 4 5-d pyridazine-4 7-dione system 

Doc tokens post whitespace tokenizing ->  ['design', 'synthesis', 'antiviral', 'and', 'cytotoxic', 'evaluation', 'of', 'novel', 'acyclic', 'phosphonate', 'nucleotide', 'analogues', 'with', 'a', '5', '6', 'dihydro', '1h', '1', '2', '3', 'triazolo', '4', '5', 'd', 'pyridazine', '4', '7', 'dione', 'system']
Doc tokens post stopword removal ->  ['design', 'synthesis', 'antiviral', 'cytotoxic', 

Doc tokens post stemming ->  ['coinfect', 'hospit', 'children', 'commun', 'acquir', 'pneumonia', 'mean', 'clinician', ''] 

Input ->  66703	Characterizing and controlling the inflammatory network during influenza A virus infection

Doc id -> 66703
Doc text -> Characterizing and controlling the inflammatory network during influenza A virus infection


Doc text post processing ->  characterizing and controlling the inflammatory network during influenza a virus infection 

Doc tokens post whitespace tokenizing ->  ['characterizing', 'and', 'controlling', 'the', 'inflammatory', 'network', 'during', 'influenza', 'a', 'virus', 'infection']
Doc tokens post stopword removal ->  ['characterizing', 'controlling', 'inflammatory', 'network', 'influenza', 'virus', 'infection']
Doc tokens post stemming ->  ['character', 'control', 'inflammatori', 'network', 'influenza', 'viru', 'infect'] 

Input ->  93921	Survival of Coronaviruses in Water and Wastewater

Doc id -> 93921
Doc text -> Survival of Coro

Doc tokens post stemming ->  ['immunogen', 'protect', 'efficaci', 'monomer', 'trimer', 'recombin', 'sar', 'coronaviru', 'spike', 'protein', 'subunit', 'vaccin', 'candid', ''] 

Input ->  93865	U.S. county-level characteristics to inform equitable COVID-19 response

Doc id -> 93865
Doc text -> U.S. county-level characteristics to inform equitable COVID-19 response


Doc text post processing ->  u s  county-level characteristics to inform equitable covid-19 response 

Doc tokens post whitespace tokenizing ->  ['u', 's', 'county', 'level', 'characteristics', 'to', 'inform', 'equitable', 'covid', '19', 'response']
Doc tokens post stopword removal ->  ['u', 'county', 'level', 'characteristics', 'inform', 'equitable', 'covid', '19', 'response']
Doc tokens post stemming ->  ['u', 'counti', 'level', 'characterist', 'inform', 'equit', 'covid', '19', 'respons'] 

Input ->  67415	Using simulation for training and to change protocol during the outbreak of severe acute respiratory syndrome

Doc id 

Doc tokens post stemming ->  ['case', 'report', 'detect', 'middl', 'east', 'respiratori', 'syndrom', 'corona', 'viru', 'mer', 'cov', 'nasal', 'secret', 'dead', 'human'] 

Input ->  82282	Arguments in favour of remdesivir for treating SARS-CoV-2 infections

Doc id -> 82282
Doc text -> Arguments in favour of remdesivir for treating SARS-CoV-2 infections


Doc text post processing ->  arguments in favour of remdesivir for treating sars-cov-2 infections 

Doc tokens post whitespace tokenizing ->  ['arguments', 'in', 'favour', 'of', 'remdesivir', 'for', 'treating', 'sars', 'cov', '2', 'infections']
Doc tokens post stopword removal ->  ['arguments', 'favour', 'remdesivir', 'treating', 'sars', 'cov', '2', 'infections']
Doc tokens post stemming ->  ['argument', 'favour', 'remdesivir', 'treat', 'sar', 'cov', '2', 'infect'] 

Input ->  106263	Combination therapy with single incision laparoscopic surgery and double-balloon endoscopy for small intestinal bleeding: report of three cases.

Doc id ->

Doc text post processing ->  amphotericin b increases influenza a virus infection by preventing ifitm3-mediated restriction 

Doc tokens post whitespace tokenizing ->  ['amphotericin', 'b', 'increases', 'influenza', 'a', 'virus', 'infection', 'by', 'preventing', 'ifitm3', 'mediated', 'restriction']
Doc tokens post stopword removal ->  ['amphotericin', 'b', 'increases', 'influenza', 'virus', 'infection', 'preventing', 'ifitm3', 'mediated', 'restriction']
Doc tokens post stemming ->  ['amphotericin', 'b', 'increas', 'influenza', 'viru', 'infect', 'prevent', 'ifitm3', 'mediat', 'restrict'] 

Input ->  19214	Mapping of the RNA-binding domain of the porcine reproductive and respiratory syndrome virus nucleocapsid protein.

Doc id -> 19214
Doc text -> Mapping of the RNA-binding domain of the porcine reproductive and respiratory syndrome virus nucleocapsid protein.


Doc text post processing ->  mapping of the rna-binding domain of the porcine reproductive and respiratory syndrome virus nucle

Doc tokens post stemming ->  ['molecular', 'subtyp', 'reveal', 'immun', 'alter', 'idh', 'wild', 'type', 'lower', 'grade', 'diffus', 'glioma', ''] 

Input ->  90282	The Italian health system and the COVID-19 challenge

Doc id -> 90282
Doc text -> The Italian health system and the COVID-19 challenge


Doc text post processing ->  the italian health system and the covid-19 challenge 

Doc tokens post whitespace tokenizing ->  ['the', 'italian', 'health', 'system', 'and', 'the', 'covid', '19', 'challenge']
Doc tokens post stopword removal ->  ['italian', 'health', 'system', 'covid', '19', 'challenge']
Doc tokens post stemming ->  ['italian', 'health', 'system', 'covid', '19', 'challeng'] 

Input ->  36778	Gastritis might be considered as a technical factor affecting laparoscopic sleeve gastrectomy.

Doc id -> 36778
Doc text -> Gastritis might be considered as a technical factor affecting laparoscopic sleeve gastrectomy.


Doc text post processing ->  gastritis might be considered as a tech

Doc tokens post stemming ->  ['', 'organiz', 'impact', 'clinic', 'challeng', 'covid', '19', 'pandem', 'swiss', 'tertiari', 'intern', 'medicin', 'depart'] 

Input ->  24992	A novel anti-mycobacterial function of mitogen-activated protein kinase phosphatase-1

Doc id -> 24992
Doc text -> A novel anti-mycobacterial function of mitogen-activated protein kinase phosphatase-1


Doc text post processing ->  a novel anti-mycobacterial function of mitogen-activated protein kinase phosphatase-1 

Doc tokens post whitespace tokenizing ->  ['a', 'novel', 'anti', 'mycobacterial', 'function', 'of', 'mitogen', 'activated', 'protein', 'kinase', 'phosphatase', '1']
Doc tokens post stopword removal ->  ['novel', 'anti', 'mycobacterial', 'function', 'mitogen', 'activated', 'protein', 'kinase', 'phosphatase', '1']
Doc tokens post stemming ->  ['novel', 'anti', 'mycobacteri', 'function', 'mitogen', 'activ', 'protein', 'kinas', 'phosphatas', '1'] 

Input ->  113694	“The COVID-19 Generation”: A Cautionary No

Doc tokens post stemming ->  ['endovascular', 'manag', 'abdomin', 'aortic', 'aneurysm', 'year', 'review', ''] 

Input ->  76434	COVID-19 and Financial Vulnerability: What Health Care Organizations and Society Owe Each Other

Doc id -> 76434
Doc text -> COVID-19 and Financial Vulnerability: What Health Care Organizations and Society Owe Each Other


Doc text post processing ->  covid-19 and financial vulnerability  what health care organizations and society owe each other 

Doc tokens post whitespace tokenizing ->  ['covid', '19', 'and', 'financial', 'vulnerability', 'what', 'health', 'care', 'organizations', 'and', 'society', 'owe', 'each', 'other']
Doc tokens post stopword removal ->  ['covid', '19', 'financial', 'vulnerability', 'health', 'care', 'organizations', 'society', 'owe']
Doc tokens post stemming ->  ['covid', '19', 'financi', 'vulner', 'health', 'care', 'organ', 'societi', 'owe'] 

Input ->  135998	Helmet Modification to PPE With 3D Printing During the COVID-19 Pandemic at 

Input query ->  the novel coronavirus

Query Post Processing ->  the novel coronavirus
Doc tokens post whitespace tokenizing ->  ['the', 'novel', 'coronavirus']
Doc tokens post stopword removal ->  ['novel', 'coronavirus']
Doc tokens post stemming ->  ['novel', 'coronaviru'] 

Input query ->  from an epidemic to a pandemic

Query Post Processing ->  from an epidemic to a pandemic
Doc tokens post whitespace tokenizing ->  ['from', 'an', 'epidemic', 'to', 'a', 'pandemic']
Doc tokens post stopword removal ->  ['epidemic', 'pandemic']
Doc tokens post stemming ->  ['epidem', 'pandem'] 

Input query ->  is hydroxychloroquine effective?
Query Post Processing ->  is hydroxychloroquine effective 
Doc tokens post whitespace tokenizing ->  ['is', 'hydroxychloroquine', 'effective']
Doc tokens post stopword removal ->  ['hydroxychloroquine', 'effective']
Doc tokens post stemming ->  ['hydroxychloroquin', 'effect'] 

