# Search Engine For Candidate Sentences

## Demonstration of how to use the simple search engine for fetching relevant sentences

Let's import our search engine from the `src` directory.

First, one needs to set the Python source path environment variables for Jupyter Notebook. If you haven't done this, please run these two commands BEFORE starting Jupyter Notebook:
1. `export PYTHONPATH=/path/to/covid19/src`
2. `export JUPYTER_PATH=/path/to/covid19/src`

In [19]:
import pandas as pd
from gensim.models.phrases import Phraser

from nlp.cleaning import clean_tokenized_sentence
from w2v.synonyms import Synonyms
from pprint import pprint
import operator
from typing import List
from datetime import datetime
import os

In [2]:
# Directory holding the sentences CSV, the bi-/tri-gram phrase models and the
# word-vector file loaded in the cells below (relative to the working directory).
data_dir = "../../../workspace/kaggle/covid19/data"

Initialize our SearchEngine object with:
1. Sentences metadata
2. bi-gram model
3. tri-gram model
4. Trained FastText vectors

In [13]:
# One row per sentence. The cells below read the metadata columns (paper_id, cord_uid,
# source, url, publish_time, authors, section, sentence) and `cleaned_sentence` from it.
sentences_df = pd.read_csv(os.path.join(data_dir, "sentences_with_metadata.csv"))

In [4]:
def create_articles_metadata_mapping(sentences_df: pd.DataFrame) -> dict:
    """Build a lookup from each row's index label to that sentence's metadata.

    Args:
        sentences_df: Table with one row per sentence; it must contain every
            column named in ``metadata_fields`` below.

    Returns:
        A dict keyed by the DataFrame index label of each row. Every value is
        a dict holding the row's ``paper_id``, ``cord_uid``, ``source``,
        ``url``, ``publish_time``, ``authors``, ``section`` and ``sentence``.
    """
    metadata_fields = (
        'paper_id',
        'cord_uid',
        'source',
        'url',
        'publish_time',
        'authors',
        'section',
        'sentence',
    )
    return {
        row_label: {field: record[field] for field in metadata_fields}
        for row_label, record in sentences_df.iterrows()
    }

In [5]:
# Row label -> metadata dict; the search engine uses it to look up publish dates
# and to return the metadata of each hit.
sentence_id_to_metadata = create_articles_metadata_mapping(sentences_df)

In [14]:
# Word-vector model used to expand query terms with synonyms
# (the loader reports it as a fasttext model in the output below).
synonyms_model = Synonyms(os.path.join(data_dir, "fasttext_no_subwords_trigrams/word-vectors-100d.txt"))

Loading fasttext model: ../../../workspace/kaggle/covid19/data/fasttext_no_subwords_trigrams/word-vectors-100d.txt
Finished loading fasttext model: ../../../workspace/kaggle/covid19/data/fasttext_no_subwords_trigrams/word-vectors-100d.txt


In [61]:
class SearchEngine:
    """Keyword search over pre-cleaned sentences with phrase and synonym expansion.

    A sentence is a hit when it contains at least one of the (expanded)
    must-have search terms. Hits are ranked by a weighted mix of how strongly
    they match and how recently their article was published.

    NOTE: sentences are addressed by their position in ``cleaned_sentences``
    and that position is used as the key into ``sentence_id_to_metadata``;
    this lines up only when the mapping was built from the same DataFrame
    with a default 0..n-1 index.
    """

    # Relative contribution of the keyword-match share and the recency share
    # to the final score of a hit.
    MATCH_SCORE_WEIGHT = 0.7
    RECENCY_SCORE_WEIGHT = 0.3

    def __init__(self,
                 sentence_id_to_metadata: dict,
                 sentences_df: pd.DataFrame,
                 bigram_model_path: str,
                 trigram_model_path: str,
                 synonyms_model):
        """Load the sentences and the phrase models.

        Args:
            sentence_id_to_metadata: Mapping from sentence position to a
                metadata dict that contains at least ``publish_time``.
            sentences_df: Table whose ``cleaned_sentence`` column holds the
                space-separated tokens of every sentence.
            bigram_model_path: Path of the saved bi-gram ``Phraser``.
            trigram_model_path: Path of the saved tri-gram ``Phraser``.
            synonyms_model: Object exposing ``get_synonyms(token)``, which
                yields items whose first element is the synonym and whose
                second element is its similarity score.
        """
        self.sentence_id_to_metadata = sentence_id_to_metadata
        self.cleaned_sentences = sentences_df['cleaned_sentence'].tolist()
        print(f'Loaded {len(self.cleaned_sentences)} sentences')

        print(f'Loading bi-gram model: {bigram_model_path}')
        self.bigram_model = Phraser.load(bigram_model_path)
        print(f'Finished loading bi-gram model: {bigram_model_path}')

        print(f'Loading tri-gram model: {trigram_model_path}')
        self.trigram_model = Phraser.load(trigram_model_path)
        print(f'Finished loading tri-gram model: {trigram_model_path}')

        self.synonyms_model = synonyms_model

    def _get_search_terms(self, keywords, synonyms_threshold):
        """Turn raw keywords into the list of tokens to look for.

        The keywords are cleaned, concatenated into ONE token stream, run
        through the bi-gram and tri-gram models, and finally extended with
        every synonym whose similarity is at least ``synonyms_threshold``.
        Because the keywords share one token stream, adjacent keywords can
        be merged into a single phrase by the phrase models.

        Returns:
            The phrase-merged terms followed by their synonyms (duplicates
            are kept), or an empty list when nothing survives cleaning.
        """
        # clean tokens (presumably returns a cleaned string per keyword,
        # since the results are joined with spaces below -- TODO confirm)
        cleaned_terms = [clean_tokenized_sentence(keyword.split(' ')) for keyword in keywords]
        # remove empty terms
        cleaned_terms = [term for term in cleaned_terms if term]
        if not cleaned_terms:
            # ''.split(' ') is [''], which would inject an empty-string token
            # into the query and hit the models for nothing.
            return []

        # create bi-grams, then tri-grams on top of them
        terms_with_bigrams = self.bigram_model[' '.join(cleaned_terms).split(' ')]
        terms_with_trigrams = list(self.trigram_model[terms_with_bigrams])

        # expand the query with synonyms at or above the threshold
        synonyms = [
            candidate[0]
            for term in terms_with_trigrams
            for candidate in self.synonyms_model.get_synonyms(term)
            if candidate[1] >= synonyms_threshold
        ]
        return terms_with_trigrams + synonyms

    @staticmethod
    def _parse_publish_time(publish_time):
        """Return ``publish_time`` as a datetime, or None when it is unusable.

        Accepts ``YYYY-MM-DD`` and a bare four-digit year (treated as
        January 1st of that year). Non-string values (e.g. the NaN pandas
        produces for an empty cell) and any other format yield None.
        """
        if not isinstance(publish_time, str):
            return None
        text = publish_time.strip()
        if len(text) == 4 and text.isdigit():
            text += "-01-01"
        try:
            return datetime.strptime(text, "%Y-%m-%d")
        except ValueError:
            return None

    @staticmethod
    def _normalize(values):
        """Scale ``values`` so they sum to 1; all zeros when their sum is 0."""
        total = sum(values)
        if not total:
            return [0.0] * len(values)
        return [float(value) / total for value in values]

    def _rank_sentences(self, search_terms, optional_search_terms, top_n,
                        keyword_weight, optional_keyword_weight):
        """Score every matching sentence and return the best ``top_n`` metadata dicts."""
        date_today = datetime.today()

        indexes = []
        match_scores = []
        ages_in_days = []  # None marks a hit whose publish date could not be parsed
        for sentence_index, sentence in enumerate(self.cleaned_sentences):
            if not isinstance(sentence, str):
                # an empty CSV cell is read back as NaN, which cannot be tokenised
                continue
            tokens = set(sentence.split(' '))
            match_score = sum(keyword_weight for term in search_terms if term in tokens)
            if match_score <= 0:
                # only sentences with at least one must-have term are hits
                continue
            match_score += sum(optional_keyword_weight
                               for term in optional_search_terms if term in tokens)

            published = self._parse_publish_time(
                self.sentence_id_to_metadata[sentence_index]["publish_time"])
            indexes.append(sentence_index)
            match_scores.append(match_score)
            ages_in_days.append(None if published is None else (date_today - published).days)

        if not indexes:
            return []

        # Recency: the oldest hit gets 0 and newer hits get more. Hits without
        # a usable date are treated like the oldest one.
        known_ages = [age for age in ages_in_days if age is not None]
        oldest = max(known_ages) if known_ages else 0
        recency = [0 if age is None else oldest - age for age in ages_in_days]

        # Both components become shares of their total; a zero total (for
        # example a single hit, or all hits published the same day) yields
        # zeros instead of a division by zero.
        match_shares = self._normalize(match_scores)
        recency_shares = self._normalize(recency)

        scored = [
            (index, self.MATCH_SCORE_WEIGHT * match_share + self.RECENCY_SCORE_WEIGHT * recency_share)
            for index, match_share, recency_share in zip(indexes, match_shares, recency_shares)
        ]
        # stable sort: ties keep their original sentence order
        scored.sort(key=operator.itemgetter(1), reverse=True)

        return [self.sentence_id_to_metadata[index] for index, _ in scored[:top_n]]

    def search(self,
               keywords: List[str],
               optional_keywords=None,
               top_n: int = 10,
               synonyms_threshold=0.7,
               keyword_weight: float = 3.0,
               optional_keyword_weight: float = 0.5) -> List[dict]:
        """Return the metadata of the ``top_n`` best-scoring sentences.

        Args:
            keywords: Must-have keywords; a sentence needs at least one of
                the terms derived from them to be a hit.
            optional_keywords: Keywords that only raise the score of a hit.
            top_n: Maximum number of results.
            synonyms_threshold: Minimum similarity for a synonym to be added
                to the query.
            keyword_weight: Score added per matching must-have term.
            optional_keyword_weight: Score added per matching optional term.

        Returns:
            The metadata dicts (the same objects stored in
            ``sentence_id_to_metadata``) of the hits, best first. Empty when
            nothing matches.
        """
        if optional_keywords is None:
            optional_keywords = []

        search_terms = self._get_search_terms(keywords, synonyms_threshold)

        optional_search_terms = self._get_search_terms(optional_keywords, synonyms_threshold) \
            if optional_keywords else []

        print(f'Search terms after cleaning, bigrams, trigrams and synonym expansion: {search_terms}')
        print(f'Optional search terms after cleaning, bigrams, trigrams and synonym expansion: {optional_search_terms}')

        return self._rank_sentences(search_terms, optional_search_terms, top_n,
                                    keyword_weight, optional_keyword_weight)

In [62]:
# Build the engine used by every query below: sentence metadata, the sentences
# table, the saved bi-gram and tri-gram phrase models, and the synonym model.
search_engine = SearchEngine(
    sentence_id_to_metadata,
    sentences_df,
    os.path.join(data_dir, "covid_bigram_model_v0.pkl"),
    os.path.join(data_dir, "covid_trigram_model_v0.pkl"),
    synonyms_model
)

Loaded 249343 sentences
Loading bi-gram model: ../../../workspace/kaggle/covid19/data/covid_bigram_model_v0.pkl
Finished loading bi-gram model: ../../../workspace/kaggle/covid19/data/covid_bigram_model_v0.pkl
Loading tri-gram model: ../../../workspace/kaggle/covid19/data/covid_trigram_model_v0.pkl
Finished loading tri-gram model: ../../../workspace/kaggle/covid19/data/covid_trigram_model_v0.pkl


Simple search function that gets a list of keywords to search:

In [63]:
def search(keywords, optional_keywords=None, top_n=10, synonyms_threshold=0.8, only_sentences=False):
    """Run a query on the module-level ``search_engine`` and print the hits.

    Args:
        keywords: Must-have keywords passed to ``search_engine.search``.
        optional_keywords: Optional keywords passed through unchanged.
        top_n: Maximum number of hits to print.
        synonyms_threshold: Synonym similarity cut-off passed through to the
            engine (this wrapper defaults to 0.8).
        only_sentences: When true, print just the ``sentence`` text of each
            hit; otherwise pretty-print the full metadata dicts.

    Returns:
        None; the results are only printed.
    """
    print(f"\nSearch for terms {keywords}\n\n")
    hits = search_engine.search(
        keywords,
        optional_keywords=optional_keywords,
        top_n=top_n,
        synonyms_threshold=synonyms_threshold,
    )
    print("\nResults:\n")

    if not only_sentences:
        pprint(hits)
        return

    for hit in hits:
        print(hit['sentence'] + "\n")

Let's see some examples:

In [64]:
# Example: animal-origin / zoonotic-spillover keywords are required; the coronavirus
# names are optional keywords. Wrapper defaults apply (top_n=10, full metadata printed).
search(keywords=["animals", "zoonotic", "spillover", "animal to human",
             "bats", "snakes", "exotic animals", "seafood"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"])


Search for terms ['animals', 'zoonotic', 'spillover', 'animal to human', 'bats', 'snakes', 'exotic animals', 'seafood']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['animals', 'zoonotic_spillover', 'animal_human', 'bats', 'snakes', 'exotic_animals', 'seafood', 'spillover_humans', 'hostswitching_events', 'battohuman', 'host_switching_events', 'from_dromedaries_humans', 'interspecies_jumping', 'animaltoanimal_animaltohuman', 'humantohuman_transmission_events', 'emergence_events', 'zoonotic_spillover_events', 'human_animal', 'bat_species', 'fruit_bats', 'insectivorous_bats', 'species_bats', 'bat', 'exotic_pets', 'wild_animal_species', 'consumption_raw', 'salads', 'ice_cream', 'dairy_products', 'cold_cuts', 'contaminated_beef', 'beef_products', 'meats', 'improperly_cooked', 'raw_undercooked']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarsco

In [65]:
# Example: seasonality / climate keywords, coronavirus names optional.
# Wrapper defaults apply (top_n=10, full metadata printed).
search(keywords=["seasonality", "transmission", "humidity", "heat", "summer"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"])


Search for terms ['seasonality', 'transmission', 'humidity', 'heat', 'summer']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['seasonality', 'transmission', 'humidity_heat', 'summer', 'seasonal_patterns', 'seasonal_variation', 'seasonal_pattern', 'seasonal_trends', 'seasonality_influenza', 'seasonal_variations', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission', 'overall_discomfort', 'microclimate_temperature_humidity', 'emotional_benefits', 'perceived_comfort', 'n95mask_combination', 'perceived_exertion', 'thermophysiological', 'nanofunctional', 'physical_discomfort', 'subjective_ratings', 'winter', 'autumn', 'during_winter', 'during_summer', 'rainy', 'winter_summer', 'summer_autumn', 'fall_winter', 'cold_winter', 'during_winter_spring']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronav

In [66]:
# Example: incubation time and age; prints only the sentence text of the top 20 hits.
# NOTE(review): the printed search terms show "incubation_time" cleaned to
# 'incubationtime' and "new_coronavirus" to 'newcoronavirus' -- space-separated
# keywords ("incubation time") may be what was intended; confirm.
search(["incubation_time", "incubation", "age"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=20, only_sentences=True)


Search for terms ['incubation_time', 'incubation', 'age']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['incubationtime', 'incubation', 'age']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

It suggests that age 40 can be a key age cutoff for the incubation of COVID-19 along with previous statistical analysis.  

When we analyzed the positive rate according to age (Table 3) , and we could see that positive rate increased from 24.90% (age 18-30) to 61.81% (age >70).

Given the known incubation periodbetween 1 and 14 daysthe interspecies transmission could have occurred as late as November 2019.

Patients ≥70 years of age have 

In [67]:
# Example: prevalence of asymptomatic shedding and transmission in children;
# prints only the sentence text of the top 20 hits.
search(["Prevalence", "asymptomatic", "shedding", "transmission", "children"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=20, only_sentences=True)


Search for terms ['Prevalence', 'asymptomatic', 'shedding', 'transmission', 'children']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['prevalence', 'asymptomatic_shedding', 'transmission', 'children', 'prevalence_rates', 'prolonged_shedding', 'asymptomatic_persistence', 'persistent_shedding', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission', 'young_children', 'children_who', 'infants_children', 'older_children', 'infants', 'adults', 'children_adults', 'among_children', 'healthy_children']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

52 CAP of mixed etiology has been characterized less in adu

In [68]:
# Example: seasonality / climate keywords; prints only the sentence text of the top 10 hits.
search(["seasonality", "transmission", "humidity", "heat", "summer"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['seasonality', 'transmission', 'humidity', 'heat', 'summer']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['seasonality', 'transmission', 'humidity_heat', 'summer', 'seasonal_patterns', 'seasonal_variation', 'seasonal_pattern', 'seasonal_trends', 'seasonality_influenza', 'seasonal_variations', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission', 'overall_discomfort', 'microclimate_temperature_humidity', 'emotional_benefits', 'perceived_comfort', 'n95mask_combination', 'perceived_exertion', 'thermophysiological', 'nanofunctional', 'physical_discomfort', 'subjective_ratings', 'winter', 'autumn', 'during_winter', 'during_summer', 'rainy', 'winter_summer', 'summer_autumn', 'fall_winter', 'cold_winter', 'during_winter_spring']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronav

In [69]:
# Example: adhesion to hydrophilic/hydrophobic surfaces and decontamination;
# prints only the sentence text of the top 10 hits.
search(["adhesion", "hydrophilic", "hydrophobic", "surfaces", "decontamination"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['adhesion', 'hydrophilic', 'hydrophobic', 'surfaces', 'decontamination']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['adhesion', 'hydrophilic_hydrophobic', 'surfaces', 'decontamination', 'hydrophobic_hydrophilic', 'ligand_shell', 'oppositely_charged', 'hydrophilichydrophobic', 'polar_nonpolar', 'adsorbent_surface', 'anionic_cationic', 'neutral_charged', 'both_hydrophilic_hydrophobic', 'micelles_liposomes', 'disinfection']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

Although the viral load of coronaviruses on inanimate surfaces is not known during an outbreak situation it seem plausible to reduce the vi

In [70]:
# Example: persistence and stability in nasal discharge, sputum, urine and fecal matter;
# prints only the sentence text of the top 10 hits.
search(["Persistence", "stability","nasal discharge", "sputum", "urine", "fecal matter"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Persistence', 'stability', 'nasal discharge', 'sputum', 'urine', 'fecal matter']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['persistence', 'stability', 'nasal_discharge', 'sputum_urine', 'fecal_matter', 'sneezing_nasal', 'sneezing_nasal_discharge', 'ocular_discharge', 'watery_eyes', 'sneezing_cough', 'discharge_cough', 'cough_nasal', 'swollen_sinuses', 'eye_discharge', 'tracheal_rales', 'urine_sputum', 'lavage_specimen', 'sputum_endotracheal', 'pleural_tap', 'sputum_bal_fluid', 'fluid_pleural_fluid', 'antibiotic_treatment_commenced', 'sputum_bronchoalveolar_lavage_fluid', 'culture_sputum', 'sent_culture', 'secretions_feces', 'sweat_urine', 'saliva_nasal', 'contaminated_feces', 'other_body_secretions', 'secretions_urine', 'faecal_material', 'sweat_tears', 'animal_feces', 'fecally_contaminated']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'out

In [71]:
# Example: persistence on materials (copper, stainless steel, plastic);
# prints only the sentence text of the top 10 hits.
search(["Persistence", "materials", "copper", "stainless steel", "plastic"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Persistence', 'materials', 'copper', 'stainless steel', 'plastic']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['persistence', 'materials', 'copper', 'stainless_steel', 'plastic', 'stainless', 'galvanized_steel']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

HCoV-19 was most stable on plastic and stainless steel and viable virus could be detected up to 33 72 hours post application ( Figure 1A ), though the virus titer was greatly reduced (plastic from 10 3.7 to 34 10 0.6 TCID 50 /mL after 72 hours, stainless steel from 10 3.7 to 10 0.6 TCID 50 /mL after 48 hours).

This can occur via direct contact with 

In [72]:
# Example: natural history of the virus in infected people; prints only the sentence
# text of the top 10 hits. The printed search terms show the adjacent keywords
# "natural" and "history" merged into the single phrase 'natural_history'.
search(["natural", "history", "virus", "infected"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['natural', 'history', 'virus', 'infected']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['natural_history', 'virus', 'infected']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

The virus is transmitted mainly through infected respiratory droplets and on close contact with the infected person.

An increased level of viral diversity was found in some SARS-CoV-2 infected patients , suggesting that the virus has begun to adapt to the human environment and its genomes have begun to evolve in the population.

Since the virus emerged at the seafood wholesale market at the end of last year (Zhu et al., 2019) , the n

In [73]:
# Example: implementation of diagnostics and products in the clinical process;
# prints only the sentence text of the top 10 hits.
search(["implementation", "diagnostics", "product", "clinical", "process"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['implementation', 'diagnostics', 'product', 'clinical', 'process']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['implementation', 'diagnostics', 'product', 'clinical', 'process', 'implementing', 'adoption', 'products']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

Effective teamwork can increase the efficiency of the work, in the process of care, while buy and use the new improved care products, not only more convenient and effective; the current hospital has been promoted to the implementation of the respiratory care ward, To share the unit to promote the implementation of experience, in order to achieve

In [74]:
# Example: disease models, animals, infection and transmission;
# prints only the sentence text of the top 10 hits.
search(["disease models", "animals", "infection", "transmission"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['disease models', 'animals', 'infection', 'transmission']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['disease', 'models', 'animals', 'infection', 'transmission', 'model', 'infections', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

agent-based) and represent each individual in a specific community, matching recent census and other government data.7, 8 We previously developed individual-based simulation models for population centres in Australia, South Africa, Thailand, Vietnam and Papua New Guinea, all using the same underl

In [75]:
# Example: tools and studies to monitor phenotypic change, adaptation and mutation
# of the virus; prints only the sentence text of the top 10 hits.
search(["Tools", "studies", "monitor", "phenotypic change", "potential adaptation", "virus", "mutation"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Tools', 'studies', 'monitor', 'phenotypic change', 'potential adaptation', 'virus', 'mutation']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['tools', 'studies', 'monitor', 'phenotypic_change', 'potential', 'adaptation', 'virus', 'mutation', 'powerful_computational', 'adaption', 'mutations', 'point_mutation', 'point_mutations']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

Here, we analyzed the potential mutations that may have evolved after the virus became epidemic among humans and also the mutations resulting in the human adaptation.

Thus, it is urgent to tightly monitor the mutation and adaptation of

In [76]:
# Example: immune response and immunity; prints only the sentence text of the top 10 hits.
search(["Immune", "Immunity", "Immune response"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Immune', 'Immunity', 'Immune response']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['immune', 'immunity', 'immune_response', 'immunity_against', 'immune_responses', 'immune_responses', 'adaptive_immune_response', 'immune_response_against', 'cellular_immune_response', 'humoral_immune_response', 'innate_adaptive_immune_responses']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

Graphical Abstract Highlights d Ten experimentally defined regions within SARS-CoV have high homology with SARS-CoV-2 d Parallel bioinformatics predicted potential B and T cell epitopes for SARS-CoV-2 d Independent approaches identif

In [77]:
# Example: effectiveness of movement control and restrictions in preventing secondary
# transmission; prints only the sentence text of the top 10 hits.
search(["Effectiveness", "movement control", "restrictions", "strategy", "prevent secondary transmission"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Effectiveness', 'movement control', 'restrictions', 'strategy', 'prevent secondary transmission']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['effectiveness', 'movement', 'control', 'restrictions', 'strategy_prevent', 'secondary_transmission', 'efficacy', 'movements', 'strategy_preventing', 'secondary_cases', 'among_close_contacts', 'unrecognized_cases', 'superspreading_events_occurred']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

To address it, in this paper, we propose a flexible framework incorporating the effectiveness of the government control to forecast the whole process of a new unknown infect

In [78]:
# Example: effectiveness of personal protective equipment (PPE) in preventing
# transmission; prints only the sentence text of the top 10 hits.
search(["Effectiveness", "personal protective equipment", "PPE", "strategy", "prevent transmission"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['Effectiveness', 'personal protective equipment', 'PPE', 'strategy', 'prevent transmission']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['effectiveness', 'personal_protective_equipment_ppe', 'strategy_prevent', 'transmission', 'efficacy', 'personal_protective_equipment', 'appropriate_ppe', 'equipment_ppe', 'n95_higher', 'proper_personal_protective', 'proper_use_ppe', 'including_n95_masks', 'use_personal_protection', 'wearing_full', 'protective_gear', 'strategy_preventing', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronavirus_2019ncov', 'ongoing_outbreak_novel_coronavirus', 'since_late_december', 'ongoing_outbreak_covid19', 'originating_wuhan_china', 'novel_coronavirus_outbreak', 'wuhan_coronavirus']

Results:

5, 6 

In [79]:
# Example: seasonality / climate keywords; prints only the sentence text of the top 10 hits.
search(["seasonality", "transmission", "humidity", "heat", "summer"],
       optional_keywords=["new_coronavirus", "coronavirus", "covid19"],
       top_n=10, only_sentences=True)


Search for terms ['seasonality', 'transmission', 'humidity', 'heat', 'summer']


Search terms after cleaning, bigrams, trigrams and synonym expansion: ['seasonality', 'transmission', 'humidity_heat', 'summer', 'seasonal_patterns', 'seasonal_variation', 'seasonal_pattern', 'seasonal_trends', 'seasonality_influenza', 'seasonal_variations', 'trans_mission', 'transmissions', 'disease_transmission', 'contact_transmission', 'overall_discomfort', 'microclimate_temperature_humidity', 'emotional_benefits', 'perceived_comfort', 'n95mask_combination', 'perceived_exertion', 'thermophysiological', 'nanofunctional', 'physical_discomfort', 'subjective_ratings', 'winter', 'autumn', 'during_winter', 'during_summer', 'rainy', 'winter_summer', 'summer_autumn', 'fall_winter', 'cold_winter', 'during_winter_spring']
Optional search terms after cleaning, bigrams, trigrams and synonym expansion: ['newcoronavirus', 'coronavirus_covid19', '2019ncov_covid19', 'outbreak_2019_novel', 'sarscov2_2019ncov', 'coronav