In [1]:
import pandas as pd
import numpy as np
from nltk import PorterStemmer
import functools
from IPython.core.display import display, HTML

In [2]:
data_path = "/home/dewanshu/Downloads/2020-08-01/metadata.csv"
df=pd.read_csv(data_path, usecols=['title','journal','abstract','authors','doi','publish_time','sha'])
df=df.fillna('N/A')
df = df.drop_duplicates(subset='title', keep="first")
df=df[df['publish_time'].str.contains('2020')]
df["abstract"] = df["abstract"].str.lower()+df["title"].str.lower()

  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
def get_dataFrame(df):
    dfa = df[df['abstract'].str.contains('covid')]
    dfb = df[df['abstract'].str.contains('-cov-2')]
    dfc = df[df['abstract'].str.contains('cov2')]
    dfd = df[df['abstract'].str.contains('ncov')]
    frames=[dfa,dfb,dfc,dfd]
    df = pd.concat(frames)
    df=df.drop_duplicates(subset='title', keep="first")
    return df

In [4]:
df=get_dataFrame(df)
df.head()

Unnamed: 0,sha,title,doi,abstract,publish_time,authors,journal
4662,,Latest assessment on COVID-19 from the Europea...,10.2807/1560-7917.es.2020.25.8.2002271,n/alatest assessment on covid-19 from the euro...,2020-02-27,,Euro Surveill
4698,,Updated rapid risk assessment from ECDC on the...,10.2807/1560-7917.es.2020.25.9.2003051,n/aupdated rapid risk assessment from ecdc on ...,2020-03-05,,Euro Surveill
4732,,Updated rapid risk assessment from ECDC on the...,10.2807/1560-7917.es.2020.25.10.2003121,n/aupdated rapid risk assessment from ecdc on ...,2020-03-12,,Euro Surveill
4800,601e6ac1ad98e359dc021e8896a1a604331ca774,Empfehlungen zur intensivmedizinischen Therapi...,10.1007/s00063-020-00674-3,n/aempfehlungen zur intensivmedizinischen ther...,2020-03-12,"Kluge, Stefan; Janssens, Uwe; Welte, Tobias; W...",Med Klin Intensivmed Notfmed
5683,,The impact of COVID-19 on the provision of don...,10.1038/s41409-020-0873-x,n/athe impact of covid-19 on the provision of ...,2020-03-23,"Szer, Jeff; Weisdorf, Daniel; Querol, Sergio; ...",Bone Marrow Transplant


In [5]:
def stemming_word(search_word):
    stemm = PorterStemmer()
    word_list=[]
    for word in search_word:
        word_list.append(stemm.stem(word))
    return word_list

In [6]:
def search_dataFrame(df,search_word):
    search_word=stemming_word(search_word)
    df1=df[functools.reduce(lambda a, b: a&b, (df['abstract'].str.contains(search) for search in search_word))]
    return df1

In [7]:
def find_relevance(rel_df,search_word):
    rel_df['score']=""
    search_words=stemming_word(search_word)
    for index, row in rel_df.iterrows():
        abstract = row['abstract']
        result = abstract.split()
        len_abstract=len(result)
        count=0
        for word in search_words:
            count = count + result.count(word)
        final_count = (count / len_abstract)
        rel_count = count * final_count
        rel_df.loc[index, 'score'] = rel_count
    rel_df=rel_df.sort_values(by=['score'], ascending=False)
    return rel_df

In [8]:
# function to get best sentences from the search results
def get_sentences(df1,search_words):
    df_table = pd.DataFrame(columns = ["pub_date","authors","title","excerpt","rel_score"])
    search_words = stemming_word(search_words)
    for index, row in df1.iterrows():
        pub_sentence=''
        sentences_used=0
        #break apart the absracrt to sentence level
        sentences = row['abstract'].split('. ')
        #loop through the sentences of the abstract
        highligts=[]
        for sentence in sentences:
            # missing lets the system know if all the words are in the sentence
            missing=0
            #loop through the words of sentence
            for word in search_words:
                #if keyword missing change missing variable
                if word not in sentence:
                    missing=1
            # after all sentences processed show the sentences not missing keywords
            if missing==0 and len(sentence)<1000 and sentence!='':
                sentence=sentence.capitalize()
                if sentence[len(sentence)-1]!='.':
                    sentence=sentence+'.'
                pub_sentence=pub_sentence+'<br><br>'+sentence
        if pub_sentence!='':
            sentence=pub_sentence
            sentences_used=sentences_used+1
            authors=row["authors"].split(" ")
            link=row['doi']
            title=row["title"]
            score=row["score"]
            linka='https://doi.org/'+link
            linkb=title
            sentence='<p fontsize=tiny" align="left">'+sentence+'</p>'
            final_link='<p align="left"><a href="{}">{}</a></p>'.format(linka,linkb)
            to_append = [row['publish_time'],authors[0]+' et al.',final_link,sentence,score]
            df_length = len(df_table)
            df_table.loc[df_length] = to_append
    return df_table

In [9]:
display(HTML('<h1>Task 3: What do we know about vaccines and therapeutics ?</h1>'))

# list of lists of search terms
questions=[
['Q: Effectiveness of drugs being developed and tried to treat COVID-19 patients?'],
['Q: Clinical and bench trials to investigate less common viral inhibitors against COVID-19 such as naproxen, clarithromycin, and minocycline that that may exert effects on viral replication?'],
['Q: Methods evaluating potential complication of Antibody-Dependent Enhancement (ADE) in vaccine recipients?'],
['Q: Exploration of use of best animal models and their predictive value for a human vaccine?'],
['Q: Capabilities to discover a therapeutic (not vaccine) for the disease, and clinical effectiveness studies to discover therapeutics, to include antiviral agents?'],
['Q: Alternative models to aid decision makers in determining how to prioritize and distribute scarce, newly proven therapeutics as production ramps up. This could include identifying approaches for expanding production capacity to ensure equitable and timely distribution to populations in need?'],
['Q: Efforts targeted at developing options for a universal coronavirus vaccine?'],
['Q: Efforts to develop animal models and standardize challenge studies?'],
['Q: Efforts to develop prophylaxis clinical studies and prioritize in healthcare workers?'],
['Q: Approaches to evaluate risk for enhanced disease after vaccination?']
]   

search=[['drugs','treat','patients'],
['COVID-19','minocycline','viral'],
['evaluating','Antibody-Dependent Enhancement','vaccine'],
['animal','predictive','human vaccine'],
['therapeutic','clinical effectiveness','antiviral agents'],
['prioritize','scarce','newly proven therapeutics'],
['targeted','developing','coronavirus vaccine'],
['develop','animal models', 'standardize challenge'],
['develop','prophylaxis clinical', 'healthcare workers'],
['evaluate','enhanced disease','vaccination']
]       
q=0
for search_words in search:
    str1=''
    # make a string of the search words to print readable version from above table
    str1=' '.join(questions[q])
    
    #search the dataframe for all words
    df1=search_dataFrame(df,search_words)

    # analyze search results for relevance 
    df1=find_relevance(df1,search_words)

    # get best sentences
    df_table=get_sentences(df1,search_words)
    
    length=df_table.shape[0]
    #limit 3 results
    df_table=df_table.head(15)
    df_table=df_table.drop(['rel_score'], axis=1)
    #convert df to html
    df_table=HTML(df_table.to_html(escape=False,index=False))
    
    # display search topic
    display(HTML('<h3>'+str1+'</h3>'))
    
    #display table
    if length<1:
        print ("No reliable answer could be located in the literature")
    else:
        display(df_table)
    q=q+1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


pub_date,authors,title,excerpt
2020-04-01,"Ottesen, et al.",Efficacy of a high-dose antiresorptive drug holiday to reduce the risk of medication-related osteonecrosis of the jaw (MRONJ): A systematic review.,"In 2 studies, patients were being treated with denosumab, but neither showed that a drug holiday was effective."
2020,"Pawar, et al.",Combating devastating COVID-19 by drug repurposing,"Some countries are against the use of these drugs because of adverse effects associated with drug repurposing and lack of statistically significant clinical data, but they have been found to be effective in some countries to treat covid-19 patients (off-label/investigational)."
2020-06-22,"Bishara, et al.",Emerging and experimental treatments for COVID-19 and drug interactions with psychotropic agents,"An even higher threshold of vigilance should be maintained for patients with pre-existing conditions and older adults due to added toxicity and drug interactions, especially with psychotropic agents.emerging and experimental treatments for covid-19 and drug interactions with psychotropic agents."
2020-07-09,"Mummed, et al.",Molecular targets for COVID-19 drug development: Enlightening Nigerians about the pandemic and future treatment,"However, as patient management and drug repositioning are taking place, it is imperative to identify other promising targets used by sars-cov-2 to establish infection, to develop novel therapeutics.molecular targets for covid-19 drug development: enlightening nigerians about the pandemic and future treatment."
2020-04-17,"Pawar, et al.",Combating Devastating COVID -19 by Drug Repurposing,• further investigations of these drugs are recommended to treat covid-19 patients on top priority.combating devastating covid -19 by drug repurposing.
2020-06-12,Au et al.,Anaesthetic Considerations for Rationalizing Drug Use in the Operating Theatre: Strategies in a Singapore Hospital During COVID-19,"Covid-19 patients in the critical care unit tend to have prolonged hospital stay requiring high doses of sedation and paralysis to treat acute respiratory distress syndrome, resulting in a shortage of these drugs."
2020-05-12,"Jafari, et al.",Considerations for interactions of drugs used for the treatment of COVID-19 with anti-Cancer treatments,"Because of the long-term use of chemotherapy drugs, drug interactions are important in these patients especially with sars-cov2 treatments now."
2020,"Jafari, et al.",Considerations for interactions of drugs used for the treatment of COVID-19 with anti-cancer treatments,"Because of the long-term use of chemotherapy drugs, drug interactions are important in these patients especially with sars-cov2 treatments now."
2020-07-17,"Zhu, et al.",Identification of SARS-CoV-2 3CL Protease Inhibitors by a Quantitative High-throughput Screening,"Conclusion and implications some of the newly identified inhibitors of sars-cov-2 3clpro may be used in combination therapy with other drugs for synergistic effect to treat covid-19 patients. Clinical significance some of the newly identified 3clpro inhibitors can be evaluated in drug combination therapy for synergistic effect to treat covid-19 patients, while the others can serve as starting points for medicinal chemistry optimization to improve potency and drug like properties for drug development.identification of sars-cov-2 3cl protease inhibitors by a quantitative high-throughput screening."
2020,"Mohanty, et al.",Application of Artificial Intelligence in COVID-19 drug repurposing,"This technology has the potential to improve the drug discovery, planning, treatment, and reported outcomes of the covid-19 patient, being an evidence-based medical tool. With prior usage experiences in patients, few of the old drugs, if shown active against sars-cov-2, can be readily applied to treat the covid-19 patients."


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


No reliable answer could be located in the literature


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


No reliable answer could be located in the literature


No reliable answer could be located in the literature


No reliable answer could be located in the literature


No reliable answer could be located in the literature


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  del sys.path[0]


pub_date,authors,title,excerpt
2020-03-21,"Ong, et al.",COVID-19 coronavirus vaccine design using reverse vaccinology and machine learning,Our predicted vaccine targets provide new strategies for effective and safe covid-19 vaccine development.covid-19 coronavirus vaccine design using reverse vaccinology and machine learning.
2020-06-01,"Arora, et al.",COVID-19 vaccine development and the way forward.,"While the availability of newer technologies has facilitated development, there are several challenges on the way including limited understanding of the pathophysiology, targeting humoral or mucosal immunity, lack of suitable animal model, poor success of human severe acute respiratory syndrome/middle east respiratory syndrome vaccines, limited efficacy of influenza vaccines, and immune exaggeration with animal coronavirus vaccines."
2020,"Arora, et al.",COVID-19 vaccine development and the way forward,"While the availability of newer technologies has facilitated development, there are several challenges on the way including limited understanding of the pathophysiology, targeting humoral or mucosal immunity, lack of suitable animal model, poor success of human severe acute respiratory syndrome/middle east respiratory syndrome vaccines, limited efficacy of influenza vaccines, and immune exaggeration with animal coronavirus vaccines."
2020-05-30,"He, et al.",Highly pathogenic coronaviruses: thrusting vaccine development in the spotlight,"In this review, we will briefly describe coronavirus vaccine design targets, summarize recent advances in the development of coronavirus vaccines, and highlight current adjuvants for improving the efficacy of coronavirus vaccines.highly pathogenic coronaviruses: thrusting vaccine development in the spotlight."
2020-05-03,"Lundstrom, et al.",Coronavirus Pandemic—Therapy and Vaccines,"Previous experience from sars- and mers-coronavirus vaccine and drug development projects have targeted glycoprotein epitopes, monoclonal antibodies, angiotensin receptor blockers and gene silencing technologies, which may be useful for covid-19 too."


No reliable answer could be located in the literature


No reliable answer could be located in the literature


No reliable answer could be located in the literature
