In [None]:
# Bootstrap appyter's notebook magics (the `%%appyter ...` cells below).
# The lambda's default argument binds the built-in `globals`, so calling it with no
# arguments returns this notebook's global namespace.
# NOTE(review): presumably appyter uses that namespace to evaluate/inject the rendered
# template cells — confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
import os
import sys
import zipfile
import datetime

import pandas as pd
import numpy as np

# Display / graphing
from IPython.display import display, HTML
import plotly.express as px

# API access
import requests
import io
import time

In [None]:
# Notebook display util functions
def make_clickable(link):
    """Return an HTML anchor that opens ``link`` in a new browser tab.

    The link is HTML-escaped before being placed in both the ``href`` attribute and
    the visible text, so characters such as ``"``, ``<`` or ``&`` (callers build the
    link from the user-supplied search term) cannot break out of the attribute or
    inject markup.

    Parameters
    ----------
    link : str
        URL or relative file path to link to.

    Returns
    -------
    str
        An ``<a target="_blank" ...>`` element as a string.
    """
    # Local import keeps this helper self-contained; `html` is in the standard library.
    import html

    safe_link = html.escape(str(link), quote=True)
    return f'<a target="_blank" href="{safe_link}">{safe_link}</a>'

# Running counters so tables and figures are numbered in the order they are displayed.
table_number = 0
figure_number = 0

def figure_header(label, title):
    """Display a large heading above a table or figure.

    When ``label`` is exactly 'Table' or 'Figure', the matching module-level counter
    is advanced by one and the heading reads 'Table N' / 'Figure N'. Any other label
    is shown unchanged and no counter moves.
    """
    global table_number, figure_number
    if label == 'Figure':
        figure_number += 1
        label = f'Figure {figure_number}'
    elif label == 'Table':
        table_number += 1
        label = f'Table {table_number}'
    heading_html = f"<div style='font-size:2rem; padding:1rem 0;'><b>{label}</b>: {title}</div>"
    display(HTML(heading_html))
    
def figure_legend(label, title, content=''):
    """Display a centred caption for the most recently numbered table or figure.

    'Table' / 'Figure' labels are expanded with the current value of the matching
    module-level counter (the counters are only read here, never advanced); any
    other label is shown as given. ``content`` is optional text appended after the
    italicised title.
    """
    if label == 'Table':
        caption_label = f'Table {table_number}'
    elif label == 'Figure':
        caption_label = f'Figure {figure_number}'
    else:
        caption_label = label
    caption_html = (
        '<style>div.caption {text-align: center;}</style>'
        f'<div class=caption><b>{caption_label}</b>: <i>{title}</i>. {content} </div>'
    )
    display(HTML(caption_html))

In [None]:
%%appyter hide_code

# Input-form sections; the fields declared in the next cell attach to them by name via `section=`.
{% do SectionField(name='section1',
                   title = '1. Submit Your Biomedical Term of Interest:',
                   img = 'load_icon.png')%}

{% do SectionField(name='section2',
                   title = '2. Choose Number of Top Associated Drugs to Make Predictions:',
                   img = 'load_icon.png')%}

In [None]:
%%appyter code_exec

# Free-text search term entered by the user. Later cells use it for the PubMed query,
# the output CSV file names and the scatter-plot title.
term = {{ StringField(name='input_term',
                     label='Biomedical Term',
                     default='Lung cancer',
                     description='Input your biomedical term of interest.',
                     section = 'section1') }}

# How many of the top-ranked associated drugs are used as the seed set when averaging
# correlations for the predictions (form limits: 50 to 200, default 50).
set_size = {{ IntField(name = 'set_size',
                        label = 'Associated drug set size',
                        min = 50,
                        max = 200,
                        default = 50,
                        description = 'Amount of drugs used for co-expression prediction',
                        section = 'section2') }}

### Load DrugRIF

In [None]:
# Drug-to-publication table: only the 'name' and 'PMID' columns are read, and the
# frame is indexed by drug name (one row per name/PMID pair in the file).
DrugRIF = (
    pd.read_csv(
        'https://appyters.maayanlab.cloud/storage/DrugShot/DrugRIF.tsv.gz',
        sep='\t',
        usecols=['name', 'PMID'],
    )
    .set_index('name')
)

### Query Term Using PubMed Eutils API

In [None]:
# Page through PubMed ESearch and collect the IDs of every publication matching `term`.
pubmed_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
pubmed_params = {'db': 'pubmed', 'term': term, 'retmax': 100000, 'retmode': 'json'}

i = 0  # retstart offset of the next page to request
results = []
while True:
    # Passing the query via `params` lets requests URL-encode the term, so characters
    # such as '&' or '#' in the user's input cannot corrupt the query string.
    response = requests.get(pubmed_url, params={**pubmed_params, 'retstart': i})
    response.raise_for_status()
    res = response.json()

    page_ids = res['esearchresult'].get('idlist', [])
    results.extend(page_ids)
    i += len(page_ids)

    # Stop once everything reported by `count` has been collected, or as soon as a page
    # comes back empty. The empty-page check is what prevents an endless loop when the
    # search has zero hits or the server stops returning IDs.
    # NOTE(review): NCBI documents an upper bound on how many PubMed IDs ESearch will
    # return; for very broad terms `results` may therefore be truncated — confirm
    # against the current E-utilities documentation.
    if not page_ids or i >= int(res['esearchresult'].get('count', 0)):
        break
    time.sleep(0.2)  # brief pause between consecutive requests

### Associated Drug Table

In [None]:
# Retrieve top 500 associated drugs from DrugRIF based on search term co-mentions in the literature.
# ESearch delivers PMIDs as strings. If the PMID column was parsed as a numeric dtype,
# convert the hits to numbers first so `isin` compares like with like (mixed str/int
# comparisons silently match nothing on current pandas).
pmid_hits = pd.Series(results, dtype=object)
if pd.api.types.is_numeric_dtype(DrugRIF['PMID']):
    pmid_hits = pd.to_numeric(pmid_hits, errors='coerce').dropna()

# value_counts() is sorted by descending count, so head(500) keeps the 500 drugs with the
# most co-mentions. Naming the Series explicitly (instead of renaming a column called
# 'name') works regardless of how the installed pandas names the value_counts() result.
df_associated = (
    DrugRIF[DrugRIF['PMID'].isin(pmid_hits)]
    .index.value_counts()
    .head(500)
    .rename('Publications with Search Term')
    .to_frame()
)

# Get counts of each chemical in DrugRIF: count the whole index once, then look up the
# associated drugs, rather than rescanning the full index for every drug.
total_publications = DrugRIF.index.value_counts()
chemical_counts = total_publications.reindex(df_associated.index).tolist()

df_associated['Publications with Search Term / Total Publications'] = (
    df_associated['Publications with Search Term'].div(chemical_counts)
)
# Score = co-mention count weighted by the fraction of the drug's literature that mentions the term.
df_associated['Score'] = (
    df_associated['Publications with Search Term']
    * df_associated['Publications with Search Term / Total Publications']
)

In [None]:
# Rank the associated drugs by score, write the full ranking to a CSV named after the
# search term, and preview the 20 best-scoring rows under a numbered table heading.
associated_table = df_associated.sort_values(by='Score', ascending=False)

associated_csv = term.replace(' ', '_') + '_associated_drug_table.csv'
associated_table.to_csv(associated_csv)

associated_heading = 'Top Associated Compounds ({})'.format(make_clickable(associated_csv))
figure_header('Table', associated_heading)
display(associated_table.head(20))

### Scatter Plot of Drug Frequency in Literature

In [None]:
# Scatter of co-mention count (x) against the fraction of each drug's publications that
# mention the term (y); hovering a point shows the drug name.
# rename_axis() names the index explicitly, so the hover column is always called
# 'chemical' instead of depending on reset_index() emitting a column named 'index'.
scatter_data = df_associated.rename_axis('chemical').reset_index()
fig = px.scatter(
    scatter_data,
    x='Publications with Search Term',
    y='Publications with Search Term / Total Publications',
    hover_data=['chemical'],
    title=term,
)
fig.show()

### Predicted Drug Table

In [None]:
# Load correlation matrix into pandas DataFrame
response = requests.get('https://appyters.maayanlab.cloud/storage/DrugShot/L1000_similarity_matrix.npz')
# Surface a failed download here instead of as an obscure parsing error inside np.load.
response.raise_for_status()
# NOTE(security): allow_pickle=True lets np.load unpickle arbitrary objects from the
# downloaded archive, which can execute code. Keep it only while the host above is trusted.
correlation_matrix = np.load(io.BytesIO(response.content), allow_pickle = True)

# Square frame: the same label array ('index') is used for both rows and columns.
df_correlation = pd.DataFrame(
    data=correlation_matrix['correlations'],
    columns=correlation_matrix['index'],
    index=correlation_matrix['index'],
)
# Blank out every entry equal to 1 so it is ignored by the column means computed later.
# NOTE(review): presumably this targets the self-correlation diagonal; any off-diagonal
# value of exactly 1 is blanked as well — confirm that is intended.
df_correlation = df_correlation.replace({1: np.nan})

In [None]:
# Calculate average correlation for each drug with the associated drug set
seed_drugs = associated_table.index[0:set_size]
# Keep only the rows of the seed drugs. The explicit copy means the 'Score' row below is
# written into an independent frame rather than into a filtered slice of the full matrix.
df_correlation = df_correlation.loc[df_correlation.index.isin(seed_drugs)].copy()
# Column-wise mean over the seed rows (NaN entries are skipped by mean()).
df_correlation.loc['Score'] = df_correlation.mean()
# Order the columns (candidate drugs) from highest to lowest average correlation.
df_correlation = df_correlation.sort_values(by='Score', axis=1, ascending=False)

In [None]:
# Take the 200 best-scoring columns of the 'Score' row as the predictions, write them to
# a CSV named after the search term, and preview the first 20 under a numbered heading.
top_scores = df_correlation.loc['Score'].iloc[0:200]
predicted_table = top_scores.to_frame()

predicted_csv = term.replace(' ', '_') + '_predicted_drug_table.csv'
predicted_table.to_csv(predicted_csv)

predicted_heading = 'Top Predicted Compounds ({})'.format(make_clickable(predicted_csv))
figure_header('Table', predicted_heading)
display(predicted_table.head(20))