In [None]:
#%%appyter init
# Bootstrap appyter for this notebook. The lambda's default argument captures
# the built-in `globals`, so calling it with no arguments returns this
# notebook's global namespace.
# NOTE(review): presumably magic.init uses that callable to register the
# %%appyter cell magics against this namespace — confirm against appyter docs.
from appyter import magic
magic.init(lambda _=globals: _())

# LINCS Chemical Similarity Appyter
The Library of Integrated Network-Based Cellular Signatures (LINCS) is an NIH Common 
Fund program that catalogs how human cells globally respond to chemical, genetic, and
disease perturbations. Resources generated by LINCS include experimental and 
computational methods, visualization tools, molecular and imaging data, and 
signatures. 

This Appyter provides information about small molecules profiled by the LINCS program. Specifically, 
users can retrieve similar small molecules based on Tanimoto 
structural similarity and on L1000 gene expression signature similarity.

In [None]:
import pandas as pd
import numpy as np

# Display / graphing
from IPython.display import display, HTML

# API access
import requests
import json

In [None]:
# Notebook display util functions
def make_clickable(link):
    """Return an HTML anchor that opens ``link`` in a new browser tab.

    The link is used both as the ``href`` target and as the visible text.
    It is HTML-escaped first so that characters such as ``"``, ``<`` or ``&``
    cannot terminate the attribute early or inject markup into the page.

    Args:
        link: URL or relative path to link to. Non-string values are
            converted with ``str()``.

    Returns:
        str: ``<a target="_blank" href="...">...</a>`` markup.
    """
    # Local import keeps this helper self-contained within its cell.
    from html import escape

    safe_link = escape(str(link), quote=True)
    return f'<a target="_blank" href="{safe_link}">{safe_link}</a>'

# Running counters for numbered captions; figure_header advances them.
table_number = 0
figure_number = 0


def figure_header(label, title):
    """Display a bold, numbered heading for a table or figure.

    When ``label`` is exactly ``'Table'`` or ``'Figure'`` the matching
    module-level counter is incremented and the heading reads
    ``Table <n>`` / ``Figure <n>``. Any other label is shown unchanged and
    no counter is touched.

    Args:
        label: ``'Table'``, ``'Figure'``, or an arbitrary heading label.
        title: Text (may contain HTML) placed after the label; it is
            inserted into the markup as-is.
    """
    global table_number, figure_number

    heading = label
    if label == 'Table':
        table_number = table_number + 1
        heading = f'Table {table_number}'
    elif label == 'Figure':
        figure_number = figure_number + 1
        heading = f'Figure {figure_number}'

    markup = f"<div style='font-size:1.25rem; padding:1rem 0;'><b>{heading}</b>: {title}</div>"
    display(HTML(markup))
    
def figure_legend(label, title, content=''):
    """Display a centered caption for the most recently numbered table/figure.

    The module-level counters are only read here, never advanced: a label of
    ``'Table'`` or ``'Figure'`` is expanded to ``Table <n>`` / ``Figure <n>``
    using the current counter value; any other label is shown unchanged.

    Args:
        label: ``'Table'``, ``'Figure'``, or an arbitrary caption label.
        title: Caption title, rendered in italics.
        content: Optional extra text appended after the title.
    """
    if label == 'Table':
        caption_label = f'Table {table_number}'
    elif label == 'Figure':
        caption_label = f'Figure {figure_number}'
    else:
        caption_label = label

    caption_css = '<style>div.caption {text-align: center;}</style>'
    caption_body = f'<div class=caption><b>{caption_label}</b>: <i>{title}</i>. {content} </div>'
    display(HTML(caption_css + caption_body))

In [None]:
# Base location of the precomputed similarity-score files.
STORAGE_BASE_URL = 'https://appyters.maayanlab.cloud/storage/DODGE-Chemical-Similarity'
# Seconds to wait on the server before giving up instead of hanging the notebook.
REQUEST_TIMEOUT = 60


def _fetch_json(filename):
    """Download ``filename`` from ``STORAGE_BASE_URL`` and return the parsed JSON.

    Args:
        filename: Name of the JSON file under ``STORAGE_BASE_URL``.

    Returns:
        The decoded JSON payload.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so a
            missing or failing resource is reported as such instead of as a
            JSON decoding error further down.
        requests.Timeout: If the server does not respond within
            ``REQUEST_TIMEOUT`` seconds.
    """
    response = requests.get(f'{STORAGE_BASE_URL}/{filename}', timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


# Later cells index each of these objects by the selected small-molecule name.
L1000_similarity_scores = _fetch_json('L1000_signature_similarity_scores.json')
# NOTE: "similartiy" is misspelled, but the name is kept because a later cell reads it.
ECFP4_similartiy_scores = _fetch_json('ECFP4_similarity_scores.json')
ECFP6_similarity_scores = _fetch_json('ECFP6_similarity_scores.json')

In [None]:
%%appyter hide_code
# Input form for this Appyter: one section containing one autocomplete field.

# Declares the 'method_selection' form section with its title, subtitle and image.
{% do SectionField(name='method_selection',
                   title='Input a small molecule from the LINCS collection',
                   subtitle='Begin typing the common name of a small molecule into the autocomplete field below\
                   to find similar small molecules based on Tanimoto structural similarity and L1000 gene expression\
                   signature similarity.',
                   img='drug.png'
)%}

# Binds the autocomplete field to the template variable `drug`, which the
# following cells interpolate as {{drug}}. The default is 'valproic-acid'.
# NOTE(review): file_path presumably points to the list of selectable
# small-molecule names — confirm against the appyter field documentation.
{% set drug = AutocompleteField(name = 'drug',
                                label = 'Small molecule name',
                                default = 'valproic-acid',
                                description = 'Enter the small molecule name of interest',
                                file_path = 'https://appyters.maayanlab.cloud/storage/DODGE-Chemical-Similarity/lincs_drugs.json',
                                section = 'method_selection'
)%}

In [None]:
%%appyter markdown
### Top 20 LINCS small molecules most similar to {{drug}} based on ECFP4 Tanimoto similarity
The canonical SMILES strings of LINCS small molecules were converted into Extended Connectivity
Fingerprints (radius=4) using RDKit. Tanimoto similarity between all unique small molecules was computed.
The top 20 most similar small molecules, ranked by Tanimoto similarity, are displayed in the
table below along with a downloadable version with the top 100 most similar small molecules.

In [None]:
%%appyter code_exec
ecfp4 = pd.DataFrame.from_dict(ECFP4_similartiy_scores[{{drug}}],
                       orient='index',
                       columns = ['Tanimoto Similarity Score'])
filename = {{drug}} + "_ECFP4_Tanimoto_Similarity.csv"
ecfp4.to_csv(filename)
figure_header('Table', 'Top Predicted Compounds From ECFP4 Tanimoto Similarity<br>({})</br>'.format(make_clickable(filename)))
display(ecfp4.head(20))

In [None]:
%%appyter markdown
### Top 20 LINCS small molecules most similar to {{drug}} based on ECFP6 Tanimoto similarity
The canonical SMILES strings of LINCS small molecules were converted into Extended Connectivity
Fingerprints (radius=6) using RDKit. Tanimoto similarity between all unique small molecules was computed.
The top 20 most similar small molecules, ranked by Tanimoto similarity, are displayed in the
table below along with a downloadable version with the top 100 most similar small molecules.

In [None]:
%%appyter code_exec
ecfp6 = pd.DataFrame.from_dict(ECFP6_similarity_scores[{{drug}}],
                       orient='index',
                       columns = ['Tanimoto Similarity Score'])
filename = {{drug}} + "_ECFP6_Tanimoto_Similarity.csv"
ecfp6.to_csv(filename)
figure_header('Table', 'Top Predicted Compounds From ECFP6 Tanimoto Similarity<br>({})</br>'.format(make_clickable(filename)))
display(ecfp6.head(20))

In [None]:
%%appyter markdown
### Top 20 LINCS small molecules most similar to {{drug}} based on L1000 gene expression signatures
Consensus signatures were computed for each unique LINCS small molecule and pairwise cosine similarity of the
gene expression vectors between all unique small molecules was computed. The top 20 most similar small molecules,
ranked by L1000 gene expression signature cosine similarity, are displayed in the table below along with a
downloadable version with the top 100 most similar small molecules.

In [None]:
%%appyter code_exec
l1000_ge = pd.DataFrame.from_dict(L1000_similarity_scores[{{drug}}],
                       orient='index',
                       columns = ['Cosine similarity of L1000 signatures'])
filename = {{drug}} + "_L1000_Signature_Similarity.csv"
l1000_ge.to_csv(filename)
figure_header('Table', 'Top Predicted Compounds From L1000 Gene Expression Signature Similarity<br>({})</br>'.format(make_clickable(filename)))
display(l1000_ge.head(20))