In [19]:
#%%appyter init
# Bootstrap appyter for this notebook. The lambda (with `globals` bound as a
# default argument) gives appyter a callable that returns this notebook's
# globals() when invoked -- presumably so the `%%appyter` cells below can be
# rendered and executed in the notebook's own namespace.
from appyter import magic
magic.init(lambda _=globals: _())

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import textwrap 
import pickle

In [None]:
%%appyter markdown

# KINOMEScan Data Visualization

This appyter creates bar charts for visualizing KINOMEScan data. Given a small molecule, it returns 
a list of the kinases that the small molecule has been found to bind in KINOMEScan assays. 
It also generates an interactive bar chart of those kinases grouped by their equilibrium dissociation constant (Kd, an indication of binding affinity).
Similarly, given a kinase, it returns a list of the small molecules that have been found to bind to it, 
along with an interactive bar chart of those small molecules grouped by their equilibrium dissociation constant. 

In [22]:
%%appyter hide_code

# Declares the appyter form sections. The second section is named 'section';
# the StringField inputs further down attach to it via section = 'section'.
{% do SectionField(name='title', title = 'Visualize KINOMEScan Data')%}

{% do SectionField(name = 'section', title = 'Input a small molecule and/or kinase', 
                   subtitle = 'Input a small molecule to visualize the kinases that it binds to, ' + 
                   'and/or input a kinase to visualize the small molecules that bind it.')%}   

In [None]:
%%appyter markdown

### Generate list of kinases for small molecule input

In [None]:
%%appyter code_exec

# Given a small molecule name, look up its HMS LINCS ID, download that
# molecule's KINOMEScan results as CSV, and list the kinases it binds,
# grouped by binding class (target affinity).

sm_name = {{ StringField(
        name = 'Small molecule name',
        label = 'Small molecule name',
        description = 'The small molecule name',
        default = 'Afatinib',
        section = 'section'
    )}}

kinome_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
# Keep only the digits of the ID (drops the "HMSL" prefix). regex=True must be
# explicit: pandas >= 2.0 treats the pattern as a literal string by default,
# which would leave the prefix in place and produce a wrong query URL.
kinome_df['sm_hms_id'] = kinome_df['sm_hms_id'].str.replace(r'\D', '', regex=True)

# All dataset IDs registered for this molecule; the first one is queried.
hms_id = kinome_df.loc[kinome_df['sm_name'] == sm_name, 'sm_hms_id']
hms_id_list = hms_id.tolist()
if not hms_id_list:
    # Fail with a readable message instead of an IndexError on hms_id_list[0].
    raise ValueError('No KINOMEScan dataset found for small molecule: ' + str(sm_name))

hms_id_string = str(hms_id_list[0])
old_url = 'http://lincs.hms.harvard.edu/db/datasets/20000/results?small+molecules=HMS_ID&output_type=.csv'
new_url = old_url.replace("HMS_ID", hms_id_string) # fills in HMS_ID with the correct small molecule
data = pd.read_csv(new_url)
df = pd.DataFrame(data)

# Gene symbols per binding class. Classes 1-3 correspond to the Kd ranges
# printed below; class 10 is collected but not reported in this cell.
target_affinity_1 = df.loc[df['Binding Class'] == 1, 'HUGO Gene Symbol']
target_affinity_2 = df.loc[df['Binding Class'] == 2, 'HUGO Gene Symbol']
target_affinity_3 = df.loc[df['Binding Class'] == 3, 'HUGO Gene Symbol']
target_affinity_10 = df.loc[df['Binding Class'] == 10, 'HUGO Gene Symbol']


def _unique_names(symbols):
    """Return the distinct values of `symbols` as strings, in first-seen order.

    Missing values are skipped so they do not show up as repeated 'nan' entries.
    """
    return list(dict.fromkeys(symbols.dropna().astype(str)))


kinase1_list = _unique_names(target_affinity_1)
kinase2_list = _unique_names(target_affinity_2)
kinase3_list = _unique_names(target_affinity_3)

# Always define the joined strings (empty when nothing binds in that class):
# the bar-chart cell reads all three unconditionally.
kinase1_string = ', '.join(kinase1_list)
kinase2_string = ', '.join(kinase2_list)
kinase3_string = ', '.join(kinase3_list)

if kinase1_list:
    print(sm_name + ' binds to the following kinases with Kd < 100 nM: ')
    print(kinase1_string)
    print()

if kinase2_list:
    print(sm_name + ' binds to the following kinases with 100 nM ≤ Kd < 1µM: ')
    print(kinase2_string)
    print()

if kinase3_list:
    print(sm_name + " binds to the following kinases with 1µM ≤ Kd < 10 µM: ")
    print(kinase3_string)

In [None]:
%%appyter markdown

### Generate bar chart for small molecule input
Hover over bar(s) to see kinases

In [None]:
# Bar chart: number of kinases bound by the small molecule in each Kd range.
x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
y = [len(kinase1_list), len(kinase2_list), len(kinase3_list)]

# Hover text with wraparound, one entry per bar. It is built from the kinase
# lists rather than the kinaseN_string variables, which the lookup cell only
# defines for non-empty classes (an empty class would raise NameError here, or
# silently show a stale string from an earlier run). An empty list yields ''.
kinase1_lab = '<br>'.join(textwrap.wrap(', '.join(kinase1_list), width=50))
kinase2_lab = '<br>'.join(textwrap.wrap(', '.join(kinase2_list), width=50))
kinase3_lab = '<br>'.join(textwrap.wrap(', '.join(kinase3_list), width=50))

fig = go.Figure(data=[go.Bar(x = x, y = y, text = y, textposition = 'auto',
                             hovertext = [kinase1_lab, kinase2_lab, kinase3_lab],
                             hoverlabel = dict(font = dict(size = 18)),
                             # Bars are coloured by their own height.
                             marker = {'color': y,
                                        'colorscale': 'Plotly3'})])

fig.update_layout(
    title = {
        'text': 'Kinases bound by ' + sm_name,
        'y':0.87,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title = "Equilibrium Dissociation Constant",
    yaxis_title = "Kinases",
    font = dict(
        size = 18,
        color = 'black'
    )
)

fig.show()

In [101]:
# Sort kinases by their classification

# Inputs: a local UniProt spreadsheet and the HMS LINCS protein table (CSV).
uniprot_df = pd.read_excel('uniprot_data.xlsx')
kinases_data = pd.read_csv(
    'http://lincs.hms.harvard.edu/db/proteins/?search=&output_type=.csv'
)
kinases_df = pd.DataFrame(kinases_data)

# Planned lookup (not implemented in this cell):
#   1. take each kinase in the kinase lists ('Name')
#   2. find its 'UniProt ID' in kinases_df
#   3. look that ID up in uniprot_df ('Entry')
#   4. read 'Protein families' and test it against each classification

# One empty bucket per kinase classification; each name gets its own list.
(
    agc_kinases,
    camk_kinases,
    ck1_kinases,
    cmgc_kinases,
    rgc_kinases,
    ste_kinases,
    tk_kinases,
    tkl_kinases,
) = ([] for _ in range(8))

In [None]:
%%appyter code_exec

# Given a kinase name, list the small molecules that bind it, grouped by
# Kd range, using the pre-computed lookup dictionaries.
kinase_name = {{ StringField(
        name = 'Kinase name',
        label = 'Kinase name',
        description = 'The kinase name',
        default = 'ABL1',
        section = 'section'
    )}}


def _load_pickle(path):
    """Unpickle and return the object stored at `path`."""
    # NOTE(review): pickle.load can execute arbitrary code from the file; only
    # use this with trusted pickle files that ship with the appyter.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# One lookup per affinity class (1, 2, 3 match the Kd ranges printed below);
# presumably each maps kinase name -> list of small molecule names -- confirm
# against the script that generated the pickles.
dict_1 = _load_pickle('dict_1.pickle')
dict_2 = _load_pickle('dict_2.pickle')
dict_3 = _load_pickle('dict_3.pickle')

# Small molecules for each target affinity; empty when the kinase is absent.
sm1_list = dict_1[kinase_name] if kinase_name in dict_1 else []
sm2_list = dict_2[kinase_name] if kinase_name in dict_2 else []
sm3_list = dict_3[kinase_name] if kinase_name in dict_3 else []

# Always define the joined strings (empty when nothing binds in that class):
# the bar-graph cell reads all three unconditionally.
sm1_string = ', '.join(sm1_list)
sm2_string = ', '.join(sm2_list)
sm3_string = ', '.join(sm3_list)

if len(sm1_list) != 0:
    print('Small molecules that bind to ' + kinase_name + ' with Kd < 100 nM: ')
    print(sm1_string)
    print()

if len(sm2_list) != 0:
    print('Small molecules that bind to ' + kinase_name + ' with 100 nM ≤ Kd < 1µM: ')
    print(sm2_string)
    print()

# This check previously read an undefined name (target_aff3), which raised
# NameError before the third class could be reported.
if len(sm3_list) != 0:
    print('Small molecules that bind to ' + kinase_name + ' with 1µM ≤ Kd < 10 µM: ')
    print(sm3_string)
    print()

In [None]:
# Bar graph for the small molecules: how many bind the kinase in each Kd range.

x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
y = [len(sm1_list), len(sm2_list), len(sm3_list)]

# Hover text with wraparound, one entry per bar. It is built from the small
# molecule lists rather than the smN_string variables, which the lookup cell
# only defines for non-empty classes (an empty class would raise NameError
# here, or silently show a stale string from an earlier run).
sm1_lab = '<br>'.join(textwrap.wrap(', '.join(sm1_list), width=50))
sm2_lab = '<br>'.join(textwrap.wrap(', '.join(sm2_list), width=50))
sm3_lab = '<br>'.join(textwrap.wrap(', '.join(sm3_list), width=50))

fig = go.Figure(data=[go.Bar(x = x, y = y, text = y, textposition = 'auto',
                             hovertext = [sm1_lab, sm2_lab, sm3_lab],
                             hoverlabel = dict(font = dict(size = 18)),
                             # Bars are coloured by their own height.
                             marker = {'color': y,
                                        'colorscale': 'Plotly3'})])

fig.update_layout(
    title = {
        'text': 'Small molecules that bind ' + kinase_name,
        'y':0.87,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    xaxis_title = "Equilibrium Dissociation Constant",
    yaxis_title = "Number of small molecules",
    font = dict(
        size = 18,
        color = 'black'
    )
)

fig.show()