In [1]:
#%%appyter init
from appyter import magic
# Give appyter a zero-argument callable that returns this notebook's globals()
# (the default argument binds the built-in `globals`, which the lambda then calls).
# NOTE(review): presumably this is what makes the `%%appyter ...` cell magics used
# below available - confirm against the appyter documentation.
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown

# KINOMEscan and TAS Vector Data Visualization

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import textwrap 
import os.path
import urllib.request
import xlrd
from collections import defaultdict
from IPython.core.display import display, HTML

In [None]:
%%appyter hide_code

{% do SectionField(name ='title', title = 'Visualize KINOMEscan Data', 
                   subtitle = 'KINOMEscan assay platform data show small molecule binding and inhibition of protein kinases.', 
                   img = 'poll.png')%}

{% do SectionField(name = 'section0', title = 'Input a Small Molecule and/or Kinase for KINOMEscan Data', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 


{% do SectionField(name = 'section2', title = 'Upload or Enter a Kinase and/or Drug List', 
                   subtitle = 'Upload or enter a kinase list to visualize the drugs that best target those protein kinases ' +
                   '-AND/OR- upload or enter a drug list to visualize the kinases the drugs will best target', 
                   img = 'file-upload.png')%}  

{% do SectionField(name = 'section3', title = 'Upload or Enter a Protein/Gene List', 
                   subtitle = 'Upload or enter a gene list to perform kinase enrichment analysis and receive ' +
                   'a ranked list of protein kinases that phosphorylate them. Visualize the drugs that will best target ' +
                   'these associated protein kinases.',
                   img = 'file-upload.png')%}

{% do SectionField(name ='title2', title = 'Visualize Target Affinity Spectrum (TAS) Data',
                   subtitle = 'Target Affinity Spectrum (TAS) vectors summarize binding information ' +
                   'from multiple assay formats.', img = 'poll.png')%}

{# TAS input section; subtitle wording mirrors the KINOMEscan input section ('section0'). #}
{% do SectionField(name = 'section', title = 'Input a Small Molecule and/or Kinase for TAS Data', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 

In [None]:
%%HTML
<!-- Formatting for the tables -->

<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [None]:
# Function that drops the row label from a df.loc selection by keeping only its first value
def remove_row_number(Series):
    """Return the first element of ``Series`` as a string.

    Iterating the argument yields its values without any row labels, so a
    one-row ``df.loc[...]`` selection is reduced to its bare value.  Elements
    after the first are ignored; ``None`` is returned when there is nothing
    to iterate over.
    """
    values = iter(Series)
    try:
        first_value = next(values)
    except StopIteration:
        return None
    return str(first_value)

In [None]:
%%appyter markdown

### Generate table of kinases for small molecule input from KINOMEscan data, with either equilibrium dissociation constant Kd or % Control 
For both Kd and % Control, a lower value indicates higher binding affinity and stronger inhibition.

In [72]:
%%appyter code_exec

# if small molecule name inputted, will go through its CSV file and sort by % Control

sm_name_kinome = {{ StringField(
        name = 'Small molecule name for KINOMEscan',
        label = 'Small molecule name',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = '',
        section = 'section0'
    )}}

kinome_scan_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')

# Create list of all sm to be used later
all_sm = []
for sm in range(0, 182):
    all_sm.append(kinome_scan_df.iloc[sm]['sm_name'])

if (sm_name_kinome != ''):
    kinome_scan_df_copy = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')

    sm_name_kinome_lower = sm_name_kinome.lower() # Disregard capitalization of input
    
    # check for valid input
    kinome_scan_df_copy['sm_name'] = kinome_scan_df_copy['sm_name'].str.lower()
    kinome_scan_sm_names = kinome_scan_df_copy['sm_name']
    kinome_scan_sm_list = []
    for name in kinome_scan_sm_names:
        kinome_scan_sm_list.append(name)
    sm_kinome_valid = sm_name_kinome_lower in kinome_scan_sm_list
    
        
    if (sm_kinome_valid):
        dataset_id = kinome_scan_df.loc[kinome_scan_df['sm_name'].str.lower() == sm_name_kinome_lower, 'dataset_id']
        dataset_id = remove_row_number(dataset_id)

        # return small molecule with proper capitalization
        sm_kinome_proper_cap = kinome_scan_df.loc[kinome_scan_df['dataset_id'] == int(dataset_id), 'sm_name']
        sm_kinome_proper_cap = remove_row_number(sm_kinome_proper_cap)

        # retrieve correct small molecule csv
        sm_url = 'http://lincs.hms.harvard.edu/db/datasets/{}/results?search=&output_type=.xlsx'.format(dataset_id)
        sm_kinome_data = pd.read_excel(sm_url)
        sm_df = pd.DataFrame(sm_kinome_data)

        percentages = defaultdict(set)
        percentages_exists = False
        kds = defaultdict(set)

        if '% Control' in sm_df:
            percentages_exists = True
            sm_df = sm_df.sort_values(by = '% Control')
            percentages = dict(zip(sm_df['Protein Name'], sm_df['% Control']))
            percentages_without_100 = {}
            # Print in table format
            for k, v in percentages.items():
                if (percentages[k] != 100.0):
                    percentages_without_100.update({k: v})

            percentages_pd = pd.DataFrame(percentages_without_100, index = ['% Control'])
            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>The small molecules with % control values of 100 are omitted. Scroll right if necessary to see all kinases.<i>'))
            display(HTML(percentages_pd.to_html()))

        else:
            sm_df = sm_df[sm_df['Kd'].notna()] # remove all rows with no value
            sm_df = sm_df.sort_values(by = ['Kd'])
            kds = dict(zip(sm_df['Protein Name'], sm_df['Kd']))
            kds_pd = pd.DataFrame(kds, index = ['Kd'])

            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Scroll right if necessary to see all kinases.<i>'))
            display(HTML(kds_pd.to_html()))

    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))        

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))
    

```python
# if small molecule name inputted, will go through its CSV file and sort by % Control
sm_name_kinome = ''
kinome_scan_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
# Create list of all sm to be used later
all_sm = []
for sm in range(0, 182):
    all_sm.append(kinome_scan_df.iloc[sm]['sm_name'])
if (sm_name_kinome != ''):
    kinome_scan_df_copy = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
    sm_name_kinome_lower = sm_name_kinome.lower() # Disregard capitalization of input
    # check for valid input
    kinome_scan_df_copy['sm_name'] = kinome_scan_df_copy['sm_name'].str.lower()
    kinome_scan_sm_names = kinome_scan_df_copy['sm_name']
    kinome_scan_sm_list = []
    for name in kinome_scan_sm_names:
        kinome_scan_sm_list.append(name)
    sm_kinome_valid = sm_name_kinome_lower in kinome_scan_sm_list
    if (sm_kinome_valid):
        dataset_id = kinome_scan_df.loc[kinome_scan_df['sm_name'].str.lower() == sm_name_kinome_lower, 'dataset_id']
        dataset_id = remove_row_number(dataset_id)
        # return small molecule with proper capitalization
        sm_kinome_proper_cap = kinome_scan_df.loc[kinome_scan_df['dataset_id'] == int(dataset_id), 'sm_name']
        sm_kinome_proper_cap = remove_row_number(sm_kinome_proper_cap)
        # retrieve correct small molecule csv
        sm_url = 'http://lincs.hms.harvard.edu/db/datasets/{}/results?search=&output_type=.xlsx'.format(dataset_id)
        sm_kinome_data = pd.read_excel(sm_url)
        sm_df = pd.DataFrame(sm_kinome_data)
        percentages = defaultdict(set)
        percentages_exists = False
        kds = defaultdict(set)
        if '% Control' in sm_df:
            percentages_exists = True
            sm_df = sm_df.sort_values(by = '% Control')
            percentages = dict(zip(sm_df['Protein Name'], sm_df['% Control']))
            percentages_without_100 = {}
            # Print in table format
            for k, v in percentages.items():
                if (percentages[k] != 100.0):
                    percentages_without_100.update({k: v})
            percentages_pd = pd.DataFrame(percentages_without_100, index = ['% Control'])
            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>The small molecules with % control values of 100 are omitted. Scroll right if necessary to see all kinases.<i>'))
            display(HTML(percentages_pd.to_html()))
        else:
            sm_df = sm_df[sm_df['Kd'].notna()] # remove all rows with no value
            sm_df = sm_df.sort_values(by = ['Kd'])
            kds = dict(zip(sm_df['Protein Name'], sm_df['Kd']))
            kds_pd = pd.DataFrame(kds, index = ['Kd'])
            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Scroll right if necessary to see all kinases.<i>'))
            display(HTML(kds_pd.to_html()))
    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))
else:
    display(HTML('<h2> There was no small molecule input. </h2>'))
```

In [None]:
# Helper the bar charts use to turn a list of names into a readable hover label
def prep_and_wrap(aList):
    """Join ``aList`` with ', ' and wrap the text to 50-character lines.

    The wrapped lines are joined with '<br>'; an empty list gives ''.
    """
    comma_separated = ', '.join(aList)
    wrapper = textwrap.TextWrapper(width = 50)
    wrapped_lines = wrapper.wrap(comma_separated)
    return '<br>'.join(wrapped_lines)

In [None]:
# Function to generate all bar charts
def generateBarChart(xdata, ydata, w, hover, titletext, xtitle, ytitle):
    """Build (but do not show) a plotly bar chart.

    Parameters:
        xdata: category labels, one per bar.
        ydata: bar heights; also printed on the bars and used to color them.
        w: bar width passed to ``go.Bar``.
        hover: hover text, one entry per bar.
        titletext: chart title, centered above the plot.
        xtitle: x-axis title.
        ytitle: y-axis title.

    Returns:
        The ``go.Figure``; the caller decides when to ``show()`` it.
    """
    bar = go.Bar(x = xdata, y = ydata, text = ydata, width = w, textposition = 'auto',
                 hovertext = hover,
                 hoverlabel = dict(font = dict(size = 18)),
                 # Color by this chart's own values (the ydata argument), not by
                 # whatever notebook-level variable happens to be named `y`.
                 marker = {'color': ydata,
                           'colorscale': ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']})
    fig = go.Figure(data=[bar])

    fig.update_layout(
                    title = {
                        'text': titletext,
                        'y':0.87,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top',
                    },
                    xaxis_title = xtitle,
                    yaxis_title = ytitle,
                    font = dict(
                        size = 18,
                        color = 'black'
                        )
                    )

    return fig 

In [None]:
%%appyter markdown

### Generate bar chart for small molecule input from KINOMEscan data
Hover over bar(s) to see kinases.

In [None]:
# If a valid small molecule was entered, group its kinases into binding-strength
# buckets and display the number of kinases per bucket as a bar chart.
if (sm_name_kinome != ''): 
    if (sm_kinome_valid):
        if percentages_exists:
            # (label, exclusive upper bound) per % Control bucket, in ascending order.
            # Kinases at or above the last bound (95%) are not charted, so there is
            # exactly one label per bar (the original had a sixth, value-less label).
            percent_buckets = [('< 20%', 20.0),
                               ('20% ≤ ... < 40%', 40.0),
                               ('40% ≤ ... < 60%', 60.0),
                               ('60% ≤ ... < 80%', 80.0),
                               ('80% ≤ ... < 95%', 95.0)]
            kinases_per_bucket = [[] for _ in percent_buckets]

            for key, value in percentages.items():
                for position, (_, upper_bound) in enumerate(percent_buckets):
                    if value < upper_bound:
                        # first bucket whose bound exceeds the value wins
                        kinases_per_bucket[position].append(key)
                        break

            x = [label for label, _ in percent_buckets]
            y = [len(names) for names in kinases_per_bucket]
            # Prepare and wrap text for labels 
            hovertext = [prep_and_wrap(names) for names in kinases_per_bucket]
            title = 'Kinases bound by ' + sm_kinome_proper_cap
            xaxis_title = '% Control'
            yaxis_title = 'Kinases'

            fig1 = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
            fig1.show()

        else:
            # (label, exclusive upper bound) per Kd bucket; values of 10000 and above are not charted
            kd_buckets = [('Kd < 100 nM', 100.0),
                          ('100 nM ≤ Kd < 1µM', 1000.0),
                          ('1µM ≤ Kd < 10 µM', 10000.0)]
            kinases_per_bucket = [[] for _ in kd_buckets]

            for key, value in kds.items():
                for position, (_, upper_bound) in enumerate(kd_buckets):
                    if value < upper_bound:
                        kinases_per_bucket[position].append(key)
                        break

            x = [label for label, _ in kd_buckets]
            y = [len(names) for names in kinases_per_bucket]
            # Prepare and wrap text for labels 
            hovertext = [prep_and_wrap(names) for names in kinases_per_bucket]
            title = 'Kinases bound by ' + sm_kinome_proper_cap
            xaxis_title = 'Kd'
            yaxis_title = 'Kinases'
            
            fig2 = generateBarChart(x, y, 0.2, hovertext, title, xaxis_title, yaxis_title)
            fig2.show()
            
    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))
        
else:
    display(HTML('<h2> There was no small molecule input. </h2>'))

In [None]:
%%appyter markdown

### Generate list of small molecules for kinase input from KINOMEscan data
Includes download of the KINOMEscan GMT files. For more information on how this was assembled, go to 
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts).

In [44]:
%%appyter code_exec

# if kinase inputted, return small molecules  
kinase_name_kinome = {{ StringField(
        name = 'Kinase name for KINOMEscan',
        label = 'Kinase name',
        description = 'One kinase input. Examples include: ABL2, ALK, CSF1R, EPHA3',
        default = 'ABL2',
        section = 'section0'
    )}}

if not os.path.exists('percentage_levels.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/percentage_levels.gmt', 'percentage_levels.gmt')
percentage_levels = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('percentage_levels.gmt', 'r'))}
if not os.path.exists('kds_levels.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/kds_levels.gmt', 'kds_levels.gmt')
kds_levels = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('kds_levels.gmt', 'r'))}

# Create nested percentage_levels_dict {perc_nested_dict[kinase]: {sm: % Control, sm2: % Control,...}}
perc_nested_dict = {}
for kinase in percentage_levels.keys():
    perc_nested_dict[kinase] = {}
    for sm in percentage_levels[kinase]:
        if not sm.isspace(): # Remove tab character
            perc_nested_dict[kinase][(sm.split(',')[0]).strip()] = sm.split(',')[1]
print(perc_nested_dict['ABL2'])
# Create nested kds_dict 
kds_nested_dict = {}
for kinase in kds_levels.keys():
    kds_nested_dict[kinase] = {}
    for sm in kds_levels[kinase]:
        if not sm.isspace(): # Remove tab character
            kds_nested_dict[kinase][(sm.split(',')[0]).strip()] = sm.split(',')[1]

# Create dictionary for capitalization
kinase_names_cap = {}
for kinase_name in percentage_levels.keys():
    kinase_names_cap.update({kinase_name.lower() : kinase_name})
for kinase_name in kds_levels.keys():
    kinase_names_cap.update({kinase_name.lower() : kinase_name})
    
if (kinase_name_kinome != ''):    
    kinase_name_kinome = kinase_name_kinome.lower()
    
    # check for valid input
    kinase_kinome_valid = kinase_name_kinome in kinase_names_cap
    
    if (kinase_kinome_valid):
        kinase_proper_cap = kinase_names_cap[kinase_name_kinome] # Retrieve proper capitalization of kinase
        
        percentage_dict_exists = False
        kds_dict_exists = False

        if kinase_proper_cap in kds_levels:
            kds_list = kds_levels[kinase_proper_cap]

        if (len(percentage_list) != 0):
            percentage_dict_exists = True
            percentage_dict = perc_nested_dict[kinase_proper_cap]
            percentage_dict = {k: v for k, v in sorted(percentage_dict.items(), key=lambda item: float(item[1]))} # Sort
            table1df = pd.DataFrame(percentage_dict, index = ['% Control'])

            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding % Control values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all small molecules. Molecules with % Control of 100 were omitted.<i>'))
            display(HTML(table1df.to_html()))

        if (len(kds_list) != 0):
            kds_dict_exists = True
            kds_dict = kds_nested_dict[kinase_proper_cap]
            kds_dict = {k: v for k, v in sorted(kds_dict.items(), key=lambda item: float(item[1]))} # Sort
            table2df = pd.DataFrame([kds_dict], index = ['Kd'])

            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding Kd values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all molecules.<i>'))
            display(HTML(table2df.to_html()))

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))
        
else:
    display(HTML('<h2> There was no kinase input. </h2>'))

```python
# if kinase inputted, return small molecules
kinase_name_kinome = 'ABL2'
if not os.path.exists('percentage_levels.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/percentage_levels.gmt', 'percentage_levels.gmt')
percentage_levels = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('percentage_levels.gmt', 'r'))}
if not os.path.exists('kds_levels.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/kds_levels.gmt', 'kds_levels.gmt')
kds_levels = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('kds_levels.gmt', 'r'))}
# Create nested percentage_levels_dict {perc_nested_dict[kinase]: {sm: % Control, sm2: % Control,...}}
perc_nested_dict = {}
for kinase in percentage_levels.keys():
    perc_nested_dict[kinase] = {}
    for sm in percentage_levels[kinase]:
        if not sm.isspace(): # Remove tab character
            perc_nested_dict[kinase][(sm.split(',')[0]).strip()] = sm.split(',')[1]
print(perc_nested_dict['ABL2'])
# Create nested kds_dict
kds_nested_dict = {}
for kinase in kds_levels.keys():
    kds_nested_dict[kinase] = {}
    for sm in kds_levels[kinase]:
        if not sm.isspace(): # Remove tab character
            kds_nested_dict[kinase][(sm.split(',')[0]).strip()] = sm.split(',')[1]
# Create dictionary for capitalization
kinase_names_cap = {}
for kinase_name in percentage_levels.keys():
    kinase_names_cap.update({kinase_name.lower() : kinase_name})
for kinase_name in kds_levels.keys():
    kinase_names_cap.update({kinase_name.lower() : kinase_name})
if (kinase_name_kinome != ''):
    kinase_name_kinome = kinase_name_kinome.lower()
    # check for valid input
    kinase_kinome_valid = kinase_name_kinome in kinase_names_cap
    if (kinase_kinome_valid):
        kinase_proper_cap = kinase_names_cap[kinase_name_kinome] # Retrieve proper capitalization of kinase
        percentage_dict_exists = False
        kds_dict_exists = False
        if kinase_proper_cap in kds_levels:
            kds_list = kds_levels[kinase_proper_cap]
        if (len(percentage_list) != 0):
            percentage_dict_exists = True
            percentage_dict = perc_nested_dict[kinase_proper_cap]
            percentage_dict = {k: v for k, v in sorted(percentage_dict.items(), key=lambda item: float(item[1]))} # Sort
            table1df = pd.DataFrame(percentage_dict, index = ['% Control'])
            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding % Control values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all small molecules. Molecules with % Control of 100 were omitted.<i>'))
            display(HTML(table1df.to_html()))
        if (len(kds_list) != 0):
            kds_dict_exists = True
            kds_dict = kds_nested_dict[kinase_proper_cap]
            kds_dict = {k: v for k, v in sorted(kds_dict.items(), key=lambda item: float(item[1]))} # Sort
            table2df = pd.DataFrame([kds_dict], index = ['Kd'])
            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding Kd values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all molecules.<i>'))
            display(HTML(table2df.to_html()))
    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))
else:
    display(HTML('<h2> There was no kinase input. </h2>'))
```

{'(s)-CR8': '91.0', '5z-7-oxozeaenol': '87.0', 'Abemaciclib': '91.0', 'AG1478': '34.0', 'Alpelisib': '96.0', 'ALW-II-38-3': '1.0', 'ALW-II-49-7': '21.0', 'AZ-628': '12.0', 'AZD 5438': '60.0', 'AZD-6482': '95.0', 'AZD4547': '13.0', 'AZD7762': '0.2', 'Barasertib': '79.0', 'Baricitinib': '65.0', 'BGJ398': '62.0', 'BMS-536924': '2.5', 'BS-181': '90.0', 'Buparlisib': '99.0', 'BX-912': '7.8', 'Ceritinib': '95.0', 'CGP74514A': '21.0', 'CP466722': '33.0', 'Crizotinib': '0.9', 'Dabrafenib': '9.3', 'Dinaciclib': '95.0', 'GSK1059615': '77.0', 'GSK429286A': '86.0', 'GSK461364': '90.0', 'GW-5074': '66.0', 'GW843682X': '79.0', 'HG-14-10-04': '4.0', 'HG-5-113-01': '9.4', 'HG-6-64-01': '0.0', 'HG-9-91-01': '0.2', 'Ibrutinib': '38.0', 'JNK-9L': '67.0', 'JNK-IN-11': '21.0', 'JNK-IN-5A': '86.0', 'JNK-IN-8': '97.0', 'JW-7-24-1': '34.0', 'JWE-035': '3.0', 'KIN001-111': '0.8', 'KIN001-220': '20.0', 'KU63794': '95.0', 'MLN8054': '30.0', 'Momelotinib': '57.0', 'MRT67307': '83.0', 'Neratinib': '92.0', 'NG25': 

Unnamed: 0,HG-6-64-01,WH-4-023,AZD7762,HG-9-91-01,OTSSP167,NG25,WH-4-025,KIN001-111,THZ1,Crizotinib,ALW-II-38-3,NVP-TAE684,XMD16-144,BMS-536924,JWE-035,HG-14-10-04,R406,Ribociclib,BX-912,WZ-4-145,Dabrafenib,HG-5-113-01,AZ-628,AZD4547,KIN001-220,ALW-II-49-7,CGP74514A,JNK-IN-11,Torkinib,MLN8054,QL-XI-92,Vemurafenib,CP466722,XMD-12,AG1478,JW-7-24-1,Sorafenib,PLX-4720,Ibrutinib,TAK-715,SB590885,Momelotinib,AZD 5438,BGJ398,Baricitinib,GW-5074,JNK-9L,GSK1059615,Barasertib,GW843682X,Omipalisib,XMD14-99,ZM-447439,MRT67307,WYE-125132,GSK429286A,JNK-IN-5A,QL-X-138,5z-7-oxozeaenol,BS-181,GSK461364,PD0325901,(s)-CR8,Abemaciclib,Neratinib,NU7441,RO-3306,SCH772984,Taselisib,WZ3105,AZD-6482,Ceritinib,Dinaciclib,KU63794,Alpelisib,JNK-IN-8,Palbociclib,XMD11-50,Buparlisib,Pictilisib,Torin1
% Control,0.0,0.1,0.2,0.2,0.2,0.3,0.4,0.8,0.8,0.9,1.0,1.4,1.4,2.5,3.0,4.0,4.4,5.8,7.8,9.1,9.3,9.4,12.0,13.0,20.0,21.0,21.0,21.0,28.0,30.0,32.0,32.0,33.0,33.0,34.0,34.0,35.0,36.0,38.0,47.0,55.0,57.0,60.0,62.0,65.0,66.0,67.0,77.0,79.0,79.0,82.0,82.0,82.0,83.0,84.0,86.0,86.0,86.0,87.0,90.0,90.0,90.0,91.0,91.0,92.0,92.0,92.0,93.0,94.0,94.0,95.0,95.0,95.0,95.0,96.0,97.0,97.0,97.0,99.0,99.0,99.0


Unnamed: 0,Dasatinib,Bosutinib,Tozasertib,Imatinib,Nilotinib,Foretinib,Vandetanib,Masitinib,Erlotinib,KW2449,NVP-TAE684,Crizotinib,Fedratinib,R406,Canertinib,Ki20227,Sunitinib,Doramapimod,Gefitinib,Brivanib,RAF 265,VX-745,Nintedanib,PLX-4720,Sorafenib,Pazopanib,MLN8054,Motesanib,Lestaurtinib,PHA-665752
Kd,0.17,1.5,4.0,10.0,26.0,27.0,69.0,110.0,200.0,250.0,380.0,460.0,570.0,720.0,870.0,950.0,1000.0,1300.0,1600.0,1800.0,1900.0,1900.0,2000.0,2700.0,2900.0,3000.0,3300.0,4500.0,6500.0,6900.0


In [None]:
%%appyter markdown

### Generate bar chart for kinase input from KINOMEscan data
Hover over bar(s) to see small molecules.

In [None]:
# If a valid kinase was entered, group its small molecules into binding-strength
# buckets and display the number of small molecules per bucket as bar charts
# (one chart for % Control values, one for Kd values, whichever exist).
if (kinase_name_kinome != ''): 
    if (kinase_kinome_valid):
        if percentage_dict_exists:
            # List of small molecules in each category 
            sm_0 = []
            sm_20 = []
            sm_40 = []
            sm_60 = []
            sm_80 = []

            for key, raw_value in percentage_dict.items():
                value = float(raw_value) # values are stored as strings
                if value < 20.0:
                    sm_0.append(key)
                elif value < 40.0:
                    sm_20.append(key)
                elif value < 60.0:
                    sm_40.append(key)
                elif value < 80.0:
                    sm_60.append(key)
                else:
                    sm_80.append(key)

            percent_groups = [sm_0, sm_20, sm_40, sm_60, sm_80]

            x = ['< 20%', '20% ≤ ... < 40%', '40% ≤ ... < 60%', '60% ≤ ... < 80%', '80% ≤ ... < 100%']
            y = [len(group) for group in percent_groups]
            # Prepare and wrap text for labels 
            hovertext = [prep_and_wrap(group) for group in percent_groups]
            title = 'Small molecules bound by ' + kinase_proper_cap + ' (% Control)'
            xaxis_title = '% Control'
            yaxis_title = 'Small molecules'

            fig1 = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
            fig1.show()

        if kds_dict_exists:
            sm_kds_1 = []
            sm_kds_2 = []
            sm_kds_3 = []

            for key, raw_value in kds_dict.items():
                value = float(raw_value) # values are stored as strings
                if value < 100.0:
                    sm_kds_1.append(key)
                elif value < 1000.0:
                    sm_kds_2.append(key)
                elif value < 10000.0:
                    sm_kds_3.append(key)
                # values of 10000 and above are not charted

            kd_groups = [sm_kds_1, sm_kds_2, sm_kds_3]

            x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
            y = [len(group) for group in kd_groups]
            # Prepare and wrap text for labels 
            hovertext = [prep_and_wrap(group) for group in kd_groups]
            title = 'Small molecules bound by ' + kinase_proper_cap + ' (Kd)'
            xaxis_title = 'Kd'
            yaxis_title = 'Small molecules'

            fig2 = generateBarChart(x, y, 0.2, hovertext, title, xaxis_title, yaxis_title)
            fig2.show()
            
    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))        
    
else:
    # This cell handles the kinase input, so report the kinase (not the small molecule) as missing
    display(HTML('<h2> There was no kinase input. </h2>'))

In [5]:
%%appyter code_exec

# Import kinase list as file or text box 
# Will choose file upload over textbox if a file is given 

drug_list_file = {{ FileField(
        name = 'Upload drug list',
        label = 'Upload drug list',
        description = 'Drug list upload',
        default = '',
        section = 'section3'
    )}}

drug_list_input = {{ TextField(
        name = 'Input drug list',
        label = 'Input drug list (one per row)',
        description = 'Drug list input (e.g. (s)-CR8)',
        default = '(s)-CR8 \n Seliciclib',
        section = 'section3',
    )}}

drug_input_exists = False 

if drug_list_file != '':
    open_drug_list_file = open(drug_list_file,'r')
    lines = open_drug_list_file.readlines()
    drugs = [x.strip() for x in lines]
    open_drug_list_file.close()
    drug_input_exists = True 
elif kinase_list_input != '':
    kinases = kinase_list_input.split('\n')
    kinases = [x.strip() for x in kinases]
    drug_input_exists = True 
else:
    display(HTML('<h2>No drug list was inputted or uploaded.</h2>'))

# Drug-list analysis. The trailing comments describe a ranking step that is not
# implemented in this cell.
if drug_input_exists:
    drug_input = list(set(drugs)) # remove duplicates (order is not preserved by set)

    # all the other drugs that were not inputted
    # NOTE(review): this list is created empty and never filled in this cell.
    other_drugs = []
    #HMS_KINOMESCAN

    # go through prot_kinases and retrieve the small molecules that bound to at least one of them
    # NOTE(review): `prot_kinases` is not assigned in any cell visible above this one, so
    # this loop presumably raises NameError on a fresh kernel - confirm where it should come from.
    # NOTE(review): `percentage_list` / `kds_list` are indexed by kinase name here, i.e. used
    # as mappings, while the earlier kinase cell builds `kds_list` as a plain list; the lookup
    # presumably wants the per-kinase GMT dictionaries instead - TODO confirm.
    sm_percentage_list = []
    sm_kds_list = []
    for kinase in prot_kinases:
        sm_percentage_list.append(percentage_list[kinase])
        sm_kds_list.append(kds_list[kinase])
    
    sm_percentage_list = [item for items in sm_percentage_list for item in items] # flatten the list of lists
    sm_kds_list = [item for items in sm_kds_list for item in items] # flatten the list of lists
    sm_percentage_list = [sm.strip('\n') for sm in sm_percentage_list] # drop leading/trailing newlines
    sm_kds_list = [sm.strip('\n') for sm in sm_kds_list] # drop leading/trailing newlines
    sm_percentage_list = list(set(sm_percentage_list)) # remove duplicates 
    sm_kds_list = list(set(sm_kds_list)) # remove duplicates 

    # TODO (not implemented yet):
    # for small molecules with percentages: return top 5 molecules ranked by 
    # having the lowest percentages for these but highest for other kinases 
    
    # for small molecules with kds: return top 5 molecules ranked by 
    # having the lowest kds for these but highest for other kinases 

```python
# Rendered form of the drug-list cell: import the drug list from an uploaded
# file or from the text box. The file upload takes precedence when both are given.
drug_list_file = ''
drug_list_input = '''(s)-CR8
 Seliciclib'''
drug_input_exists = False
if drug_list_file != '':
    open_drug_list_file = open(drug_list_file,'r')
    lines = open_drug_list_file.readlines()
    drugs = [x.strip() for x in lines]
    open_drug_list_file.close()
    drug_input_exists = True
# NOTE(review): this branch tests and parses `kinase_list_input`, not
# `drug_list_input`, and fills `kinases` instead of `drugs`. `drugs` is therefore
# never assigned on this path although it is read below -- looks like a
# copy-paste slip from the kinase-list cell; confirm and fix in the source cell.
elif kinase_list_input != '':
    kinases = kinase_list_input.split('\n')
    kinases = [x.strip() for x in kinases]
    drug_input_exists = True
else:
    display(HTML('<h2>No drug list was inputted or uploaded.</h2>'))
if drug_input_exists:
    drug_input = list(set(drugs)) # remove duplicates
    # all the other drugs that were not inputted (never filled in this cell)
    other_drugs = []
    #HMS_KINOMESCAN
    # go through prot_kinases and retrieve the small molecules that bound to at least one of them
    # NOTE(review): `prot_kinases`, `percentage_list` and `kds_list` are not
    # assigned in this cell -- presumably they come from earlier cells; verify.
    sm_percentage_list = []
    sm_kds_list = []
    for kinase in prot_kinases:
        sm_percentage_list.append(percentage_list[kinase])
        sm_kds_list.append(kds_list[kinase])
    sm_percentage_list = [item for items in sm_percentage_list for item in items] # remove nested lists
    sm_kds_list = [item for items in sm_kds_list for item in items] # remove nested lists
    # drop the newline characters left on the small-molecule names
    sm_percentage_list = [sm.strip('\n') for sm in sm_percentage_list]
    sm_kds_list = [sm.strip('\n') for sm in sm_kds_list]
    sm_percentage_list = list(set(sm_percentage_list)) # remove duplicates
    sm_kds_list = list(set(sm_kds_list)) # remove duplicates
    # TODO (not implemented in this cell):
    # for small molecules with percentages: return top 5 molecules ranked by
    # having the lowest percentages for these but highest for other kinases
    # for small molecules with kds: return top 5 molecules ranked by
    # having the lowest kds for these but highest for other kinases
```

In [103]:
%%appyter code_exec

# Import the kinase list from an uploaded file or from the text box.
# The file upload takes precedence over the text box when both are given.

kinase_list_file = {{ FileField(
        name = 'Upload kinase list',
        label = 'Upload kinase list',
        description = 'Kinase list upload',
        default = '',
        section = 'section2'
    )}}

kinase_list_input = {{ TextField(
        name = 'Input kinase list',
        label = 'Input kinase list (one per row)',
        description = 'Kinase list input (e.g. AAK1 ABL1 ABL2 ACVR1)',
        default = 'BRAF',
        section = 'section2',
    )}}

# % Control charged to a small molecule that has no data point for a kinase
NO_DATA_PERCENT = 100.0

input_exists = False

if kinase_list_file != '':
    # the context manager closes the upload even if reading fails
    with open(kinase_list_file, 'r') as open_kinase_list_file:
        kinases = [x.strip() for x in open_kinase_list_file]
    input_exists = True
elif kinase_list_input != '':
    kinases = [x.strip() for x in kinase_list_input.split('\n')]
    input_exists = True
else:
    display(HTML('<h2>No kinase list was inputted or uploaded.</h2>'))

all_kinases = list(percentage_levels.keys())

if input_exists:
    # Drop blank rows and repeats, then keep only the kinases that actually
    # have KINOMEscan data so the dictionary look-ups below cannot fail.
    requested_kinases = {kinase for kinase in kinases if kinase != ''}
    prot_kinase_input = {
        kinase for kinase in requested_kinases
        if kinase in perc_nested_dict or kinase in kds_nested_dict
    }
    unrecognized_kinases = sorted(requested_kinases - prot_kinase_input)
    if unrecognized_kinases:
        print('Ignoring kinases without KINOMEscan data: ' + ', '.join(unrecognized_kinases))
    if not prot_kinase_input:
        display(HTML('<h2>None of the inputted kinases were recognized.</h2>'))

    # all the other kinases that were not inputted
    other_kinases = {kinase for kinase in all_kinases if kinase not in prot_kinase_input}

    # small molecules that have a data point for at least one inputted kinase
    sm_percentage_list = []
    sm_kds_list = []
    for kinase in prot_kinase_input:
        sm_percentage_list.extend(perc_nested_dict.get(kinase, {}))
        sm_kds_list.extend(kds_nested_dict.get(kinase, {}))
    sm_percentage_list = list(set(sm_percentage_list))  # remove duplicates
    sm_kds_list = list(set(sm_kds_list))  # remove duplicates
    all_relevant_sm = list(set(sm_percentage_list) | set(sm_kds_list))

    # Dict that will return a ranked list of sm NOT considering specificity
    # Format: {sm: avg_percent_for_kinases, sm2: ...}
    ranked_perc_dict = {}
    ranked_kd_dict = {}

    # Dict that will return a ranked list of sm CONSIDERING specificity
    # Format: {sm: (avg_percent_for_kinases - avg_percent_for_other_kinases), sm2: ...}
    ranked_perc_dict_spec = {}
    ranked_kd_dict_spec = {}

    # FOR ALL SM THAT BOUND TO AT LEAST ONE OF THE INPUTTED KINASES
    for sm in sm_percentage_list:
        # Average % Control over the inputted kinases
        kinase_perc_total = sum(
            float(perc_nested_dict.get(kinase, {}).get(sm, NO_DATA_PERCENT))
            for kinase in prot_kinase_input
        )
        avg_perc_for_kinases = kinase_perc_total / len(prot_kinase_input)
        ranked_perc_dict[sm] = avg_perc_for_kinases

        # Average % Control over every other kinase. The former
        # `sm_category == sm_percentage_list` test referenced a name this cell
        # never assigns, so it has been removed.
        if other_kinases:
            other_kinases_perc_total = sum(
                float(perc_nested_dict.get(kinase, {}).get(sm, NO_DATA_PERCENT))
                for kinase in other_kinases
            )
            avg_perc_for_other_kinases = other_kinases_perc_total / len(other_kinases)
        else:
            # every known kinase was inputted: nothing to compare against, so
            # fall back to the no-data value instead of dividing by zero
            avg_perc_for_other_kinases = NO_DATA_PERCENT
        ranked_perc_dict_spec[sm] = avg_perc_for_kinases - avg_perc_for_other_kinases

    # Sort both rankings ascending (lowest % Control first); each becomes a
    # list of (sm, score) tuples
    ranked_perc_dict = sorted(ranked_perc_dict.items(), key=lambda x: x[1])
    ranked_perc_dict_spec = sorted(ranked_perc_dict_spec.items(), key=lambda x: x[1])

    print(ranked_perc_dict)
    print(ranked_perc_dict_spec)
    

```python
# Rendered form of the kinase-list cell: import the kinase list from an uploaded
# file or from the text box. The file upload takes precedence when both are given.
kinase_list_file = ''
kinase_list_input = '''BRAF'''
input_exists = False
if kinase_list_file != '':
    open_kinase_list_file = open(kinase_list_file,'r')
    lines = open_kinase_list_file.readlines()
    kinases = [x.strip() for x in lines]
    open_kinase_list_file.close()
    input_exists = True
elif kinase_list_input != '':
    kinases = kinase_list_input.split('\n')
    kinases = [x.strip() for x in kinases]
    input_exists = True
else:
    display(HTML('<h2>No kinase list was inputted or uploaded.</h2>'))
# every kinase that appears as a key of percentage_levels
all_kinases = []
for kinase in percentage_levels.keys():
    all_kinases.append(kinase)
if input_exists:
    # remove repeats from the inputted list
    # NOTE(review): nothing here filters out unknown or non-protein kinases;
    # the set only de-duplicates.
    prot_kinase_input = set()
    for kinase in kinases:
        prot_kinase_input.add(kinase)
    # all the other kinases that were not inputted
    other_kinases = set()
    for kinase in all_kinases:
        if kinase not in prot_kinase_input:
            other_kinases.add(kinase)
    # go through prot_kinase_input and retrieve the small molecules that have a data point for at least one of them
    sm_percentage_list = []
    sm_kds_list = []
    all_relevant_sm = []
    for kinase in prot_kinase_input:
        sm_percentage_list.append(perc_nested_dict[kinase].keys())
        all_relevant_sm.append(perc_nested_dict[kinase].keys())
        sm_kds_list.append(kds_nested_dict[kinase].keys())
        all_relevant_sm.append(kds_nested_dict[kinase].keys())
    sm_percentage_list = list(set([item for items in sm_percentage_list for item in items])) # remove nested lists and duplicates
    sm_kds_list = list(set([item for items in sm_kds_list for item in items])) # remove nested lists and duplicates
    all_relevant_sm = list(set([item for items in all_relevant_sm for item in items]))
    # Dict that will return a ranked list of sm NOT considering specificity
    # Format: {sm: avg_percent_for_kinases, sm2: ...}
    # (the *_kd_* dictionaries are created but never filled in this cell)
    ranked_perc_dict = {}
    ranked_kd_dict = {}
    # Dict that will return a ranked list of sm CONSIDERING specificity
    # Format: {sm: (avg_percent_for_kinases - avg_percent_for_other_kinases), sm2: ...}
    ranked_perc_dict_spec = {}
    ranked_kd_dict_spec = {}
    # FOR ALL SM THAT BOUND TO AT LEAST ONE OF INPUTTED KINASES
    for sm in sm_percentage_list:
        kinase_perc_total = 0
        other_kinases_perc_total = 0
        # For all inputted kinases, find average % Control for each sm;
        # a missing data point counts as 100.0
        for kinase in prot_kinase_input:
            if sm in perc_nested_dict[kinase].keys(): # if sm has data point for kinase
                kinase_perc_total += float(perc_nested_dict[kinase][sm])
            else:
                kinase_perc_total += 100.0
        avg_perc_for_kinases = kinase_perc_total / len(prot_kinase_input)
        ranked_perc_dict.update({sm: avg_perc_for_kinases})
        # For all other kinases, find average % Control for each sm
        # NOTE(review): `sm_category` is not assigned anywhere in this cell, so
        # the test below depends on a value left over from another cell --
        # confirm whether the guard should simply be removed.
        for kinase in other_kinases:
            if sm_category == sm_percentage_list:
                if sm in perc_nested_dict[kinase].keys(): # if sm has data point for kinase
                    other_kinases_perc_total += float(perc_nested_dict[kinase][sm])
                else:
                    other_kinases_perc_total += 100.0
        # NOTE(review): divides by zero when other_kinases is empty, i.e. when
        # every key of percentage_levels was inputted.
        avg_perc_for_other_kinases = other_kinases_perc_total / len(other_kinases)
        ranked_perc_dict_spec.update({sm: avg_perc_for_kinases - avg_perc_for_other_kinases})
    # Sort the two dictionaries ascending by score; each becomes a list of (sm, score) tuples
    #print(ranked_perc_dict)
    ranked_perc_dict = sorted(ranked_perc_dict.items(), key=lambda x: x[1])
    ranked_perc_dict_spec = sorted(ranked_perc_dict_spec.items(), key=lambda x: x[1])
    print(ranked_perc_dict)
    print(ranked_perc_dict_spec)
```

[('SB590885', 0.0), ('PLX-4720', 0.0), ('Vemurafenib', 0.0), ('AZ-628', 0.0), ('Dabrafenib', 0.0), ('HG-6-64-01', 0.6), ('GW-5074', 0.9), ('Sorafenib', 1.2), ('ALW-II-38-3', 1.4), ('ALW-II-49-7', 2.0), ('OTSSP167', 3.6), ('JW-7-24-1', 4.1), ('Torkinib', 5.8), ('KIN001-111', 8.9), ('NG25', 9.1), ('HG-9-91-01', 17.0), ('WH-4-025', 20.0), ('PHA-767491', 22.0), ('R406', 24.0), ('ZSTK474', 34.0), ('QL-XI-92', 35.0), ('CP466722', 36.0), ('TAK-715', 40.0), ('JNK-IN-11', 43.0), ('AZD 5438', 44.0), ('Omipalisib', 46.0), ('AS-252424', 46.8), ('GSK1059615', 48.0), ('Crizotinib', 53.0), ('Ibrutinib', 53.0), ('HG-14-8-02', 56.0), ('Torin1', 59.0), ('AG1478', 64.0), ('QL-X-138', 66.0), ('XMD16-144', 66.0), ('KIN001-220', 70.0), ('THZ1', 72.0), ('Torin2', 73.0), ('PHA-793887', 73.0), ('Momelotinib', 74.0), ('XMD13-2', 74.0), ('XMD15-27', 78.0), ('Baricitinib', 78.0), ('RO-3306', 79.0), ('GSK461364', 79.0), ('ZG-10', 80.0), ('BGJ398', 81.0), ('KU63794', 81.0), ('WZ3105', 81.0), ('XMD11-50', 81.0), ('B

In [None]:
%%appyter markdown

### Generate list of kinases for small molecule input based on TAS vectors

In [None]:
%%appyter code_exec

# If a small molecule name was inputted, download its TAS results and list the
# kinases it binds, grouped by binding class.

sm_name = {{ StringField(
        name = 'Small molecule name',
        label = 'Small molecule name',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = '',
        section = 'section'
    )}}

kinase1_string = ''
kinase2_string = ''
kinase3_string = ''


def unique_gene_symbols(tas_df, binding_class):
    """Return the distinct gene symbols of one binding class, in first-seen order.

    Reads the 'Binding Class' and 'HUGO Gene Symbol' columns of ``tas_df``.
    Rows without a gene symbol are skipped so that no literal 'nan' entry
    ends up in the displayed list.
    """
    symbols = tas_df.loc[tas_df['Binding Class'] == binding_class, 'HUGO Gene Symbol']
    return list(dict.fromkeys(str(symbol) for symbol in symbols.dropna()))


if (sm_name != ''):
    # Download the workbook once; the copy holds the lower-cased names used to
    # validate the input, while kinome_df keeps the original capitalization.
    kinome_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
    kinome_df_copy = kinome_df.copy()

    # Remove HMSL before the ID. regex=True must be explicit: recent pandas
    # treats the pattern as a literal string by default, which would leave the
    # prefix in place.
    kinome_df['sm_hms_id'] = kinome_df['sm_hms_id'].str.replace(r'\D', '', regex=True)
    sm_name_lower = sm_name.strip().lower()

    # check for valid input (case-insensitive)
    kinome_df_copy['sm_name'] = kinome_df_copy['sm_name'].str.lower()
    kinome_sm_names = kinome_df_copy['sm_name']
    kinome_sm_names_list = kinome_sm_names.tolist()
    sm_tas_valid = sm_name_lower in kinome_sm_names_list

    if (sm_tas_valid):
        hms_id = kinome_df.loc[kinome_df['sm_name'].str.lower() == sm_name_lower, 'sm_hms_id']
        hms_id = remove_row_number(hms_id)

        # return small molecule with proper capitalization
        sm_tas_proper_cap = kinome_df.loc[kinome_df['sm_hms_id'] == hms_id, 'sm_name']
        sm_tas_proper_cap = remove_row_number(sm_tas_proper_cap)

        url = 'http://lincs.hms.harvard.edu/db/datasets/20000/results?small+molecules={}&output_type=.csv'.format(hms_id)
        df = pd.read_csv(url)

        # one list per binding class; the bar chart cell reads these names
        kinase1_list = unique_gene_symbols(df, 1)
        kinase2_list = unique_gene_symbols(df, 2)
        kinase3_list = unique_gene_symbols(df, 3)

        # joining an empty list yields '', matching the defaults set above
        kinase1_string = ', '.join(kinase1_list)
        kinase2_string = ', '.join(kinase2_list)
        kinase3_string = ', '.join(kinase3_list)

        # show a heading plus the kinase list for every non-empty class
        for kd_range, kinase_string in (
            ('Kd < 100 nM', kinase1_string),
            ('100 nM ≤ Kd < 1µM', kinase2_string),
            ('1µM ≤ Kd < 10 µM', kinase3_string),
        ):
            if kinase_string != '':
                display(HTML('<h2>' + sm_tas_proper_cap + ' binds to the following kinases with ' + kd_range + ': </h2>'))
                display(HTML(kinase_string))

    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))

In [None]:
%%appyter markdown

### Generate bar chart for small molecule input based on TAS vectors
Hover over bar(s) to see kinases.

In [None]:
# Bar chart of how many kinases the inputted small molecule binds in each Kd
# range. Nothing is plotted when the input is missing or was not recognized.
if sm_name == '':
    display(HTML('<h2> There was no small molecule input. </h2>'))
elif not sm_tas_valid:
    display(HTML('<h2> The small molecule input was not recognized. </h2>'))
else:
    x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
    y = [len(kinase1_list), len(kinase2_list), len(kinase3_list)]

    # Wrap the kinase lists at 50 characters so the hover boxes stay readable
    kinase1_lab, kinase2_lab, kinase3_lab = (
        '<br>'.join(textwrap.wrap(kinase_string, width=50))
        for kinase_string in (kinase1_string, kinase2_string, kinase3_string)
    )

    hovertext = [kinase1_lab, kinase2_lab, kinase3_lab]
    title = 'Kinases bound by ' + sm_tas_proper_cap
    xaxis_title = 'Equilibrium Dissociation Constant'
    # the bars show counts, so label the axis like the kinase-input chart does
    yaxis_title = 'Number of kinases'

    fig = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
    fig.show()

In [None]:
%%appyter markdown

### Generate list of small molecules for kinase input based on TAS vectors
Includes download of the TAS GMT files. For more information on how these files were assembled, see 
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts). 

In [None]:
%%appyter code_exec

# If a kinase was inputted, list the small molecules found for it in each TAS level
kinase_name = {{ StringField(
        name = 'Kinase name',
        label = 'Kinase name',
        description = 'One kinase input. Examples include: ABL1, ABL2, ALK, CCNA1, MAP2K1',
        default = '',
        section = 'section'
    )}}

sm1_string = ''
sm2_string = ''
sm3_string = ''

TAS_GMT_BASE_URL = 'https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/'


def load_tas_gmt(filename):
    """Download a TAS GMT file if it is not cached locally and parse it.

    Each tab-separated row becomes ``{first_field: fields_from_the_third_on}``.
    The line ending is stripped before splitting so the last entry of a row
    does not keep a trailing newline; blank rows and empty fields are skipped.
    """
    if not os.path.exists(filename):
        urllib.request.urlretrieve(TAS_GMT_BASE_URL + filename, filename)
    gmt = {}
    # the context manager closes the file handle once parsing is done
    with open(filename, 'r') as gmt_file:
        for line in gmt_file:
            split_line = line.rstrip('\r\n').split('\t')
            if split_line[0] == '':
                continue
            gmt[split_line[0]] = [member for member in split_line[2:] if member != '']
    return gmt


level_1 = load_tas_gmt('level1.gmt')
level_2 = load_tas_gmt('level2.gmt')
level_3 = load_tas_gmt('level3.gmt')

# create dictionary for capitalization: lower-cased name -> name as written in
# the GMT files; a spelling from an earlier level is kept over later levels
kinase_tas_cap = {kinase.lower(): kinase for kinase in level_1}
for level in (level_2, level_3):
    for kinase in level:
        kinase_tas_cap.setdefault(kinase.lower(), kinase)

if (kinase_name != ''):
    # check for valid input (case-insensitive, ignoring surrounding whitespace)
    kinase_name = kinase_name.strip().lower()
    kinase_tas_valid = kinase_name in kinase_tas_cap

    if (kinase_tas_valid):
        # retrieve proper kinase capitalization
        kinase_name = kinase_tas_cap[kinase_name]

        # small molecules per level; empty when the kinase is absent from a level
        sm1_list = level_1.get(kinase_name, [])
        sm2_list = level_2.get(kinase_name, [])
        sm3_list = level_3.get(kinase_name, [])

        # joining an empty list yields '', matching the defaults set above
        sm1_string = ', '.join(sm1_list)
        sm2_string = ', '.join(sm2_list)
        sm3_string = ', '.join(sm3_list)

        # show a heading plus the small-molecule list for every non-empty level
        for kd_range, sm_list, sm_string in (
            ('Kd < 100 nM', sm1_list, sm1_string),
            ('100 nM ≤ Kd < 1µM', sm2_list, sm2_string),
            ('1µM ≤ Kd < 10 µM', sm3_list, sm3_string),
        ):
            if len(sm_list) != 0:
                display(HTML('<h2>' + kinase_name + ' binds to the following small molecules with ' + kd_range + ': </h2>'))
                display(HTML(sm_string))

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no kinase input. </h2>'))

In [None]:
%%appyter markdown

### Generate bar chart for kinase input based on TAS vectors
Hover over bar(s) to see small molecules.

In [None]:
# Bar chart of how many small molecules bind the inputted kinase in each Kd
# range. Nothing is plotted when the input is missing or was not recognized.
if kinase_name == '':
    display(HTML('<h2> There was no kinase input. </h2>'))
elif not kinase_tas_valid:
    display(HTML('<h2> The kinase input was not recognized. </h2>'))
else:
    x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
    y = [len(sm1_list), len(sm2_list), len(sm3_list)]

    # Wrap the small-molecule lists at 50 characters for the hover boxes
    sm1_lab, sm2_lab, sm3_lab = (
        '<br>'.join(textwrap.wrap(sm_string, width = 50))
        for sm_string in (sm1_string, sm2_string, sm3_string)
    )

    hovertext = [sm1_lab, sm2_lab, sm3_lab]
    title = 'Small molecules that bind ' + kinase_name
    xaxis_title = 'Equilibrium Dissociation Constant'
    yaxis_title = 'Number of small molecules'

    fig = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
    fig.show()