In [None]:
#%%appyter init
# Initialise the appyter magics. The callable handed to magic.init returns
# globals() when invoked, i.e. it gives appyter access to this notebook's
# global namespace.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown

# KINOMEscan and TAS Vector Data Visualization

In [None]:
import os.path
import textwrap
import urllib.request
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import xlrd
# display/HTML come from the public IPython.display module; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

In [None]:
%%appyter hide_code

{# Form sections. The StringFields declared in later cells attach to 'section0' (KINOMEscan) and 'section' (TAS). #}
{% do SectionField(name ='title', title = 'Visualize KINOMEscan Data', 
                   subtitle = 'KINOMEscan assay platform data show small molecule binding and inhibition of protein kinases.', 
                   img = 'poll.png')%}

{% do SectionField(name = 'section0', title = 'Input a Small Molecule and/or Kinase for KINOMEscan Data', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 

{% do SectionField(name ='title2', title = 'Visualize Target Affinity Spectrum (TAS) Data',
                   subtitle = 'Target Affinity Spectrum (TAS) vectors summarize binding information ' +
                   'from multiple assay formats.', img = 'poll.png')%}

{# Subtitle corrected: a small molecule input lists the kinases it binds, matching the KINOMEscan section. #}
{% do SectionField(name = 'section', title = 'Input a Small Molecule and/or Kinase for TAS Data', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 

In [None]:
%%HTML
<!-- Styling for rendered DataFrame tables (table.dataframe): solid black cell borders and black text -->

<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 1px  black solid !important;
  color: black !important;
}
</style>

In [None]:
# Helper that drops the row label from a df.loc selection by returning only its first value
def remove_row_number(Series):
    """Return the first element of ``Series`` converted to ``str``.

    Iterating a pandas Series yields its values (not the index), so this
    discards the row number that would otherwise show up when the selection
    is printed. Returns ``None`` when ``Series`` has no elements.
    """
    values = iter(Series)
    try:
        first_value = next(values)
    except StopIteration:
        return None
    return str(first_value)

In [None]:
%%appyter markdown

### Generate table of kinases for small molecule input from KINOMEscan data, with either equilibrium dissociation constant Kd or % Control 
For both Kd and % Control, a lower value indicates higher binding affinity and stronger inhibition.

In [None]:
%%appyter code_exec

# If a small molecule name was entered, look it up in the HMS LINCS KINOMEscan
# dataset index, download its results and tabulate the kinases it binds,
# sorted by % Control when that column is present and by Kd otherwise.

sm_name_kinome = {{ StringField(
        name = 'Small molecule name for KINOMEscan',
        label = 'Small molecule name',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = 'afatinib',
        section = 'section0'
    )}}

if (sm_name_kinome != ''):
    # The dataset index is downloaded once; name matching works on a
    # lower-cased view so the original capitalization stays available.
    kinome_scan_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')

    sm_name_kinome_lower = sm_name_kinome.lower() # Disregard capitalization of input

    # check for valid input
    sm_name_matches = kinome_scan_df['sm_name'].str.lower() == sm_name_kinome_lower
    sm_kinome_valid = bool(sm_name_matches.any())

    if (sm_kinome_valid):
        # The first matching row decides which dataset is downloaded.
        dataset_id = remove_row_number(kinome_scan_df.loc[sm_name_matches, 'dataset_id'])

        # return small molecule with proper capitalization
        sm_kinome_proper_cap = remove_row_number(
            kinome_scan_df.loc[kinome_scan_df['dataset_id'] == int(dataset_id), 'sm_name'])

        # retrieve the results spreadsheet for that dataset
        sm_url = 'http://lincs.hms.harvard.edu/db/datasets/{}/results?search=&output_type=.xlsx'.format(dataset_id)
        sm_df = pd.read_excel(sm_url)

        # These three names are read by the bar chart cell that follows.
        percentages = {}
        percentages_exists = False
        kds = {}

        if '% Control' in sm_df:
            percentages_exists = True
            sm_df = sm_df.sort_values(by = '% Control')
            percentages = dict(zip(sm_df['Protein Name'], sm_df['% Control']))
            # Rows at exactly 100 are left out of the table (but kept in `percentages`).
            percentages_without_100 = {k: v for k, v in percentages.items() if v != 100.0}

            percentages_pd = pd.DataFrame(percentages_without_100, index = ['% Control'])
            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Kinases with % Control values of 100 are omitted. Scroll right if necessary to see all kinases.</i>'))
            display(HTML(percentages_pd.to_html()))

        else:
            sm_df = sm_df[sm_df['Kd'].notna()] # remove all rows with no value
            sm_df = sm_df.sort_values(by = ['Kd'])
            kds = dict(zip(sm_df['Protein Name'], sm_df['Kd']))
            kds_pd = pd.DataFrame(kds, index = ['Kd'])

            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Scroll right if necessary to see all kinases.</i>'))
            display(HTML(kds_pd.to_html()))

    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))
    

In [None]:
# Helper the bar charts use to turn a list of names into a readable hover label
def prep_and_wrap(aList):
    """Join the names in ``aList`` with ', ' and wrap the text for display.

    The joined text is wrapped at 50 characters and the resulting lines are
    joined with ``<br>``. An empty list yields an empty string.
    """
    wrapper = textwrap.TextWrapper(width = 50)
    wrapped_lines = wrapper.wrap(', '.join(aList))
    return '<br>'.join(wrapped_lines)

In [None]:
%%appyter markdown

### Generate bar chart for small molecule input from KINOMEscan data
Hover over bar(s) to see kinases.

In [None]:
# Bar chart for the KINOMEscan small molecule input: counts the kinases that
# fall into each % Control bin (or each Kd bin when no % Control data exists).
if sm_name_kinome == '':
    display(HTML('<h2> There was no small molecule input. </h2>'))
elif not sm_kinome_valid:
    display(HTML('<h2> The small molecule input was not recognized. </h2>'))
else:
    # Pick the measurement, the exclusive upper bound of each bin, and the labels.
    # Readings at or above the last bound are not charted.
    if percentages_exists:
        readings = percentages
        upper_bounds = (20.0, 40.0, 60.0, 80.0, 99.0)
        bin_labels = ['< 20%', '20% ≤ ... < 40%', '40% ≤ ... < 60%', '60% ≤ ... < 80%', '80% ≤ ... < 99%']
        axis_title = "% Control"
    else:
        readings = kds
        upper_bounds = (100.0, 1000.0, 10000.0)
        bin_labels = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
        axis_title = "Kd"

    # Each kinase goes into the first bin whose upper bound exceeds its reading.
    kinases_per_bin = [[] for _ in upper_bounds]
    for kinase, reading in readings.items():
        for slot, bound in enumerate(upper_bounds):
            if reading < bound:
                kinases_per_bin[slot].append(kinase)
                break

    bin_counts = [len(members) for members in kinases_per_bin]
    hover_labels = [prep_and_wrap(members) for members in kinases_per_bin]

    bars = go.Bar(
        x = bin_labels,
        y = bin_counts,
        text = bin_counts,
        width = 0.3,
        textposition = 'auto',
        hovertext = hover_labels,
        hoverlabel = dict(font = dict(size = 18)),
        marker = dict(color = bin_counts,
                      colorscale = ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']),
    )
    sm_kinome_fig = go.Figure(data = [bars])

    sm_kinome_fig.update_layout(
        title = dict(
            text = 'Kinases bound by ' + sm_kinome_proper_cap,
            y = 0.87,
            x = 0.5,
            xanchor = 'center',
            yanchor = 'top',
        ),
        xaxis_title = axis_title,
        yaxis_title = "Kinases",
        font = dict(size = 18, color = 'black'),
    )

    sm_kinome_fig.show()

In [None]:
%%appyter markdown

### Generate list of small molecules for kinase input from KINOMEscan data
Includes download of the KINOMEscan GMT files. For more information on how these files were assembled, see 
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts).

In [None]:
%%appyter code_exec

# If a kinase name was entered, list the small molecules recorded for it in the
# KINOMEscan GMT files, with their % Control and/or Kd values.
kinase_name_kinome = {{ StringField(
        name = 'Kinase name for KINOMEscan',
        label = 'Kinase name',
        description = 'One kinase input. Examples include: ABL2, ALK, CSF1R, EPHA3',
        default = '',
        section = 'section0'
    )}}

# Download each GMT file if it is not cached locally, then parse it into a
# dict: first tab-separated field -> list of the fields from the third onward.
# Whitespace-only fields (trailing tabs / newlines) are dropped and the file
# handle is closed once the file has been read.
kinome_gmt_sources = [
    ('percentage_levels.gmt', 'https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/percentage_levels.gmt'),
    ('kds_levels.gmt', 'https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/kds_levels.gmt'),
]
kinome_gmt_tables = []
for gmt_file, gmt_url in kinome_gmt_sources:
    if not os.path.exists(gmt_file):
        urllib.request.urlretrieve(gmt_url, gmt_file)
    gmt_table = dict()
    with open(gmt_file, 'r') as gmt_handle:
        for gmt_line in gmt_handle:
            gmt_fields = gmt_line.rstrip('\n').split('\t')
            if gmt_fields[0] == '':
                continue # skip blank lines
            gmt_table[gmt_fields[0]] = [field.strip() for field in gmt_fields[2:] if field.strip()]
    kinome_gmt_tables.append(gmt_table)
percentage_levels, kds_levels = kinome_gmt_tables

# Create dictionary for capitalization (lower-case name -> name as written in the files)
kinase_names_cap = dict()
for kinase_name in percentage_levels.keys():
    kinase_names_cap[kinase_name.lower()] = kinase_name
for kinase_name in kds_levels.keys():
    kinase_names_cap[kinase_name.lower()] = kinase_name

if (kinase_name_kinome != ''):
    kinase_name_kinome = kinase_name_kinome.lower()

    # check for valid input
    kinase_kinome_valid = kinase_name_kinome in kinase_names_cap

    if (kinase_kinome_valid):
        kinase_proper_cap = kinase_names_cap[kinase_name_kinome] # Retrieve proper capitalization of kinase

        percentage_list = percentage_levels.get(kinase_proper_cap, [])
        kds_list = kds_levels.get(kinase_proper_cap, [])

        # Entries look like 'name,value'. Splitting on the LAST comma keeps
        # names that themselves contain commas intact. Values stay strings;
        # float() is only applied for sorting.
        percentage_dict = dict(entry.rsplit(',', 1) for entry in percentage_list)
        percentage_dict = dict(sorted(percentage_dict.items(), key=lambda item: float(item[1]))) # Sort
        kds_dict = dict(entry.rsplit(',', 1) for entry in kds_list)
        kds_dict = dict(sorted(kds_dict.items(), key=lambda item: float(item[1]))) # Sort

        # These flags are read by the bar chart cell that follows.
        percentage_dict_exists = len(percentage_dict) != 0
        kds_dict_exists = len(kds_dict) != 0

        if percentage_dict_exists:
            table1df = pd.DataFrame([percentage_dict], index = ['% Control'])

            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding % Control values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all small molecules. Molecules with % Control of 100 were omitted.</i>'))
            display(HTML(table1df.to_html()))

        if kds_dict_exists:
            table2df = pd.DataFrame([kds_dict], index = ['Kd'])

            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding Kd values</h2>'))
            display(HTML('<i>Scroll right if necessary to see all molecules.</i>'))
            display(HTML(table2df.to_html()))

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no kinase input. </h2>'))

In [None]:
%%appyter markdown

### Generate bar chart for kinase input from KINOMEscan data
Hover over bar(s) to see small molecules.

In [None]:
# If there was a kinase input, generate and display the bar chart(s): one for
# % Control and/or one for Kd, depending on which tables exist for the kinase.
if (kinase_name_kinome != ''):
    if (kinase_kinome_valid):
        # Each spec: (values keyed by small molecule, exclusive upper bounds of
        # the leading bins, x-axis labels, whether the last label is a
        # catch-all bin, bar width, name of the measure).
        chart_specs = []
        if percentage_dict_exists:
            chart_specs.append((
                percentage_dict,
                [20.0, 40.0, 60.0, 80.0],
                ['< 20%', '20% ≤ ... < 40%', '40% ≤ ... < 60%', '60% ≤ ... < 80%', '80% ≤ ... < 100%'],
                True, 0.3, '% Control'))
        if kds_dict_exists:
            chart_specs.append((
                kds_dict,
                [100.0, 1000.0, 10000.0],
                ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM'],
                False, 0.2, 'Kd'))

        for values_by_sm, upper_bounds, x, has_catch_all, bar_width, measure in chart_specs:
            # List of small molecules in each category
            sm_groups = [[] for _ in x]
            for sm, raw_value in values_by_sm.items():
                value = float(raw_value)
                for slot, upper in enumerate(upper_bounds):
                    if value < upper:
                        sm_groups[slot].append(sm)
                        break
                else:
                    # No bound matched: % Control keeps the value in its last
                    # bin, Kd values of 10000 or more are not charted.
                    if has_catch_all:
                        sm_groups[-1].append(sm)

            y = [len(group) for group in sm_groups]

            fig = go.Figure(data=[go.Bar(x = x, y = y, text = y, width = bar_width, textposition = 'auto',
                                     hovertext = [prep_and_wrap(group) for group in sm_groups], # wrapped hover labels
                                     hoverlabel = dict(font = dict(size = 18)),
                                     marker = {'color': y,
                                               'colorscale': ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']
                                              })])

            fig.update_layout(
                title = {
                    'text': 'Small molecules bound by ' + kinase_proper_cap + ' (' + measure + ')',
                    'y':0.87,
                    'x':0.5,
                    'xanchor': 'center',
                    'yanchor': 'top',
                },
                xaxis_title = measure,
                yaxis_title = "Small molecules",
                font = dict(
                    size = 18,
                    color = 'black'
                )
            )

            fig.show()

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))

else:
    # This cell depends on the kinase input, so report that one as missing.
    display(HTML('<h2> There was no kinase input. </h2>'))

In [None]:
%%appyter markdown

### Generate list of kinases for small molecule input based on TAS vectors

In [None]:
%%appyter code_exec

# If a small molecule name was entered, fetch its TAS results and list the
# kinases it binds, grouped by binding class (1, 2 and 3).

sm_name = {{ StringField(
        name = 'Small molecule name',
        label = 'Small molecule name',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = 'afatinib',
        section = 'section'
    )}}

# Defaults so the bar chart cell that follows can always read these names.
kinase1_string = ''
kinase2_string = ''
kinase3_string = ''

if (sm_name != ''):
    # The dataset index is downloaded once; matching uses a lower-cased view.
    kinome_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')

    # regex=True is explicit because pandas 2.x treats the pattern as a literal
    # string by default, which would leave the ID unchanged.
    kinome_df['sm_hms_id'] = kinome_df['sm_hms_id'].str.replace(r'\D', '', regex=True) #remove HMSL before the ID
    sm_name_lower = sm_name.lower()

    # check for valid input
    sm_name_matches = kinome_df['sm_name'].str.lower() == sm_name_lower
    sm_tas_valid = bool(sm_name_matches.any())

    if (sm_tas_valid):
        hms_id = remove_row_number(kinome_df.loc[sm_name_matches, 'sm_hms_id'])

        # return small molecule with proper capitalization
        sm_tas_proper_cap = remove_row_number(kinome_df.loc[kinome_df['sm_hms_id'] == hms_id, 'sm_name'])

        url = 'http://lincs.hms.harvard.edu/db/datasets/20000/results?small+molecules={}&output_type=.csv'.format(hms_id)
        df = pd.read_csv(url)

        # One list per binding class: gene symbols in order of first appearance,
        # without duplicates. Rows with no gene symbol are skipped so they are
        # not listed as 'nan'.
        kinase1_list, kinase2_list, kinase3_list = [
            list(dict.fromkeys(
                df.loc[df['Binding Class'] == binding_class, 'HUGO Gene Symbol'].dropna().astype(str)))
            for binding_class in (1, 2, 3)
        ]

        kinase1_string = ', '.join(kinase1_list)
        kinase2_string = ', '.join(kinase2_list)
        kinase3_string = ', '.join(kinase3_list)

        kd_ranges = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
        kinase_strings = [kinase1_string, kinase2_string, kinase3_string]
        for kd_range, kinase_string in zip(kd_ranges, kinase_strings):
            if kinase_string != '': # only show classes that have at least one kinase
                display(HTML('<h2>' + sm_tas_proper_cap + ' binds to the following kinases with ' + kd_range + ': </h2>'))
                display(HTML(kinase_string))

    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))

In [None]:
%%appyter markdown

### Generate bar chart for small molecule input based on TAS vectors
Hover over bar(s) to see kinases.

In [None]:
# Bar chart for the TAS small molecule input: number of kinases per Kd range,
# with the kinase names shown on hover.
if sm_name == '':
    display(HTML('<h2> There was no small molecule input. </h2>'))
elif not sm_tas_valid:
    display(HTML('<h2> The small molecule input was not recognized. </h2>'))
else:
    kd_ranges = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
    kinase_counts = [len(kinases) for kinases in (kinase1_list, kinase2_list, kinase3_list)]

    # Wrap the comma-separated kinase names so the hover box stays narrow
    hover_labels = [
        '<br>'.join(textwrap.wrap(joined_names, width=50))
        for joined_names in (kinase1_string, kinase2_string, kinase3_string)
    ]

    bars = go.Bar(
        x = kd_ranges,
        y = kinase_counts,
        text = kinase_counts,
        width = 0.3,
        textposition = 'auto',
        hovertext = hover_labels,
        hoverlabel = dict(font = dict(size = 18)),
        marker = dict(color = kinase_counts,
                      colorscale = ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']),
    )
    fig = go.Figure(data = [bars])

    fig.update_layout(
        title = dict(
            text = 'Kinases bound by ' + sm_tas_proper_cap,
            y = 0.87,
            x = 0.5,
            xanchor = 'center',
            yanchor = 'top',
        ),
        xaxis_title = "Equilibrium Dissociation Constant",
        yaxis_title = "Kinases",
        font = dict(size = 18, color = 'black'),
    )

    fig.show()

In [None]:
%%appyter markdown

### Generate list of small molecules for kinase input based on TAS vectors
Includes download of the TAS GMT files. For more information on how these files were assembled, see 
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts). 

In [None]:
%%appyter code_exec

# If a kinase name was entered, list the small molecules recorded for it in
# each of the three TAS level GMT files.
kinase_name = {{ StringField(
        name = 'Kinase name',
        label = 'Kinase name',
        description = 'One kinase input. Examples include: ABL1, ABL2, ALK, CCNA1, MAP2K1',
        default = '',
        section = 'section'
    )}}

# Defaults so the bar chart cell that follows can always read these names.
sm1_string = ''
sm2_string = ''
sm3_string = ''

# Download each GMT file if it is not cached locally, then parse it into a
# dict: first tab-separated field -> list of the fields from the third onward.
# Whitespace-only fields (trailing tabs / newlines) are dropped so they are
# neither displayed nor counted in the bar chart, and the file handle is
# closed once the file has been read.
tas_gmt_sources = [
    ('level1.gmt', 'https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level1.gmt'),
    ('level2.gmt', 'https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level2.gmt'),
    ('level3.gmt', 'https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level3.gmt'),
]
tas_gmt_tables = []
for gmt_file, gmt_url in tas_gmt_sources:
    if not os.path.exists(gmt_file):
        urllib.request.urlretrieve(gmt_url, gmt_file)
    gmt_table = dict()
    with open(gmt_file, 'r') as gmt_handle:
        for gmt_line in gmt_handle:
            gmt_fields = gmt_line.rstrip('\n').split('\t')
            if gmt_fields[0] == '':
                continue # skip blank lines
            gmt_table[gmt_fields[0]] = [field.strip() for field in gmt_fields[2:] if field.strip()]
    tas_gmt_tables.append(gmt_table)
level_1, level_2, level_3 = tas_gmt_tables

# create dictionary for capitalization (lower-case name -> name as written);
# a spelling from level 1 takes precedence over levels 2 and 3
kinase_tas_cap = dict()
for kinase in level_1.keys():
    kinase_tas_cap[kinase.lower()] = kinase
for later_level in (level_2, level_3):
    for kinase in later_level.keys():
        kinase_tas_cap.setdefault(kinase.lower(), kinase)

if (kinase_name != ''):
    # check for valid input
    kinase_name = kinase_name.lower()
    kinase_tas_valid = kinase_name in kinase_tas_cap

    if (kinase_tas_valid):
        # retrieve proper kinase capitalization
        kinase_name = kinase_tas_cap[kinase_name]

        # lists for each target affinity for the small molecules
        sm1_list = level_1.get(kinase_name, [])
        sm2_list = level_2.get(kinase_name, [])
        sm3_list = level_3.get(kinase_name, [])

        sm1_string = ', '.join(sm1_list)
        sm2_string = ', '.join(sm2_list)
        sm3_string = ', '.join(sm3_list)

        kd_ranges = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
        sm_lists = [sm1_list, sm2_list, sm3_list]
        for kd_range, sm_list in zip(kd_ranges, sm_lists):
            if len(sm_list) != 0: # only show levels that have at least one small molecule
                display(HTML('<h2>' + kinase_name + ' binds to the following small molecules with ' + kd_range + ': </h2>'))
                display(HTML(', '.join(sm_list)))

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no kinase input. </h2>'))

In [None]:
%%appyter markdown

### Generate bar chart for kinase input based on TAS vectors
Hover over bar(s) to see small molecules.

In [None]:
# Bar chart for the TAS kinase input: number of small molecules per Kd range,
# with the small molecule names shown on hover.
if kinase_name == '':
    display(HTML('<h2> There was no kinase input. </h2>'))
elif not kinase_tas_valid:
    display(HTML('<h2> The kinase input was not recognized. </h2>'))
else:
    kd_ranges = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
    sm_counts = [len(molecules) for molecules in (sm1_list, sm2_list, sm3_list)]

    # Wrap the comma-separated small molecule names so the hover box stays narrow
    hover_labels = [
        '<br>'.join(textwrap.wrap(joined_names, width = 50))
        for joined_names in (sm1_string, sm2_string, sm3_string)
    ]

    bars = go.Bar(
        x = kd_ranges,
        y = sm_counts,
        text = sm_counts,
        width = 0.3,
        textposition = 'auto',
        hovertext = hover_labels,
        hoverlabel = dict(font = dict(size = 18)),
        marker = dict(color = sm_counts,
                      colorscale = ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']),
    )
    fig = go.Figure(data = [bars])

    fig.update_layout(
        title = dict(
            text = 'Small molecules that bind ' + kinase_name,
            y = 0.87,
            x = 0.5,
            xanchor = 'center',
            yanchor = 'top',
        ),
        xaxis_title = "Equilibrium Dissociation Constant",
        yaxis_title = "Number of small molecules",
        font = dict(size = 18, color = 'black'),
    )

    fig.show()