In [None]:
#%%appyter init
from appyter import magic
# Initialise the appyter cell magics (%%appyter ...) used by the cells below.
# The argument is a zero-argument callable (globals is bound as the default
# value of `_`) that returns the globals() namespace when appyter calls it.
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown

# KINOMEscan and TAS Vector Data Visualization

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import textwrap 
import os.path
import urllib.request
import xlrd
from collections import defaultdict
from IPython.core.display import display, HTML
import json
import requests 
from time import sleep

In [None]:
%%appyter hide_code

{# Form layout: each SectionField declares one section of the input form. The input fields defined in the code cells below attach to a section through their `section = '...'` name ('section0', 'section2', 'section3', 'section'). #}
{% do SectionField(name ='title', title = 'Visualize KINOMEscan Data', 
                   subtitle = 'KINOMEscan assay platform data show small molecule binding and inhibition of protein kinases.', 
                   img = 'poll.png')%}

{% do SectionField(name = 'section0', title = 'Input a Small Molecule and/or Kinase', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 


{% do SectionField(name = 'section2', title = 'Upload or Enter a List of Kinases', 
                   subtitle = 'Upload or enter a kinase list to visualize the drugs that best target those protein kinases ', 
                   img = 'file-upload.png')%}  

{% do SectionField(name = 'section3', title = 'Upload or Enter a Gene/Protein List', 
                   subtitle = 'Upload or enter a gene/protein list to perform Kinase Enrichment Analysis, then visualize the drugs ' + 
                   'that best target the top associated protein kinases.', 
                   img = 'KEA3_transparent_logo.png')%} 

{% do SectionField(name ='title2', title = 'Visualize Target Affinity Spectrum (TAS) Data',
                   subtitle = 'Target Affinity Spectrum (TAS) vectors summarize binding information ' +
                   'from multiple assay formats.', img = 'poll.png')%}

{# Subtitle fixed: a small molecule binds to kinases (it previously read "molecules"), matching the wording of 'section0'. #}
{% do SectionField(name = 'section', title = 'Input a Small Molecule and/or Kinase', 
                   subtitle = 'Input a small molecule to visualize the kinases it binds to ' + 
                   '-AND/OR- input a kinase to visualize the small molecules that bind it.', img = 'database.png')%} 

In [None]:
%%HTML
<!-- Formatting for the tables: every td/th cell of a table with class
     "dataframe" gets a 2px solid black border and black text. Both rules are
     marked !important so they override any competing notebook styles. -->

<style type="text/css">
table.dataframe td, table.dataframe th {
    border: 2px  black solid !important;
  color: black !important;
}
</style>

In [None]:
# Helper that strips the row label from a df.loc selection by unwrapping its first value
def remove_row_number(Series):
    """Return the first element yielded by ``Series`` converted to ``str``.

    Only the first element is looked at; any further elements are ignored.
    Returns ``None`` when ``Series`` yields nothing.
    """
    elements = iter(Series)
    try:
        first = next(elements)
    except StopIteration:
        return None
    return str(first)
    
# Builds the readable hover label used by the bar charts
def prep_and_wrap(aList):
    """Join the names in ``aList`` with ', ' and wrap the text for display.

    The joined text is wrapped at 50 characters and the resulting lines are
    separated by the HTML line break '<br>'. An empty list yields ''.
    """
    joined_names = ', '.join(aList)
    # textwrap.fill joins the wrapped lines with '\n'; swap that for '<br>'
    return textwrap.fill(joined_names, width = 50).replace('\n', '<br>')

# Function to generate all bar charts
def generateBarChart(xdata, ydata, w, hover, titletext, xtitle, ytitle):
    """Build a plotly bar chart figure.

    Parameters:
        xdata: category labels, one per bar.
        ydata: bar heights; also printed on the bars and used to colour them.
        w: bar width passed to ``go.Bar``.
        hover: hover text, one entry per bar.
        titletext: chart title (centred).
        xtitle, ytitle: axis titles.

    Returns:
        The ``go.Figure``; the caller is responsible for calling ``.show()``.
    """
    bar = go.Bar(x = xdata, y = ydata, text = ydata, width = w, textposition = 'auto',
                 hovertext = hover,
                 hoverlabel = dict(font = dict(size = 18)),
                 # Colour each bar by its own height. This previously read the
                 # global name `y`, which only worked when the calling cell
                 # happened to store the same data in a variable called `y`.
                 marker = {'color': ydata,
                           'colorscale': ['#66CCEE', '#228833', '#CCBB44', '#EE6677', '#AA3377']})
    fig = go.Figure(data=[bar])

    fig.update_layout(
                    title = {
                        'text': titletext,
                        'y':0.87,
                        'x':0.5,
                        'xanchor': 'center',
                        'yanchor': 'top',
                    },
                    xaxis_title = xtitle,
                    yaxis_title = ytitle,
                    font = dict(
                        size = 18,
                        color = 'black'
                        )
                    )

    return fig 

# Function to retrieve KEA3 results
def get_kea3_results(gene_set, query_name):
    """Submit a gene set to the KEA3 enrichment endpoint and return the parsed JSON reply.

    Parameters:
        gene_set: the genes to analyse (sent as the 'gene_set' field).
        query_name: label for the query (sent as the 'query_name' field).

    Raises:
        Exception: when the HTTP response is not OK.
    """
    endpoint = 'https://amp.pharm.mssm.edu/kea3/api/enrich/'
    request_body = json.dumps(dict(gene_set=gene_set, query_name=query_name))

    reply = requests.post(endpoint, data=request_body)
    if reply.ok:
        sleep(1)  # one-second pause after a successful request
        return json.loads(reply.text)

    raise Exception('Error analyzing gene list')

# Helpers for get_top_drugs
def _mean_binding(sm, kinases, nested_dict, no_binding):
    """Average the value recorded for ``sm`` over ``kinases``.

    A kinase without a data point for ``sm`` (or absent from ``nested_dict``)
    counts as ``no_binding``. An empty ``kinases`` collection also yields
    ``no_binding`` instead of dividing by zero.
    """
    kinases = list(kinases)
    if not kinases:
        return no_binding
    total = 0.0
    for kinase in kinases:
        value = nested_dict.get(kinase, {}).get(sm)
        total += no_binding if value is None else float(value)
    return total / len(kinases)


def _rank_small_molecules(kinaseList, other_kinases, nested_dict, no_binding):
    """Rank every sm that has a data point for at least one kinase in ``kinaseList``.

    Returns two lists of (sm, score) pairs sorted by ascending score:
    the plain average over ``kinaseList``, and the net average
    (average over ``kinaseList`` minus average over ``other_kinases``).
    """
    # Sorting the candidate names first makes tie-breaking deterministic
    # (the sort on the score below is stable).
    candidates = sorted({sm for kinase in kinaseList for sm in nested_dict.get(kinase, {})})
    averages = {}
    net_averages = {}
    for sm in candidates:
        target_avg = _mean_binding(sm, kinaseList, nested_dict, no_binding)
        other_avg = _mean_binding(sm, other_kinases, nested_dict, no_binding)
        averages[sm] = target_avg
        net_averages[sm] = target_avg - other_avg

    def by_score(pair):
        return pair[1]

    return sorted(averages.items(), key=by_score), sorted(net_averages.items(), key=by_score)


def _format_top(ranked, prefix, suffix, top_n):
    """Format the first ``top_n`` (sm, score) pairs; pad with 'N/A' up to ``top_n`` entries."""
    rows = [sm + prefix + str(round(score, 2)) + suffix for sm, score in ranked[:top_n]]
    return rows + ['N/A'] * (top_n - len(rows))


# Function to retrieve top drugs given a list of kinases
def get_top_drugs(kinaseList, other_kinases, top_n=5):
    """Rank small molecules (sm) for a set of kinases using % Control and Kd data.

    Reads the module-level ``perc_nested_dict`` and ``kds_nested_dict``
    ({kinase: {sm: value}}). Lower scores rank first. A missing data point is
    treated as no binding: 100.0 for % Control, 10000.0 for Kd.

    Parameters:
        kinaseList: the kinases of interest.
        other_kinases: all remaining kinases, used for the specificity ranking.
        top_n: number of entries per returned column (default 5).

    Returns:
        (col1_data, col2_data, col1_spec, col2_spec): four lists of ``top_n``
        display strings -- % Control ranking, Kd ranking, net % Control
        ranking, net Kd ranking. Columns with fewer than ``top_n`` ranked
        molecules are padded with 'N/A' so callers can still build a table.
    """
    ranked_perc_dict, ranked_perc_dict_spec = _rank_small_molecules(
        kinaseList, other_kinases, perc_nested_dict, 100.0)
    # The Kd specificity score must be computed from the Kd data; it previously
    # read the % Control dictionary for the "other kinases" average.
    ranked_kds_dict, ranked_kds_dict_spec = _rank_small_molecules(
        kinaseList, other_kinases, kds_nested_dict, 10000.0)

    # Arrange data in columns for easy table-printing
    col1_data = _format_top(ranked_perc_dict, ' (Average of ', '% Control) ', top_n)
    col2_data = _format_top(ranked_kds_dict, ' (Average Kd of ', 'nM)', top_n)
    col1_spec = _format_top(ranked_perc_dict_spec, ' (Net average of ', '% Control)', top_n)
    col2_spec = _format_top(ranked_kds_dict_spec, ' (Net average Kd of ', 'nM)', top_n)

    return col1_data, col2_data, col1_spec, col2_spec

## Generate a table and bar chart of kinases for the small molecule input from KINOMEscan data, using either the equilibrium dissociation constant (Kd) or % Control
For both Kd and % Control, the lower the number, the higher the binding affinity and inhibition.
Hover over bar chart bar(s) to see kinases.

In [None]:
%%appyter code_exec

# if small molecule name inputted, will go through its CSV file and sort by % Control

sm_name_kinome = {{ StringField(
        name = 'Small molecule name for KINOMEscan',
        label = 'Small molecule',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = '',
        section = 'section0'
    )}}

# Index spreadsheet of the KINOMEscan datasets; the columns read in this
# notebook are 'sm_name', 'dataset_id' and 'sm_hms_id'.
kinome_scan_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')

# Create list of all sm to be used later.
# Taken straight from the column so it follows the spreadsheet's actual length
# (this used to loop over a hard-coded 182 rows).
all_sm = kinome_scan_df['sm_name'].tolist()

if (sm_name_kinome != ''):
    sm_name_kinome_lower = sm_name_kinome.lower() # Disregard capitalization of input

    # check for valid input: match against the dataset index that is already
    # loaded in kinome_scan_df instead of downloading the spreadsheet again
    sm_matches = kinome_scan_df[kinome_scan_df['sm_name'].str.lower() == sm_name_kinome_lower]
    sm_kinome_valid = not sm_matches.empty

    if (sm_kinome_valid):
        # the first matching row supplies the dataset id and the properly capitalized name
        dataset_id = int(sm_matches['dataset_id'].iloc[0])
        sm_kinome_proper_cap = str(sm_matches['sm_name'].iloc[0])

        # retrieve correct small molecule csv
        sm_url = 'http://lincs.hms.harvard.edu/db/datasets/{}/results?search=&output_type=.xlsx'.format(dataset_id)
        sm_df = pd.read_excel(sm_url)

        if '% Control' in sm_df:
            sm_df = sm_df.sort_values(by = '% Control')
            percentages = dict(zip(sm_df['Protein Name'], sm_df['% Control']))

            # Print in table format, leaving out kinases at exactly 100 % Control
            percentages_without_100 = {k: v for k, v in percentages.items() if v != 100.0}
            percentages_pd = pd.DataFrame(percentages_without_100, index = ['% Control'])
            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Kinases with % Control values of 100 are omitted. Scroll right if necessary to see all kinases.</i>'))
            display(HTML(percentages_pd.to_html()))

            # Generate bar chart: one bucket per upper bound; kinases at or
            # above the last bound (95 %) are not charted
            perc_upper_bounds = [20.0, 40.0, 60.0, 80.0, 95.0]
            perc_buckets = [[] for _ in perc_upper_bounds]
            for kinase, perc in percentages.items():
                for bucket, upper_bound in zip(perc_buckets, perc_upper_bounds):
                    if perc < upper_bound:
                        bucket.append(kinase)
                        break

            # x, y and hovertext all have one entry per bucket (x used to carry
            # a sixth label with no matching bar)
            x = ['< 20%', '20% ≤ ... < 40%', '40% ≤ ... < 60%', '60% ≤ ... < 80%', '80% ≤ ... < 95%']
            y = [len(bucket) for bucket in perc_buckets]
            hovertext = [prep_and_wrap(bucket) for bucket in perc_buckets] # wrapped kinase names
            title = 'Kinases bound by ' + sm_kinome_proper_cap
            xaxis_title = '% Control'
            yaxis_title = 'Kinases'

            fig1 = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
            fig1.show()

        elif 'Kd' in sm_df:
            sm_df = sm_df[sm_df['Kd'].notna()] # remove all rows with no value
            sm_df = sm_df.sort_values(by = ['Kd'])
            kds = dict(zip(sm_df['Protein Name'], sm_df['Kd']))
            kds_pd = pd.DataFrame(kds, index = ['Kd'])

            display(HTML('<h2>'+ sm_kinome_proper_cap + ' binds to the following kinases</h2>'))
            display(HTML('<i>Scroll right if necessary to see all kinases.</i>'))
            display(HTML(kds_pd.to_html()))

            # Generate bar chart: kinases with Kd at or above 10 µM are not charted
            kd_upper_bounds = [100.0, 1000.0, 10000.0]
            kd_buckets = [[] for _ in kd_upper_bounds]
            for kinase, kd in kds.items():
                for bucket, upper_bound in zip(kd_buckets, kd_upper_bounds):
                    if kd < upper_bound:
                        bucket.append(kinase)
                        break

            x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
            y = [len(bucket) for bucket in kd_buckets]
            hovertext = [prep_and_wrap(bucket) for bucket in kd_buckets] # wrapped kinase names
            title = 'Kinases bound by ' + sm_kinome_proper_cap
            xaxis_title = 'Kd'
            yaxis_title = 'Kinases'

            fig2 = generateBarChart(x, y, 0.2, hovertext, title, xaxis_title, yaxis_title)
            fig2.show()

        else:
            # neither measurement column is present in the downloaded dataset
            display(HTML('<h2> No % Control or Kd values were found for this small molecule. </h2>'))

    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))        

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))

## Generate table and bar chart of small molecules for kinase input from KINOMEscan data
This step downloads the KINOMEscan GMT files. For more information on how these files were assembled, see
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts).
Hover over bar chart bar(s) to see small molecules.

In [None]:
%%appyter code_exec

# if kinase inputted, return small molecules  
kinase_name_kinome = {{ StringField(
        name = 'Kinase name for KINOMEscan',
        label = 'Kinase',
        description = 'One kinase input. Examples include: ABL2, ALK, CSF1R, EPHA3',
        default = '',
        section = 'section0'
    )}}

# Both GMT files live under the same gist revision
_GMT_BASE_URL = 'https://gist.githubusercontent.com/serena-zhang/fa52e2a629dd8ca6b3b0270674f2e5e7/raw/c4a1d1ba7ae84e6a0502a0338152d7e8ffbf9018/'

def _load_gmt_levels(filename):
    """Download ``filename`` if it is not on disk yet and parse it.

    Each non-blank line is tab-separated: field 0 is the kinase name, field 1
    is skipped, and the remaining fields are returned as that kinase's entries.
    Returns {kinase: [entry, ...]}.
    """
    if not os.path.exists(filename):
        urllib.request.urlretrieve(_GMT_BASE_URL + filename, filename)
    levels = {}
    with open(filename, 'r') as gmt_file: # closes the handle once parsed
        for line in gmt_file:
            fields = line.strip().split('\t')
            if fields[0] == '': # blank line: would otherwise create a '' kinase
                continue
            levels[fields[0]] = fields[2:]
    return levels

def _nest_levels(levels):
    """Turn {kinase: ['sm,value', ...]} into {kinase: {sm: value}}.

    Values are kept as strings. Entries that are blank or contain no comma
    are skipped.
    """
    nested = {}
    for kinase, entries in levels.items():
        nested[kinase] = {}
        for entry in entries:
            parts = entry.split(',')
            if len(parts) < 2:
                continue
            nested[kinase][parts[0].strip()] = parts[1]
    return nested

percentage_levels = _load_gmt_levels('percentage_levels.gmt')
kds_levels = _load_gmt_levels('kds_levels.gmt')

# Create nested percentage_levels_dict {perc_nested_dict[kinase]: {sm: % Control, sm2: % Control,...}}
perc_nested_dict = _nest_levels(percentage_levels)

# Create nested kds_dict 
kds_nested_dict = _nest_levels(kds_levels)

# Create dictionary for capitalization: lower-case name -> name as written in the GMT files
kinase_names_cap = {}
for kinase_name in list(percentage_levels.keys()) + list(kds_levels.keys()):
    kinase_names_cap[kinase_name.lower()] = kinase_name
    
if (kinase_name_kinome != ''):    
    kinase_name_kinome = kinase_name_kinome.lower() # Disregard capitalization of input

    # check for valid input
    kinase_kinome_valid = kinase_name_kinome in kinase_names_cap

    if (kinase_kinome_valid):
        kinase_proper_cap = kinase_names_cap[kinase_name_kinome] # Retrieve proper capitalization of kinase

        percentage_dict_exists = False
        kds_dict_exists = False
        # raw GMT entries for this kinase; empty when it has no data of that kind
        percentage_list = percentage_levels.get(kinase_proper_cap, [])
        kds_list = kds_levels.get(kinase_proper_cap, [])

        if (len(percentage_list) != 0):
            percentage_dict_exists = True
            # values are stored as strings, so sort on their numeric value
            percentage_dict = dict(sorted(perc_nested_dict[kinase_proper_cap].items(),
                                          key=lambda item: float(item[1])))
            table1df = pd.DataFrame(percentage_dict, index = ['% Control'])

            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding % Control values (in %)</h2>'))
            display(HTML('<i>Scroll right if necessary to see all small molecules. Molecules with % Control of 100 were omitted.</i>'))
            display(HTML(table1df.to_html()))

            # Display bar chart 
            # Small molecules in each category: four bounded buckets plus a
            # final catch-all bucket for everything from 80 % upwards
            perc_upper_bounds = [20.0, 40.0, 60.0, 80.0]
            perc_buckets = [[] for _ in range(len(perc_upper_bounds) + 1)]
            for sm_key, sm_value in percentage_dict.items():
                perc = float(sm_value)
                for bucket, upper_bound in zip(perc_buckets, perc_upper_bounds):
                    if perc < upper_bound:
                        bucket.append(sm_key)
                        break
                else:
                    perc_buckets[-1].append(sm_key)

            x = ['< 20%', '20% ≤ ... < 40%', '40% ≤ ... < 60%', '60% ≤ ... < 80%', '80% ≤ ... < 100%']
            y = [len(bucket) for bucket in perc_buckets]
            hovertext = [prep_and_wrap(bucket) for bucket in perc_buckets] # wrapped sm names
            title = 'Small molecules bound by ' + kinase_proper_cap + ' (% Control)'
            xaxis_title = '% Control'
            yaxis_title = 'Small molecules'

            fig1 = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
            fig1.show()

        if (len(kds_list) != 0):
            kds_dict_exists = True
            # values are stored as strings, so sort on their numeric value
            kds_dict = dict(sorted(kds_nested_dict[kinase_proper_cap].items(),
                                   key=lambda item: float(item[1])))
            table2df = pd.DataFrame([kds_dict], index = ['Kd'])

            # Display HTML table
            display(HTML('<h2>The small molecules that ' + kinase_proper_cap + ' binds to, with corresponding Kd values (in nM)</h2>'))
            display(HTML('<i>Scroll right if necessary to see all molecules.</i>'))
            display(HTML(table2df.to_html()))

            # Display bar chart: molecules with Kd at or above 10 µM are not charted
            kd_upper_bounds = [100.0, 1000.0, 10000.0]
            kd_buckets = [[] for _ in kd_upper_bounds]
            for sm_key, sm_value in kds_dict.items():
                kd = float(sm_value)
                for bucket, upper_bound in zip(kd_buckets, kd_upper_bounds):
                    if kd < upper_bound:
                        bucket.append(sm_key)
                        break

            x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
            y = [len(bucket) for bucket in kd_buckets]
            hovertext = [prep_and_wrap(bucket) for bucket in kd_buckets] # wrapped sm names
            title = 'Small molecules bound by ' + kinase_proper_cap + ' (Kd)'
            xaxis_title = 'Kd'
            yaxis_title = 'Small molecules'

            fig2 = generateBarChart(x, y, 0.2, hovertext, title, xaxis_title, yaxis_title)
            fig2.show()

    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))
        
else:
    display(HTML('<h2> There was no kinase input. </h2>'))

## Generate ranked lists of drugs for inputted or uploaded kinases

In [None]:
%%appyter code_exec

# Import kinase list as file or text box 
# Will choose file upload over textbox if a file is given 
kinase_list_file = {{ FileField(
        name = 'Upload kinase list',
        label = 'Upload kinase list',
        default = '',
        examples = {'sample_kinase_list.txt': 'https://raw.githubusercontent.com/MaayanLab/KinomeScan-Appyter/master/sample_kinase_list.txt'}, 
        section = 'section2'
    )}}

kinase_list_input = {{ TextField(
        name = 'Input kinase list',
        label = 'Input kinase list (one per row)',
        description = 'Kinase list input (e.g. AAK1 ABL2 ACVR1 EPHA6 EPHA7)',
        default = '',
        section = 'section2',
    )}}

input_exists = False 
lines = []

if kinase_list_file != '':
    # `with` guarantees the upload is closed even if reading fails
    with open(kinase_list_file, 'r') as open_kinase_list_file:
        lines = open_kinase_list_file.readlines()
    input_exists = True 
elif kinase_list_input != '':
    lines = kinase_list_input.split('\n')
    input_exists = True 
else:
    display(HTML('<h2>No kinase list was inputted or uploaded.</h2>'))

# One kinase per row; surrounding whitespace is trimmed and blank rows are
# dropped so they are not later reported as invalid kinases. Always defined,
# even when no input was given.
kinases = [x.strip() for x in lines if x.strip()]

# Every kinase that has % Control data
all_kinases = list(percentage_levels.keys())
        
if input_exists:
    # remove any non-protein kinases, blank entries and repeats from the inputted list
    known_kinases = set(all_kinases) # set membership instead of repeated list scans
    prot_kinase_input = []
    invalid_kinases = []
    for kinase in dict.fromkeys(kinases): # drops repeats while keeping the input order
        if not kinase:
            continue
        if kinase in known_kinases:
            prot_kinase_input.append(kinase)
        else:
            invalid_kinases.append(kinase)
          
    # If there were invalid kinases, print them
    if len(invalid_kinases) != 0:
        display(HTML('<i>These protein kinase input(s) were invalid and therefore not considered in the drug rankings:</i>'))
        invalid_kin_df = pd.DataFrame(invalid_kinases)
        invalid_kin_df = pd.DataFrame.transpose(invalid_kin_df)
        invalid_kin_df = invalid_kin_df.rename(index={0: 'Invalid inputted kinase(s):'})
        display(HTML(invalid_kin_df.to_html()))
         
    # all the other kinases that were not inputted
    other_kinases = known_kinases - set(prot_kinase_input)
    
    # go through valid prot_kinases and retrieve the small molecules that bound to at least one of them
    if len(prot_kinase_input) != 0:
        # top_drugs = (% Control ranking, Kd ranking, net % Control ranking, net Kd ranking)
        top_drugs = get_top_drugs(prot_kinase_input, other_kinases)         
        
        display(HTML('<h2>Top 5 Drugs for % Control and Equilibrium Dissociation Constant, ranked by lowest average % Control and Kd, respectively</h2>'))
        display(HTML('<i>This ranking is likely to be more accurate for inputs with fewer kinases.</i>'))
        table_df = pd.DataFrame([top_drugs[0], top_drugs[1]], columns = ['1.', '2.', '3.', '4.', '5.'])
        table_df = pd.DataFrame.transpose(table_df) # ranks become rows, measures become columns
        table_df.columns = ['% Control', 'Kd']
        display(HTML(table_df.to_html()))

        display(HTML('<h2>Top 5 Drugs for % Control and Equilibrium Dissociation Constant, ranked by lowest net average % Control and Kd, respectively</h2>'))
        display(HTML('Net % Control is calculated by (% Control for Inputted Kinases - % Control for All Other Kinases). Net Kd is calculated in the same manner. The lower the net average, the stronger the binding affinity.'))
        display(HTML('<i>This ranking considers specificity, and is likely to be more accurate for inputs with more kinases.</i>'))
        table_spec_df = pd.DataFrame([top_drugs[2], top_drugs[3]], columns = ['1.', '2.', '3.', '4.', '5.'])
        table_spec_df = pd.DataFrame.transpose(table_spec_df)
        table_spec_df.columns = ['% Control', 'Kd']
        display(HTML(table_spec_df.to_html()))
    else:
        display(HTML('<h2>There were no valid kinases inputted.</h2>')) 

## Perform Kinase Enrichment Analysis on the inputted or uploaded genes. 
## Generate ranked lists of drugs for these top associated kinases.

In [None]:
%%appyter code_exec

# Import gene/protein list as file or text box 
# Will choose file upload over textbox if a file is given 
gene_list_file = {{ FileField(
        name = 'Upload gene/protein list',
        label = 'Upload gene/protein list',
        default = '',
        examples = {'sample_gene_list.txt': 'https://raw.githubusercontent.com/MaayanLab/KEA3-Appyter/master/sample_gene_list.txt'},
        section = 'section3'
    )}}

gene_list_input = {{ TextField(
        name = 'Input gene/protein list',
        label = 'Input gene/protein list (one per row)',
        description = '',
        default = 'ZNF264 \nTMPO \nISL2 \nMAP3K8 \nEFNB1 \nEIF3C \nOSBPL11 \nABCF1 \nUTRN \nOPRK1 \nTSC1 ' +
                    '\nGAB2 \nRPS3P2 \nDDX3X \nPPP1CA \nNF2 \nRBM3 \nIRAK1 \nKCNH2 \nNPR1 \nMOCOS \nITSN2 ' +
                    '\nMITF \nARAF \nDAPK2 \nEPHB2 \nCACNA1G \nYWHAZ \nGMFB',
        section = 'section3',
    )}}

num_top_kinases = {{ IntField(
        name = 'Input number of top associated kinases to consider',
        label = 'Number of top kinases to consider',
        description = 'Input any integer from 5 to 50',
        default = 10,
        min = 5, 
        max = 50,
        section = 'section3'
    )}}

input_exists = False 
lines = []

if gene_list_file != '':
    # `with` guarantees the upload is closed even if reading fails
    with open(gene_list_file, 'r') as open_gene_list_file:
        lines = open_gene_list_file.readlines()
    input_exists = True 
elif gene_list_input != '':
    lines = gene_list_input.split('\n')
    input_exists = True 
else:
    display(HTML('<h2>No gene/protein list was inputted or uploaded.</h2>'))

# One gene per row; surrounding whitespace is trimmed and blank rows are
# dropped so no empty gene symbols are submitted. Always defined, even when
# no input was given.
genes = [x.strip() for x in lines if x.strip()]
    
if input_exists:
    # Get KEA3 results in form of list of top kinases.
    # Empty gene symbols are not worth submitting.
    results = get_kea3_results([gene for gene in genes if gene], 'Query')

    # Rankings come from the first entry of the KEA3 reply. Slicing (instead of
    # indexing rank by rank) copes with a reply that holds fewer than
    # num_top_kinases entries.
    kea3_libraries = list(results.values())
    top_entries = kea3_libraries[0][:num_top_kinases] if kea3_libraries else []
    rankings = [entry['TF'] for entry in top_entries]
    
    # Get protein kinases in KINOMEscan database 
    # (all_kinases is built by the kinase-list cell above)
    known_kinases = set(all_kinases)
    valid_kinases = [kinase for kinase in rankings if kinase in known_kinases]
            
    other_kinases = known_kinases - set(valid_kinases)
    
    if len(valid_kinases) != 0:
        # Get top drugs for associated kinases
        # top_drugs = (% Control ranking, Kd ranking, net % Control ranking, net Kd ranking)
        top_drugs = get_top_drugs(valid_kinases, other_kinases)
        display(HTML('<h2>Top 5 Drugs for % Control and Equilibrium Dissociation Constant, ranked by lowest average % Control and Kd, respectively</h2>'))
        display(HTML('<i>This ranking is likely to be more accurate for a lower selected number of associated kinases.</i>'))
        table_df = pd.DataFrame([top_drugs[0], top_drugs[1]], columns = ['1.', '2.', '3.', '4.', '5.'])
        table_df = pd.DataFrame.transpose(table_df) # ranks become rows, measures become columns
        table_df.columns = ['% Control', 'Kd']
        display(HTML(table_df.to_html()))

        display(HTML('<h2>Top 5 Drugs for % Control and Equilibrium Dissociation Constant, ranked by lowest net average % Control and Kd, respectively</h2>'))
        display(HTML('Net % Control is calculated by (% Control for Inputted Kinases - % Control for All Other Kinases). Net Kd is calculated in the same manner. The lower the net average, the stronger the binding affinity.'))
        display(HTML('<i>This ranking considers specificity, and is likely to be more accurate for a lower selected number of associated kinases.</i>'))
        table_spec_df = pd.DataFrame([top_drugs[2], top_drugs[3]], columns = ['1.', '2.', '3.', '4.', '5.'])
        table_spec_df = pd.DataFrame.transpose(table_spec_df)
        table_spec_df.columns = ['% Control', 'Kd']
        display(HTML(table_spec_df.to_html()))    
    else:
        # nothing to rank: get_top_drugs needs at least one kinase with KINOMEscan data
        display(HTML('<h2>None of the top associated kinases are present in the KINOMEscan data.</h2>'))
    

## Generate list and bar chart of kinases for small molecule input based on TAS vectors
Hover over bar chart bar(s) to see kinases. 

In [None]:
%%appyter code_exec

# if small molecule name inputted, will go through its CSV file and sort by target affinity

sm_name = {{ StringField(
        name = 'Small molecule name',
        label = 'Small molecule',
        description = 'One small molecule input. Examples include: (s)-CR8, AC220, Afatinib, Seliciclib',
        default = '',
        section = 'section'
    )}}

kinase1_string = ''
kinase2_string = ''
kinase3_string = ''

if (sm_name != ''):
    kinome_df = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
    kinome_df_copy = pd.read_excel('http://lincs.hms.harvard.edu/wordpress/wp-content/uploads/2013/11/HMS-LINCS_KinomeScan_Datasets_2018-01-18.xlsx')
    
    kinome_df['sm_hms_id'] = kinome_df['sm_hms_id'].str.replace(r'\D', '') #remove HMSL before the ID
    sm_name_lower = sm_name.lower()
    
    # check for valid input
    kinome_df_copy['sm_name'] = kinome_df_copy['sm_name'].str.lower()
    kinome_sm_names = kinome_df_copy['sm_name']
    kinome_sm_names_list = []
    for name in kinome_sm_names:
        kinome_sm_names_list.append(name)
    sm_tas_valid = sm_name_lower in kinome_sm_names_list
    
    if (sm_tas_valid):
        hms_id = kinome_df.loc[kinome_df['sm_name'].str.lower() == sm_name_lower, 'sm_hms_id']
        hms_id = remove_row_number(hms_id)
    
        # return small molecule with proper capitalization
        sm_tas_proper_cap = kinome_df.loc[kinome_df['sm_hms_id'] == hms_id, 'sm_name']
        sm_tas_proper_cap = remove_row_number(sm_tas_proper_cap)
    
        url = 'http://lincs.hms.harvard.edu/db/datasets/20000/results?small+molecules={}&output_type=.csv'.format(hms_id)
        data = pd.read_csv(url)
        df = pd.DataFrame(data)

        target_affinity_1 = df.loc[df['Binding Class'] == 1, 'HUGO Gene Symbol']
        target_affinity_2 = df.loc[df['Binding Class'] == 2, 'HUGO Gene Symbol']
        target_affinity_3 = df.loc[df['Binding Class'] == 3, 'HUGO Gene Symbol']
        target_affinity_10 = df.loc[df['Binding Class'] == 10, 'HUGO Gene Symbol'] 

        kinase1_list = []
        kinase2_list = []
        kinase3_list = []

        for kinase in target_affinity_1:
            if kinase not in kinase1_list: # removes duplicates
                kinase1_list.append(str(kinase))

        if len(kinase1_list) != 0:
            display(HTML('<h2>' + sm_tas_proper_cap + ' binds to the following kinases with Kd < 100 nM: </h2>'))
            kinase1_string = ', '.join(kinase1_list)
            display(HTML(kinase1_string))

        for kinase in target_affinity_2:
            if kinase not in kinase2_list: # removes duplicates
                kinase2_list.append(str(kinase))

        if len(kinase2_list) != 0:
            display(HTML('<h2>' + sm_tas_proper_cap + ' binds to the following kinases with 100 nM ≤ Kd < 1µM: </h2>'))
            kinase2_string = ', '.join(kinase2_list)
            display(HTML(kinase2_string))

        for kinase in target_affinity_3:
            if kinase not in kinase3_list: # removes duplicates
                kinase3_list.append(str(kinase))

        if len(kinase3_list) != 0:
            display(HTML('<h2>' + sm_tas_proper_cap + ' binds to the following kinases with 1µM ≤ Kd < 10 µM: </h2>'))
            kinase3_string = ', '.join(kinase3_list)
            display(HTML(kinase3_string))
            
        # Generate and display bar chart
        x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
        y = [len(kinase1_list), len(kinase2_list), len(kinase3_list)]

        # Text wraparound
        kinase1_lab = '<br>'.join(textwrap.wrap(kinase1_string, width=50))
        kinase2_lab ='<br>'.join(textwrap.wrap(kinase2_string, width=50))
        kinase3_lab ='<br>'.join(textwrap.wrap(kinase3_string, width=50))
        
        hovertext = [kinase1_lab, kinase2_lab, kinase3_lab]
        title = 'Kinases bound by ' + sm_tas_proper_cap
        xaxis_title = 'Equilibrium Dissociation Constant'
        yaxis_title = 'Kinases'

        fig = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
        fig.show()
    
    else:
        display(HTML('<h2> The small molecule input was not recognized. </h2>'))

else:
    display(HTML('<h2> There was no small molecule input. </h2>'))

## Generate list and bar chart of small molecules for kinase input based on TAS vectors
This step downloads the TAS GMT files. For more information on how these files were assembled, see 
the KinomeScan-Appyter folder in the HarmonizomePythonScripts repository
(https://github.com/MaayanLab/HarmonizomePythonScripts). 
Hover over a bar in the chart to see the small molecules in that affinity range. 

In [None]:
%%appyter code_exec

# if kinase inputted, return small molecules  
kinase_name = {{ StringField(
        name = 'Kinase name',
        label = 'Kinase',
        description = 'One kinase input. Examples include: ABL1, ABL2, ALK, CCNA1, MAP2K1',
        default = '',
        examples = {'Example: MAP2K1': 'MAP2K1'},
        section = 'section'
    )}}

sm1_string = ''
sm2_string = ''
sm3_string = ''

if not os.path.exists('level1.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level1.gmt', 'level1.gmt')
level_1 = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('level1.gmt', 'r'))}
if not os.path.exists('level2.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level2.gmt', 'level2.gmt')
level_2 = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('level2.gmt', 'r'))}
if not os.path.exists('level3.gmt'):
    urllib.request.urlretrieve('https://gist.githubusercontent.com/serena-zhang/a5344564a9beed30e7b5a626da1c0deb/raw/2ce9fad8256de9c288cedea24c64671c0f69e9e0/level3.gmt', 'level3.gmt')
level_3 = {split_line[0]: split_line[2:] for split_line in map(lambda s: s.split('\t'), open('level3.gmt', 'r'))}

# create dictionary for capitalization
kinase_tas_cap = {}
for kinase in level_1.keys():
    kinase_tas_cap.update({kinase.lower() : kinase})
for kinase in level_2.keys():
    if (kinase.lower() not in kinase_tas_cap):
        kinase_tas_cap.update({kinase.lower() : kinase})
for kinase in level_3.keys():
    if (kinase.lower() not in kinase_tas_cap):
        kinase_tas_cap.update({kinase.lower() : kinase})

if (kinase_name != ''):
    # check for valid input
    kinase_name = kinase_name.lower()
    kinase_tas_valid = kinase_name in kinase_tas_cap

    if (kinase_tas_valid):
        # retrieve proper kinase capitalization
        kinase_name = kinase_tas_cap[kinase_name]
        
        # lists for each target affinity for the small molecules
        sm1_list = []
        sm2_list = []
        sm3_list = []

        if kinase_name in level_1:
            sm1_list = level_1[kinase_name]
        if kinase_name in level_2:
            sm2_list = level_2[kinase_name]
        if kinase_name in level_3:
            sm3_list = level_3[kinase_name]

        if len(sm1_list) != 0:
            display(HTML('<h2>' + kinase_name + ' binds to the following small molecules with Kd < 100 nM: </h2>'))
            sm1_string = ', '.join(sm1_list)
            display(HTML(sm1_string))

        if len(sm2_list) != 0:
            display(HTML('<h2>' + kinase_name + ' binds to the following small molecules with 100 nM ≤ Kd < 1µM: </h2>'))
            sm2_string = ', '.join(sm2_list)
            display(HTML(sm2_string))

        if len(sm3_list) != 0:
            display(HTML('<h2>' + kinase_name + ' binds to the following small molecules with 1µM ≤ Kd < 10 µM: </h2>'))
            sm3_string = ', '.join(sm3_list)
            display(HTML(sm3_string))
          
        # Generate bar chart
        x = ['Kd < 100 nM', '100 nM ≤ Kd < 1µM', '1µM ≤ Kd < 10 µM']
        y = [len(sm1_list), len(sm2_list), len(sm3_list)]

        # Text wraparound
        sm1_lab = '<br>'.join(textwrap.wrap(sm1_string, width = 50))
        sm2_lab ='<br>'.join(textwrap.wrap(sm2_string, width = 50))
        sm3_lab ='<br>'.join(textwrap.wrap(sm3_string, width = 50))

        hovertext = [sm1_lab, sm2_lab, sm3_lab]
        title = 'Small molecules that bind ' + kinase_name                                    
        xaxis_title = 'Equilibrium Dissociation Constant'
        yaxis_title = 'Number of small molecules'

        fig = generateBarChart(x, y, 0.3, hovertext, title, xaxis_title, yaxis_title)
        fig.show()
    
    
    else:
        display(HTML('<h2> The kinase input was not recognized. </h2>'))         

else:
    display(HTML('<h2> There was no kinase input. </h2>'))