In [None]:
#%%appyter init
from appyter import magic
# Give appyter a zero-argument callable that returns this notebook's global
# namespace. `globals` is bound as the lambda's default argument and then
# called, so the result is the globals() of the notebook itself.
# NOTE(review): presumably appyter uses this namespace to execute the rendered
# `%%appyter` cells in place - confirm against the appyter documentation.
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown

<center>
    <h1> 
        <div style="font-size:3rem;font-weight:500"> <style="height:45px;padding:0 5px;display:inline"/> KEA3 Appyter </div>
    </h1>
    <br>
    <div style="font-size:2rem;font-weight:500">An Appyter interface to the Kinase Enrichment Analysis 3 (KEA3) database and enrichment analysis tool</div>
</center>

In [None]:
import json
import requests
from time import sleep
from tabulate import tabulate
from IPython.display import HTML, display, Image, FileLink
import plotly.graph_objects as go
import kaleido
import os 

In [None]:
def get_kea3_results(gene_set, query_name, timeout=60):
    """Submit a gene/protein list to the KEA3 enrichment API and return the parsed reply.

    Parameters
    ----------
    gene_set : list of str
        Gene/protein symbols to analyse; sent as the ``gene_set`` field.
    query_name : str
        Label for the query; sent as the ``query_name`` field.
    timeout : float or tuple, optional
        Seconds to wait for the server, passed straight to ``requests.post``.
        Without it a stalled server would block the notebook indefinitely.

    Returns
    -------
    object
        The JSON-decoded response body.

    Raises
    ------
    Exception
        If the server answers with a non-success HTTP status.
    requests.exceptions.RequestException
        If the request cannot be completed (connection failure, timeout, ...).
    ValueError
        If the response body is not valid JSON.
    """
    # NOTE(review): confirm this host is still the canonical KEA3 endpoint.
    ADDLIST_URL = 'https://amp.pharm.mssm.edu/kea3/api/enrich/'
    payload = {
        'gene_set': gene_set,
        'query_name': query_name
    }

    response = requests.post(ADDLIST_URL, data=json.dumps(payload), timeout=timeout)
    if not response.ok:
        # Keep the original message prefix, but add the status for diagnosis.
        raise Exception('Error analyzing gene list (HTTP status {})'.format(response.status_code))
    # One-second pause kept from the original implementation.
    # NOTE(review): presumably a courtesy delay for the public API - confirm it is still wanted.
    sleep(1)

    return json.loads(response.text)

In [None]:
%%appyter hide_code

# Declares the form section named 'section'; the input fields defined further
# down attach themselves to it through their section = 'section' argument.
{% do SectionField(name ='section', title = 'Gene/protein list input', 
                   subtitle = 'Kinase Enrichment Analysis 3 (KEA3) infers upstream kinases whose putative substrates are ' +
                               'overrepresented in a user-inputted list of genes or differentially phosphorylated proteins. ' +
                               'The KEA3 database contains putative kinase-substrate interactions collected from publicly ' +
                               'available datasets. Upload -OR- input a gene/protein list to retrieve the top associated kinases.', 
                   img = 'KEA3_transparent_logo.png')%}

## Generate bar chart and tables containing top associated kinases for different libraries
Libraries are supersets of kinase substrate sets that are aggregated based on the database from which they are derived.

In [None]:
%%appyter code_exec

# Path of the uploaded gene/protein list ('' when nothing was uploaded)
gene_list_file = {{ FileField(
        name = 'Upload gene/protein list',
        label = 'Upload gene/protein list',
        default = '',
        examples = {'sample_gene_list.txt': 'https://raw.githubusercontent.com/MaayanLab/KEA3-Appyter/master/sample_gene_list.txt'},
        section = 'section'
    )}}

# Gene/protein list typed into the form, one symbol per row
gene_list_input = {{ TextField(
        name = 'Input gene/protein list',
        label = 'Input gene/protein list (one per row)',
        default = 'ZNF264 \nTMPO \nISL2 \nMAP3K8 \nEFNB1 \nEIF3C \nOSBPL11 \nABCF1 \nUTRN \nOPRK1 \nTSC1 ' +
                    '\nGAB2 \nRPS3P2 \nDDX3X \nPPP1CA \nNF2 \nRBM3 \nIRAK1 \nKCNH2 \nNPR1 \nMOCOS \nITSN2 ' +
                    '\nMITF \nARAF \nDAPK2 \nEPHB2 \nCACNA1G \nYWHAZ \nGMFB',
        section = 'section',
    )}}


# Number of ranked kinases to display for the tables
num_kinases = {{ IntField(
        name = 'Input number of top kinases to display',
        label = 'Number of top kinases for tables',
        description = 'Input any integer from 1 to 100',
        default = 10,
        min = 1, 
        max = 100,
        section = 'section'
    )}}

# Collect the raw lines; an uploaded file takes priority over the text box.
raw_gene_lines = []
if gene_list_file != '':
    # The context manager closes the file even if reading fails.
    with open(gene_list_file, 'r') as open_gene_list_file:
        raw_gene_lines = open_gene_list_file.readlines()
elif gene_list_input != '':
    raw_gene_lines = gene_list_input.split('\n')

# Strip surrounding whitespace and drop blank rows so that no empty
# gene symbols are submitted to the API.
genes = [line.strip() for line in raw_gene_lines if line.strip()]

# Input counts as present only if at least one non-blank symbol was given.
input_exists = len(genes) > 0

if not input_exists:
    display(HTML('<h2>No gene/protein list was inputted or uploaded.</h2>'))

if input_exists:
    results = get_kea3_results(genes, 'Query')

    # Libraries in the order their bars are stacked, mapped to the bar colour.
    # None means no explicit colour is set, so plotly uses its default.
    library_colors = {
        'STRING.bind': 'rgb(196, 8, 8)',
        'ChengPPI': None,
        'PhosDAll': 'rgb(242, 172, 68)',
        'BioGRID': 'rgb(236, 252, 68)',
        'HIPPIE': 'rgb(165, 242, 162)',
        'ChengKSIN': 'rgb(92, 217, 78)',
        'STRING': 'rgb(0, 138, 64)',
        'MINT': 'rgb(96, 191, 235)',
        'mentha': 'rgb(14, 130, 201)',
        'prePPI': 'rgb(58, 50, 168)',
        'PTMsigDB': 'rgb(158, 50, 168)',
    }
    max_chart_kinases = 10  # only the first entries of the ranking are charted
    min_libraries = 3       # kinases ranked by fewer libraries are left out

    # The chart is built from the first ranking in the response.
    # NOTE(review): presumably this is the integrated mean-rank result - confirm
    # against the KEA3 API response format.
    first_ranking = list(results.values())[0]

    # kinase_means['Kinase name'] = one value per library, 0 when the library
    # did not report the kinase. cumulative_sums['Kinase name'] = sum of them.
    kinase_means = {}
    cumulative_sums = {}
    kinase_names = []
    eligible_kinases = []

    # Slicing (instead of a fixed index range) copes with short responses.
    for entry in first_ranking[:max_chart_kinases]:
        name = entry['TF']
        kinase_names.append(name)
        kinase_means[name] = dict.fromkeys(library_colors, 0)
        cumulative_sums[name] = 0

        # 'Library' holds 'name,value' pairs separated by ';'
        num_libraries = 0
        for lib_mean in entry['Library'].split(';'):
            if not lib_mean.strip():
                continue  # tolerate an empty field or a trailing ';'
            lib_name, mean = lib_mean.split(',')
            kinase_means[name][lib_name] = int(mean)
            cumulative_sums[name] += int(mean)
            num_libraries += 1

        if num_libraries >= min_libraries:
            eligible_kinases.append(name)

    # Sort the eligible kinases from highest to lowest cumulative sum.
    # Sorting the names directly keeps each kinase exactly once, even on ties,
    # and never re-admits a kinase that failed the library-count filter.
    sorted_kinases = sorted(eligible_kinases,
                            key=lambda kinase_name: cumulative_sums[kinase_name],
                            reverse=True)

    # One horizontal bar trace per library, in stacking order
    bars = []
    for library, color in library_colors.items():
        bar_options = {
            'name': library,
            'x': [kinase_means[kinase_name][library] for kinase_name in sorted_kinases],
            'y': sorted_kinases,
            'orientation': 'h',
        }
        if color is not None:
            bar_options['marker'] = go.bar.Marker(color=color)
        bars.append(go.Bar(**bar_options))

    fig = go.Figure(data=bars)

    # Bar direction is set per trace above; the former layout-level
    # `orientation = 90` is a legacy polar-chart attribute and was dropped.
    fig.update_layout(
        barmode = 'stack',
        title = {
            'text': 'Stacked Bar Chart of Sum of Ranks in Different Libraries',
            'y': 0.87,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
        },
        xaxis_title = 'Cumulative Sum of Ranks in Different Libraries',
        yaxis_title = 'Kinases',
        font = dict(
            size = 16,
            color = 'black'
        )
    )

    display(HTML('<h2>This interactive bar chart summarizes the rankings of the top kinases from the different libraries.</h2>'))
    display(HTML('<i>Any kinases with rankings from fewer than 3 libraries were omitted. ' + 
                 'To download the file as a PNG, click the camera button in the upper right corner of the chart.</i>'))
    fig.show()
    
    #fig.write_image('Downloads/Stacked.svg')
    #display(FileLink('Downloads/Stacked.svg', result_html_prefix=str('Download svg: ')))
       
    # Build the top-kinase listing for every ranking returned by KEA3.
    # Format: rankings['Ranking name'] = dict mapping rank (1, 2, 3, ...) to kinase
    # Iterating the response itself, and slicing to num_kinases, avoids index
    # errors when fewer rankings or fewer kinases come back than expected.
    rankings = {}
    for ranking_name, ranked_entries in results.items():
        top_entries = ranked_entries[:num_kinases]
        rankings[ranking_name] = {
            rank: entry['TF'] for rank, entry in enumerate(top_entries, start=1)
        }

    ranking_names = list(rankings.keys())

    # The first two rankings are shown as the integrated results, the
    # remaining ones as per-library results.
    # NOTE(review): relies on the order of keys in the API response - confirm.
    num_integrated = 2
    table_groups = [
        ('<h2>The following tables display the integrated (summarizing) results of the top kinases.</h2>',
         ranking_names[:num_integrated]),
        ('<h2>The following tables display the rankings of the top kinases from each library.</h2>',
         ranking_names[num_integrated:]),
    ]

    for heading_html, group_names in table_groups:
        display(HTML(heading_html))
        for ranking_name in group_names:
            # Rows are built per ranking, so each table shows its own kinases
            # rather than the rows left over from the last ranking processed.
            ranks = [[rank, kinase] for rank, kinase in rankings[ranking_name].items()]
            display(HTML(tabulate(ranks, headers = ['Rank', ranking_name], tablefmt = 'html')))