In [None]:
#%%appyter init
from appyter import magic
# Bootstrap appyter for this notebook. The lambda binds the built-in `globals` as a default
# argument and calls it on demand, so it hands back this notebook's global namespace.
# NOTE(review): presumably this is what enables the appyter cell magics used below - confirm
# against the appyter documentation.
magic.init(lambda _=globals: _())

In [None]:
%%appyter markdown
# GMT File Augmentation
This Appyter augments each gene set in a submitted GMT file with additional genes predicted by Geneshot from a gene-gene co-occurrence or co-expression matrix,
and outputs the augmented gene sets as a new GMT file.

In [None]:
import json
import os
import time

import pandas as pd
import requests
from IPython.display import display, HTML
from tqdm import tqdm

In [None]:
%%appyter hide_code
{# Input form declaration. The three fields below are bound to the template variables
   gmt_file, augmentation_method and num_genes, which later cells interpolate into code. #}
{# Section that groups the input fields; each field refers to it via section='settings'. #}
{% do SectionField(
    title='Input Settings',
    subtitle='Upload a GMT file, select a gene-gene correlation matrix, and choose the number of augmented genes to add to each gene set.',
    name='settings',
    img='settings.png',
) %}

{# Uploaded gene set library; the example entry points at the Enrichr VirusMINT library. #}
{% set gmt_file = FileField(
    name='gmt_file',
    label='GMT File (.gmt or .txt)',
    default='test.gmt',
    description='Upload a GMT file for augmentation. The GMT file format is a tab delimited file format that describes gene sets.',
    examples={
        'test.gmt': 'https://maayanlab.cloud/Enrichr/geneSetLibrary?mode=text&libraryName=VirusMINT',
    },
    section='settings',
)%}

{# Similarity resource used for augmentation; the rendered field is sent as the "similarity"
   entry of the Geneshot request payload.
   NOTE(review): presumably the choices keys are display labels and the values are what gets
   rendered - confirm against the appyter ChoiceField documentation. #}
{% set augmentation_method = ChoiceField(
    name='augmentation_method',
    label='Gene Set Augmentation Method',
    description='Select a method for augmenting gene sets. The options are described in depth here: https://maayanlab.cloud/geneshot/help.html',
    choices={
        'ARCHS4 co-expression':'coexpression',
        'Enrichr co-occurrence':'enrichr',
        'Tagger co-occurrence':'tagger',
        'GeneRIF co-occurrence':'generif',
    },
    default= 'ARCHS4 co-expression',
    section='settings',
    
)%}

{# Number of genes to add per gene set: bounded to 50-200, default 100. #}
{% set num_genes = IntField(
    name='num_genes',
    label='Number of Augmented Genes',
    description='Select the number of augmented genes to append to each gene set [50-200]',
    min= 50,
    max=200,
    default= 100,
    section='settings',
)%}

In [None]:
%%appyter code_exec
def geneshot(gene_list):
    """Return the highest-scoring genes associated with a gene set by Geneshot.

    Posts ``gene_list`` to the Geneshot ``associate`` endpoint using the similarity
    resource selected in the input form, ranks the returned genes by ``simScore``
    (descending) and returns the best-ranked genes that are not already in
    ``gene_list``, up to the number of augmented genes selected in the input form.

    Parameters
    ----------
    gene_list : list of str
        Gene symbols of the set to augment.

    Returns
    -------
    list of str
        New gene symbols ordered by decreasing score; may be shorter than the
        requested number (or empty) if the API returns too few candidates.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    requests.Timeout
        If the API does not answer within the timeout.
    KeyError
        If the response carries no ``association`` entry.
    """
    GENESHOT_URL = 'https://maayanlab.cloud/geneshot/api/associate'
    REQUEST_TIMEOUT = 120  # seconds; keeps a stalled connection from hanging the notebook
    # NOTE(review): the upload path is templated without surrounding quotes elsewhere in this
    # notebook, while the similarity value below is wrapped in quotes - confirm the rendered
    # payload string is what the API expects.
    payload = {
        "gene_list": gene_list,
        "similarity": "{{augmentation_method}}"
    }
    response = requests.post(GENESHOT_URL, json=payload, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
    response.raise_for_status()

    data = response.json()
    df = pd.DataFrame.from_dict(
        {gene: info['simScore'] for gene, info in data['association'].items()},
        orient='index',
        columns=['Score'])
    ranked = df.sort_values(by=['Score'], ascending=False).dropna()
    # Drop genes already in the input set BEFORE truncating, so that input genes
    # appearing among the top hits do not reduce the number of genes added.
    candidates = ranked.loc[~ranked.index.isin(gene_list)]
    augmented_genes = candidates.index[:{{num_genes}}].tolist()

    return augmented_genes

In [None]:
%%appyter code_exec
# Parse the uploaded GMT file into a dict mapping each gene set label to its gene list.
# GMT rows are tab-separated: label, description (may be empty), then the genes.
with open({{gmt_file}}, 'r') as f:
    # splitlines() copes with both LF and CRLF line endings, so the last gene of a
    # row never carries a stray carriage return.
    genesets = f.read().splitlines()

genesetlibrary = {}
for line in genesets:
    fields = line.split("\t")
    # The label is always the first column, whether or not the description is empty.
    set_label = fields[0].strip()
    if not set_label:
        # Blank line (e.g. the trailing newline at the end of the file): not a gene set.
        continue
    # Skip the description column and ignore empty tokens left by trailing tabs.
    genes = [gene.strip() for gene in fields[2:] if gene.strip()]
    genesetlibrary[set_label] = genes

In [None]:
%%appyter code_exec
# Augment every gene set in place: fetch the predicted genes for the set and, when any
# come back, store the sorted union of original and predicted genes under the same term.
for term, genes in tqdm(genesetlibrary.items()):
    augmented_list = geneshot(genes)
    if augmented_list:
        combined = genes + augmented_list
        combined.sort()
        genesetlibrary[term] = combined
    # Short pause between consecutive API requests.
    time.sleep(0.25)

In [None]:
%%appyter code_exec
# Write the augmented library to "<uploaded name without extension>_augmented.gmt".
# os.path.splitext strips only the final extension, so dots elsewhere in the path
# (for example "./test.gmt" or "my.library.gmt") no longer truncate the name.
filename = os.path.splitext({{gmt_file}})[0] + '_augmented.gmt'
with open(filename, 'w') as f:
    for term, genes in genesetlibrary.items():
        # One row per gene set: label, empty description column, then the genes.
        print(term, "", *genes, sep='\t', file=f)

In [None]:
# Show a download link to the augmented GMT file in the notebook output.
# The doubled braces are f-string escapes and produce a literal CSS rule block.
display(HTML(f'<style>p{{font-size:18px}}</style><p><b>Augmented file download link:</b></p>\
            <p><a target="_blank" href="{filename}">{filename}</a></p>'))