In [None]:
#%%appyter init
# Bootstrap appyter for this notebook. `magic.init` is handed a callable that
# returns this notebook's globals(); presumably this is what lets the
# `%%appyter` cell magics used in later cells run rendered code in the notebook
# namespace — TODO confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
# Standard library
import io
import json
import subprocess

# Third-party
import contextlib2  # Suppress command-line output
import pandas as pd
import requests  # For Enrichr

# Display
import IPython
from IPython.display import display, Markdown, HTML


In [None]:
%%appyter hide_code_exec
{# Declare the two form sections; the input fields defined in later cells attach to them through their `section=` argument ('Data_Section' / 'Enrichr_Section'). #}
{% do SectionField(
    name='Data_Section',
    title='Load your Data',
    subtitle='Upload data to rank genes'
    
) %}

{% do SectionField(
    name='Enrichr_Section',
    title='Enrichment Analysis',
    subtitle='Submit up and down genes for enrichment analysis using Enrichr'
    
) %}

In [None]:
%%appyter code_exec
{# Upload field for the read-count table (tab-separated); the default/example file is served from the appyter's static files. #}
{% set readcounts_filename = FileField(
    name='readcounts_filename', 
    label='Upload read counts as a .tsv or tab-separated .txt file', 
    default='GSE158298_readcounts.txt',

    examples={'GSE158298_readcounts.txt': url_for('static', filename = 'GSE158298_readcounts.txt')}, 
    description='Upload data', 
    section='Data_Section')

%}

{# Upload field for the negative-control sgRNA list (one sgRNA per line); later passed to MAGeCK via --control-sgrna. #}
{% set negcontrol_filename = FileField(
    name='negcontrol_filename', 
    label='Upload list of control sgRNAs (one per line)', 
    default='GSE158298_negativecontrol.txt',

    examples={'GSE158298_negativecontrol.txt': url_for('static', filename = 'GSE158298_negativecontrol.txt')}, 
    description='Upload negative control sgRNAs', 
    section='Data_Section')

%}

{# Comma-separated labels of the read-count columns that hold the control samples; paired position-by-position with the treatment labels when MAGeCK is run. #}
{% set control_names = StringField(
    name='control_names',
    label='Column labels for control, separated by commas',
    default='PreInfection',
    description='Column labels for control',
    section='Data_Section')
%}

{# Comma-separated labels of the read-count columns that hold the treatment samples; each label also becomes the MAGeCK output prefix (-n). #}
{% set treatment_names = StringField(
    name='treatment_names', 
    label='Column labels for treatment, separated by commas', 
    default='SARS2MOI001', 
    description='Column labels for treatment', 
    section='Data_Section')
%}

{# Checkbox controlling whether `--paired` is added to the `mageck test` command line. #}
{% set paired_samples = BoolField(
    name='paired_samples', 
    label='Paired samples? (make sure control and treatment sample indices are aligned)', 
    default= "true",
    description='Check if samples are paired', 
    section='Data_Section',
) 
%}

In [None]:
%%appyter code_exec


{# Integer percentage (1 to 5, default 1) of the ranked gene list that is submitted to Enrichr in each direction. #}
{% set topk_percent = IntField(
    name='topk_percent', 
    label='Percentage of top-ranked genes to use for enrichment analysis', 
    min=1, 
    max=5, 
    default=1, 
    description='Choose the percentage of top-ranked genes to use for enrichment analysis', 
    section='Enrichr_Section')
%}

In [None]:
%%appyter code_exec

# Copy the submitted form values into plain Python variables used by the cells below.
# NOTE(review): the string values are pasted between double quotes as-is, so a
# value containing a double quote or a backslash would break (or alter) the
# rendered code — confirm the form constrains these inputs.
readcounts_filename = "{{readcounts_filename.value}}"
negcontrol_filename = "{{negcontrol_filename.value}}"
# Comma-separated column labels; split on "," by the cells below.
control_names = "{{control_names.value}}"
treatment_names = "{{treatment_names.value}}"
# Rendered without quotes, so these become Python literals rather than strings.
paired_samples = {{paired_samples.value}}
topk_percent = {{topk_percent.value}}

# Ranking sgRNAs and genes

Rank sgRNAs and genes from the uploaded read counts using MAGeCK (Li et al. 2014).


In [None]:
# Run `mageck test` once per (treatment, control) column pair. Each run writes
# its results under the treatment label as output prefix (-n), which is where
# the enrichment cell below looks for "<treatment>.gene_summary.txt".
treatment_labels = [label.strip() for label in treatment_names.split(",")]
control_labels = [label.strip() for label in control_names.split(",")]

# zip() would silently drop the unmatched labels, and a later cell would then
# fail on a missing output file; report the mismatch up front instead.
if len(treatment_labels) != len(control_labels):
    raise ValueError(
        "Expected the same number of treatment and control labels, got {0} and {1}".format(
            len(treatment_labels), len(control_labels)
        )
    )

for treatment, control in zip(treatment_labels, control_labels):
    # Pass arguments as a list (no shell) so user-supplied labels and file
    # names containing spaces or shell metacharacters are taken literally.
    mageck_cmd = [
        "mageck", "test",
        "-k", readcounts_filename,
        "-t", treatment,
        "-c", control,
        "--control-sgrna", negcontrol_filename,
        "-n", treatment,
    ]
    if paired_samples:
        mageck_cmd.append("--paired")

    # Capture MAGeCK's console output to keep the notebook quiet; it is only
    # surfaced when the run fails.
    completed = subprocess.run(
        mageck_cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    if completed.returncode != 0:
        raise RuntimeError(
            "mageck test failed for treatment '{0}' vs control '{1}' (exit code {2}):\n{3}".format(
                treatment, control, completed.returncode, completed.stdout[-2000:]
            )
        )


# Enrichment Analysis

Enrichment analysis using Enrichr (Kuleshov et al. 2016)


In [None]:
def get_enrichr_url(gene_list, description):
    """Upload a gene list to Enrichr and return the URL of its results page.

    Args:
        gene_list: Iterable of gene symbols (strings); sent one per line.
        description: Free-text label stored with the uploaded list.

    Returns:
        str: URL of the Enrichr enrichment page for the uploaded list.

    Raises:
        ValueError: If ``gene_list`` is empty (nothing to analyze).
        Exception: If Enrichr answers with a non-success HTTP status.
        requests.exceptions.RequestException: On connection errors or when
            the request times out.
    """
    # One base URL for both the upload and the results link, always over
    # HTTPS, so the list is not sent in clear text and the upload does not
    # depend on an HTTP-to-HTTPS redirect.
    ENRICHR_BASE_URL = 'https://amp.pharm.mssm.edu/Enrichr'
    # Seconds to wait for Enrichr before giving up instead of hanging the notebook.
    REQUEST_TIMEOUT = 60

    genes = list(gene_list)
    if not genes:
        raise ValueError('Cannot submit an empty gene list to Enrichr')

    # (None, value) tuples make requests send plain multipart/form-data
    # fields (no file name attached).
    payload = {
        'list': (None, '\n'.join(genes)),
        'description': (None, description)
    }

    response = requests.post(
        ENRICHR_BASE_URL + '/addList', files=payload, timeout=REQUEST_TIMEOUT
    )
    if not response.ok:
        raise Exception('Error analyzing gene list')

    data = response.json()

    short_id = data['shortId']
    return "{}/enrich?dataset={}".format(ENRICHR_BASE_URL, short_id)

In [None]:

# For every treatment, read its MAGeCK gene summary, take the top-ranked genes
# in each direction and submit both lists to Enrichr, then show the result links.
for treatment_label in treatment_names.split(","):
    # Ignore blanks around the comma-separated labels, so "A, B" looks for
    # "B.gene_summary.txt" rather than " B.gene_summary.txt".
    treatment = treatment_label.strip()
    gene_fn = "{0}.gene_summary.txt".format(treatment)
    output_df = pd.read_csv(gene_fn, sep="\t")

    # Number of genes corresponding to the requested percentage (rounded down).
    topk_genes = int(topk_percent / 100 * len(output_df))
    if topk_genes == 0:
        # Too few genes for the requested percentage: an empty list cannot be
        # analyzed, so report it and move on to the next treatment.
        display(Markdown(
            "**{0}**: too few genes ({1}) to select the top {2}%; skipping enrichment analysis".format(
                treatment, len(output_df), topk_percent
            )
        ))
        continue

    # Sort explicitly by each direction's rank column instead of relying on
    # the row order of the file, so both lists are selected the same way.
    neg_genes = output_df.sort_values(by=['neg|rank'])['id'].head(topk_genes).tolist()
    pos_genes = output_df.sort_values(by=['pos|rank'])['id'].head(topk_genes).tolist()

    pos_url = get_enrichr_url(pos_genes, "Top {0}% up genes for {1}".format(topk_percent, treatment))
    neg_url = get_enrichr_url(neg_genes, "Top {0}% down genes for {1}".format(topk_percent, treatment))

    display(Markdown("**Top {0} ({3}%) up genes for {1}**: <{2}>".format(topk_genes, treatment, pos_url, topk_percent)))
    display(Markdown("**Top {0} ({3}%) down genes for {1}**: <{2}>".format(topk_genes, treatment, neg_url, topk_percent)))


