In [None]:
#%%appyter init
from appyter import magic
# Initialise Appyter for this notebook. The lambda binds the built-in `globals`
# as a default argument and calls it, so `magic.init` is handed a callable that
# returns this notebook's global namespace.
magic.init(lambda _=globals: _())

In [None]:
# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML
import requests

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource

# display graphics: configure Bokeh so that `show(...)` renders plots inline in the notebook
output_notebook()

In [None]:
%%appyter hide
{# Declares the 'INPUT' form section (title, subtitle, image). The species/gene field in this notebook attaches to it through section = 'INPUT'. #}
{% do SectionField(
    name = 'INPUT',
    title = 'Input Parameters',
    subtitle = 'Fill in the species and gene of interest',
    img = 'mini_plot.png'
)%}

In [None]:
%%appyter code_exec
{# Species selector with two tabs ('Human' is the default). Each tab holds a single gene autocomplete field whose suggestions come from that species' gene-list JSON. The field object is bound to `species_input`. #}
{% set species_input = TabField(
    name = 'species_input',
    label = 'Species of Interest',
    default = 'Human',
    description = 'Select the species of interest.',
    section = 'INPUT',
    choices = {
        'Human': [
            AutocompleteField(
                name = 'human_gene',
                label = 'Human Gene of Interest',
                default = 'SLC2A2',
                description = 'Enter the gene symbol of interest (human).',
                file_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/human_genes.json'
            )
        ],
        'Mouse': [
            AutocompleteField(
                name = 'mouse_gene',
                label = 'Mouse Gene of Interest',
                default = 'CD29',
                description = 'Enter the gene symbol of interest (mouse).',
                file_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/mouse_genes.json'
            )
        ]
    }
)%}

In [None]:
%%appyter code_exec
# Selected species tab, rendered into a string literal; later cells compare it
# against "Human" and use its lower-cased form in data file names.
species = "{{ species_input.raw_value }}"
# Both names are rendered from the first field of the selected tab.
# `gene` may later be replaced by a symbol found through the synonym lookup,
# while `input_gene` keeps the entered value for use in messages.
gene = {{ species_input.value[0] }}
input_gene = {{ species_input.value[0] }}

# My Gene's Expression in Type 2 Diabetes Transcriptomics Signatures

In [None]:
# Load the preprocessed signature tables for the selected species.
# All three files are tab-separated and use their first column as the index.
root_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/'

# p-values (indexed by gene symbol, one column per signature)
pval_df_input = pd.read_csv(
    root_path + f"all_{species.lower()}_pval.tsv",
    index_col=0,
    sep='\t',
)

# fold changes (looked up by gene symbol in the same way as the p-values)
fc_df_input = pd.read_csv(
    root_path + f"all_{species.lower()}_fc.tsv",
    index_col=0,
    sep='\t',
)

# Bulk RNA-seq Appyter analysis instances (indexed by signature name)
inst_df_input = pd.read_csv(
    root_path + f"{species.lower()}_instances.tsv",
    index_col=0,
    sep='\t',
)

In [None]:
# standardize gene format (human symbols are matched in upper case, without hyphens)
if species == "Human":
    gene = gene.upper().replace('-','')
    input_gene = input_gene.upper().replace('-','')

# has_syn records how the gene was resolved:
#   False -> the entered symbol is present in the signature data
#   True  -> the entered value was a synonym that maps to a symbol in the data
#   None  -> the gene could not be resolved; downstream cells skip their output
has_syn = False

# obtain all gene symbols in signature
sig_genes = pval_df_input.index.tolist()

# find gene synonym, if necessary
if gene not in sig_genes:
    gene_info = {
        'Human': 'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz',
        'Mouse': 'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz'
    }
    gene_df = pd.read_csv(gene_info[species], sep='\t', compression='gzip')[['Symbol', 'Synonyms']]

    # flatten the '|'-separated synonym lists into (synonym, symbol) pairs
    gene_map_ind = []
    gene_map_val = []
    for row in gene_df.itertuples():
        # '-' marks "no synonyms"; non-string cells (e.g. a token that pandas
        # parsed as NaN) cannot be split and are skipped as well
        if not isinstance(row.Synonyms, str) or row.Synonyms == '-':
            continue
        for syn in row.Synonyms.split('|'):
            # when a synonym contains ':', keep the part right after the first ':'
            syn = syn.split(':')[1] if ':' in syn else syn
            gene_map_ind.append(syn)
            gene_map_val.append(row.Symbol)
    gene_map = pd.DataFrame.from_records(
        zip(gene_map_ind, gene_map_val),
        columns=['synonym', 'symbol']
    ).set_index('synonym')

    # drop synonyms that occur more than once, so every lookup yields a single symbol
    syn_counts = gene_map.index.value_counts()
    gene_map = gene_map.drop(syn_counts[syn_counts > 1].index)

    # Only a KeyError means "unknown synonym"; any other error should surface
    # instead of being reported as a spelling problem.
    try:
        candidate = gene_map.loc[input_gene, 'symbol']
    except KeyError:
        candidate = None

    # The mapped symbol must itself be in the signature data, otherwise the
    # later per-gene lookups would fail.
    if candidate is not None and candidate in sig_genes:
        gene = candidate
        has_syn = True
        display(HTML(f"<div style='font-size:1rem;padding:1rem 0;'>{input_gene} not in signature data, using gene synonym <b>{gene}</b>.</div>"))
    else:
        display(HTML(f"<div style='font-size:1rem;padding:1rem 0;'>{input_gene} not found in signature data and no synonyms found either. Please check your spelling, or try again later with a synonym.</div>"))
        has_syn = None

In [None]:
# configure color scheme
# The colormaps are read as attributes of `matplotlib.cm`; `cm.get_cmap` was
# deprecated in Matplotlib 3.7 and removed in 3.9.
# With vmin=-0.25 and vmax=1 a p-value of 0 is normalized to 0.2 and a p-value
# of 1 to 1.0, so p-values only span the upper 80% of each reversed colormap.
red_map = cm.Reds_r
red_norm = colors.Normalize(vmin=-0.25, vmax=1)
blue_map = cm.Blues_r
blue_norm = colors.Normalize(vmin=-0.25, vmax=1)

def map_color(fc, pv):
    """Return the hex color for one point of the volcano plot.

    Parameters
    ----------
    fc : fold change; its sign selects the palette (negative -> red,
        positive -> blue) and an exact zero gives a fixed gray.
    pv : p-value; passed through the palette's normalizer to pick the shade.

    Returns
    -------
    str : hex color string.
    """
    # zero fold change has no direction, so it is not shaded by p-value
    if fc == 0:
        return '#808080'

    if fc < 0:
        palette, scale = red_map, red_norm
    else:
        palette, scale = blue_map, blue_norm
    return colors.to_hex(palette(scale(pv)))

In [None]:
def combine_data(pval_df, fc_df, inst_df, gene):
    """Collect one gene's statistics across all signatures into a single frame.

    Parameters
    ----------
    pval_df : DataFrame of p-values, indexed by gene, one column per signature.
    fc_df : DataFrame of fold changes, indexed by gene.
    inst_df : DataFrame indexed by signature name with a 'session_id' column.
    gene : index label of the gene to extract from `pval_df` and `fc_df`.

    Returns
    -------
    DataFrame with one row per signature and the columns
    'sig', 'pval', 'logpv' (-log10 of 'pval'), 'fc' and 'inst' (session id).
    """
    def session_for(sig_name):
        # session id of the analysis instance recorded for this signature
        return inst_df.loc[sig_name, 'session_id']

    comb_df = pd.DataFrame({
        'sig': pval_df.columns.tolist(),
        'pval': pval_df.loc[gene].tolist(),
    })
    comb_df['logpv'] = -np.log10(comb_df['pval'])
    comb_df['fc'] = fc_df.loc[gene].tolist()
    comb_df['inst'] = comb_df['sig'].apply(session_for)
    return comb_df

def make_plot(comb_df, species, gene):
    """Show an interactive Bokeh volcano plot of one gene across signatures.

    Each signature is drawn at (fold change, -log10 p-value), colored by
    `map_color` and drawn larger when its p-value is below 0.05.

    Parameters
    ----------
    comb_df : frame produced by `combine_data`. Its 'inst' column is
        overwritten in place with full Bulk RNA-seq Appyter URLs.
    species : species name used in the plot title.
    gene : gene symbol used in the plot title and glyph name.
    """
    def to_instance_url(session_id):
        return f'https://appyters.maayanlab.cloud/Bulk_RNA_seq/{session_id}'

    # create links from Bulk RNA-seq Appyter instance session IDs (modifies comb_df)
    comb_df['inst'] = comb_df['inst'].apply(to_instance_url)

    # set color and size for each point on plot
    point_colors = []
    point_sizes = []
    for record in comb_df.itertuples():
        point_colors.append(map_color(record.fc, record.pval))
        point_sizes.append(12 if record.pval < 0.05 else 6)

    # generate data source
    data_source = ColumnDataSource(data={
        'x': comb_df['fc'],
        'y': comb_df['logpv'],
        'sig': comb_df['sig'],
        'pval': comb_df['pval'],
        'fc': comb_df['fc'],
        'inst': comb_df['inst'],
        'colors': point_colors,
        'sizes': point_sizes,
    })

    # create hover tooltip
    hover_fields = [
        ("Signature", "@sig"),
        ("P-Value", "@pval"),
        ("Fold Change", "@fc"),
        ("Bulk RNA-seq Analysis", "@inst"),
    ]

    # generate plot
    plot = figure(plot_width=700, plot_height=500, tooltips=hover_fields)
    plot.circle(
        'x', 'y',
        source=data_source,
        size='sizes',
        fill_color='colors',
        alpha=0.7,
        line_alpha=0,
        line_width=0.01,
        name=f'{gene}_t2d_expression_volcano_plot'
    )

    # axis labels and title
    plot.xaxis.axis_label = 'Fold Change'
    plot.yaxis.axis_label = '-log10(P-value)'
    title = plot.title
    title.text = f"Differential Expression of {gene} in {species} Type 2 Diabetes Transcriptomics Signatures"
    title.align = 'center'
    title.text_font_size = '14px'

    show(plot)

The volcano plot below positions each of the Type 2 Diabetes signatures according to the gene-specific fold change (x-position) and -log10(p-value) (y-position). Signatures were computed using the R package [limma](https://bioconductor.org/packages/release/bioc/html/limma.html). Fold changes are quantile-normalized log2 fold change values.

**Red** points indicate signatures where the fold change of the chosen gene was < 0, **blue** points indicate fold change > 0, and **gray** points indicate fold change == 0. Darker colors correspond to smaller p-values, and signatures where the specified gene was significantly differentially expressed in either direction (p < 0.05) are drawn with a larger point size.

Hover over any point to display the corresponding signature name, the p-value, the fold change, and the URL of the corresponding Bulk RNA-seq Appyter analysis.

Use the toolbar on the right side of the plot to pan, zoom, or save the plot.

In [None]:
# has_syn is None only when the gene could not be resolved; skip plotting then
if has_syn is not None:
    comb_df_input = combine_data(
        pval_df_input,
        fc_df_input,
        inst_df_input,
        gene,
    )
    make_plot(comb_df_input, species, gene)

In [None]:
# get GEO links
def geo_link(sig_name):
    """Return an HTML anchor to the GEO record of a signature.

    The accession is the text before the first underscore of `sig_name`,
    with any '* ' marker removed.
    """
    geo_path = 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc='
    accession_part = sig_name.partition('_')[0]
    gse_id = accession_part.replace('* ', '')
    return f'<a target="_blank" href="{geo_path}{gse_id}">{gse_id}</a>'

def appyter_link(sig_name, inst):
    """Return an HTML anchor labelled 'Analysis of <sig_name>' that opens `inst` in a new tab."""
    return f'<a target="_blank" href="{inst}">Analysis of {sig_name}</a>'

# create tables of significant results with links to GEO
def _top_signatures(subset_df, n_top=10):
    """Format the `n_top` rows of `subset_df` with the smallest p-values for display."""
    top_df = subset_df.sort_values(by='pval', ascending=True)[:n_top]
    top_df = top_df.drop(columns='logpv')
    # Columns are built as plain lists, which stay well-defined when the subset is empty.
    # The analysis link is built first because it uses the unmarked signature name.
    top_df['inst'] = [
        appyter_link(sig, inst) for sig, inst in zip(top_df['sig'], top_df['inst'])
    ]
    # prefix significant signatures (p < 0.05) with an asterisk
    top_df['sig'] = [
        f"* {sig}" if pval < 0.05 else sig
        for sig, pval in zip(top_df['sig'], top_df['pval'])
    ]
    top_df = top_df.rename(columns={
        'sig': 'Signature',
        'pval': 'P-value',
        'fc': 'Log2 Fold Change',
        'inst': 'Link to Bulk RNA-seq Analysis'
    })
    # geo_link strips the '* ' marker again before extracting the accession
    top_df['Link to GEO Study'] = [geo_link(sig) for sig in top_df['Signature']]
    return top_df


def make_tables(comb_df):
    """Build the up- and down-regulated result tables for one gene.

    Parameters
    ----------
    comb_df : frame with the columns 'sig', 'pval', 'logpv', 'fc' and 'inst',
        where 'inst' already holds the analysis URLs.

    Returns
    -------
    (up_comb_df, dn_comb_df) : the rows with fold change > 0 and < 0
    respectively, each limited to the 10 smallest p-values. Both tables use
    the same column headers.
    """
    up_comb_df = _top_signatures(comb_df[comb_df['fc'] > 0])
    dn_comb_df = _top_signatures(comb_df[comb_df['fc'] < 0])
    return (up_comb_df, dn_comb_df)

The tables below show up to 10 signatures, ranked by p-value, in which the gene of interest was up-regulated or down-regulated, respectively. Links are included to the original GEO studies, as well as to [Bulk RNA-seq Appyter](https://appyters.maayanlab.cloud/#/Bulk_RNA_seq) instances that display the results of performing RNA-seq analysis on the original expression data used to generate each signature.

In [None]:
# `comb_df_input` is only created when the gene was resolved (has_syn is not None),
# so the tables must be built under the same guard to avoid a NameError.
if has_syn is not None:
    up_comb_df_input, dn_comb_df_input = make_tables(comb_df_input)

In [None]:
# show the up-regulated table unless the gene could not be resolved (has_syn is None)
if has_syn is not None:
    if has_syn:
        # the gene was reached through a synonym: mention the entered name as well
        display(HTML(f'<div style="font-size:1rem;padding:1rem;"><b>Top 10 {species} signatures where {gene} (synonym {input_gene}) is up-regulated</b></div>'))
    else:
        display(HTML(f'<div style="font-size:1rem;padding:1rem;"><b>Top 10 {species} signatures where {gene} is up-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the generated <a> links clickable
    display(HTML(up_comb_df_input.to_html(escape=False,index=False)))

In [None]:
# show the down-regulated table unless the gene could not be resolved (has_syn is None)
if has_syn is not None:
    if has_syn:
        # the gene was reached through a synonym: mention the entered name as well
        display(HTML(f'<div style="font-size:1rem;padding:1rem;"><b>Top 10 {species} signatures where {gene} (synonym {input_gene}) is down-regulated</b></div>'))
    else:
        display(HTML(f'<div style="font-size:1rem;padding:1rem;"><b>Top 10 {species} signatures where {gene} is down-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the generated <a> links clickable
    display(HTML(dn_comb_df_input.to_html(escape=False,index=False)))

# Alternate Species Analysis

If the chosen gene symbol is present in both the human and the mouse gene lists (compared case-insensitively), the volcano plot and results tables will also be generated below for signatures from the species not chosen in the input form.

In [None]:
# get all gene symbols and find overlapping genes
def _fetch_gene_list(url, key, timeout=30):
    """Download a JSON document from `url` and return the list stored under `key`.

    Raises `requests.HTTPError` for a non-success HTTP status, and a requests
    timeout error if the server does not respond within `timeout` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # fail with the HTTP status instead of an obscure JSON/KeyError further down
    response.raise_for_status()
    return response.json()[key]


human_genes = _fetch_gene_list(
    'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/human_genes.json',
    'human_genes'
)
mouse_genes = _fetch_gene_list(
    'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/mouse_genes.json',
    'mouse_genes'
)
# convert mouse_genes to uppercase so they can be compared with the human symbols
mouse_genes_upper = [g.upper() for g in mouse_genes]
overlapping_genes = list(set(human_genes).intersection(set(mouse_genes_upper)))

In [None]:
# the overlap list holds upper-cased symbols, so compare in upper case
gene_key = gene.upper()
is_overlap = gene_key in overlapping_genes
alt_species = 'Mouse' if species == 'Human' else 'Human'

if species == 'Mouse':
    # alternate species is human: use the upper-cased symbol
    alt_gene = gene_key
else:
    # Alternate species is mouse. Prefer the usual "Capitalized" form; if the
    # mouse gene list does not contain it, fall back to the listed symbol whose
    # upper-case form matches, so symbols with other casing are still found.
    # NOTE(review): assumes the mouse gene list uses the same symbols as the
    # mouse signature tables - confirm.
    capitalized = gene.lower().capitalize()
    if capitalized in mouse_genes:
        alt_gene = capitalized
    else:
        mouse_symbol_by_upper = {g.upper(): g for g in mouse_genes}
        alt_gene = mouse_symbol_by_upper.get(gene_key, capitalized)

if is_overlap:
    display(HTML(f'<div style="font-size:1rem;padding:1rem 0;">Corresponding gene in {alt_species}: <b>{alt_gene}</b>.</div>'))

In [None]:
# Repeat the load / combine / plot steps for the species that was not selected,
# but only when the gene symbol exists in both species.
if is_overlap:
    # p-values
    pval_df_alt = pd.read_csv(
        root_path + f"all_{alt_species.lower()}_pval.tsv",
        index_col=0,
        sep='\t',
    )
    # fold change
    fc_df_alt = pd.read_csv(
        root_path + f"all_{alt_species.lower()}_fc.tsv",
        index_col=0,
        sep='\t',
    )
    # Bulk RNA-seq Appyter analysis instances
    inst_df_alt = pd.read_csv(
        root_path + f"{alt_species.lower()}_instances.tsv",
        index_col=0,
        sep='\t',
    )

    comb_df_alt = combine_data(
        pval_df_alt,
        fc_df_alt,
        inst_df_alt,
        alt_gene,
    )
    make_plot(comb_df_alt, alt_species, alt_gene)

In [None]:
# build the alternate-species result tables; `comb_df_alt` only exists when is_overlap is true
if is_overlap:
    up_comb_df_alt, dn_comb_df_alt = make_tables(comb_df_alt)

In [None]:
# show the up-regulated table for the alternate species
if is_overlap:
    display(HTML(f'<div style="font-size:1rem;padding:1rem 0;"><b>Top 10 {alt_species} signatures where {alt_gene} is up-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the generated <a> links clickable
    display(HTML(up_comb_df_alt.to_html(escape=False,index=False)))

In [None]:
# show the down-regulated table for the alternate species
if is_overlap:
    display(HTML(f'<div style="font-size:1rem;padding:1rem 0;"><b>Top 10 {alt_species} signatures where {alt_gene} is down-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the generated <a> links clickable
    display(HTML(dn_comb_df_alt.to_html(escape=False,index=False)))

In [None]:
# tell the reader why the alternate-species section is empty
if not is_overlap:
    display(HTML(f'<div style="font-size:1rem;padding:1rem 0;">Corresponding gene not found in {alt_species}.</div>'))