In [None]:
#%%appyter init
# Initialise appyter for this notebook. The lambda binds the builtin `globals`
# as a default argument and calls it, so `magic.init` receives a callable that
# returns this notebook's global namespace.
# NOTE(review): presumably this is what registers the `%%appyter` cell magics
# used in later cells -- confirm against the appyter documentation.
from appyter import magic
magic.init(lambda _=globals: _())

In [None]:
# imports
import numpy as np
import pandas as pd
import matplotlib.cm as cm
import matplotlib.colors as colors
from IPython.display import HTML

# bokeh
from bokeh.io import output_notebook
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource

# display graphics: configure Bokeh so that later show() calls render their
# plots inline in the notebook output
output_notebook()

# My Gene's Expression in Type 2 Diabetes Transcriptomics Signatures

In [None]:
%%appyter hide
{# Declares the 'INPUT' section of the input form (title, subtitle and image).
   Fields declared with section = 'INPUT' refer to this section by name;
   presumably they are grouped under it in the rendered form. #}
{% do SectionField(
    name = 'INPUT',
    title = 'Input Parameters',
    subtitle = 'Fill in the species and gene of interest',
    img = 'mini_plot.png'
)%}

In [None]:
%%appyter code_exec
{# Declares `species_input`: a tab field in the 'INPUT' section with one tab per
   species ('Human' is the default). Each tab holds a single autocomplete field
   for the gene symbol; `file_path` points at a per-species JSON file,
   presumably the list of symbols offered for completion.
   A later cell reads `species_input.raw_value` (the species) and
   `species_input.value[0]` (the gene field of the selected tab).
   NOTE(review): the mouse default 'CD29' looks like an alias rather than an
   official mouse gene symbol; presumably it is meant to exercise the synonym
   lookup further down. Confirm that this is intentional. #}
{% set species_input = TabField(
    name = 'species_input',
    label = 'Species of Interest',
    default = 'Human',
    description = 'Select the species of interest.',
    section = 'INPUT',
    choices = {
        'Human': [
            AutocompleteField(
                name = 'human_gene',
                label = 'Human Gene of Interest',
                default = 'SLC2A2',
                description = 'Enter the gene symbol of interest (human).',
                file_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/human_genes.json'
            )
        ],
        'Mouse': [
            AutocompleteField(
                name = 'mouse_gene',
                label = 'Mouse Gene of Interest',
                default = 'CD29',
                description = 'Enter the gene symbol of interest (mouse).',
                file_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/mouse_genes.json'
            )
        ]
    }
)%}

In [None]:
%%appyter code_exec
# Selected species tab. Later cells compare it against "Human", lower-case it
# to build the data file names, and use it as a key into the NCBI gene_info URLs.
species = "{{ species_input.raw_value }}"
# `gene` is the symbol used to index the signature tables and may later be
# replaced by an official symbol found through the synonym lookup;
# `input_gene` keeps what the user entered so messages can show both.
# NOTE(review): the template expression below presumably renders as a quoted
# Python string literal -- confirm against appyter's field rendering.
gene = {{ species_input.value[0] }}
input_gene = {{ species_input.value[0] }}

In [None]:
# Load the preprocessed signature tables for the selected species:
# first the p-values, then the fold changes. Both are tab-separated files
# whose first column becomes the row index.
root_path = 'https://appyters.maayanlab.cloud/storage/Gene_Expression_T2D_Signatures/'
species_prefix = f"all_{species.lower()}"
pval_df, fc_df = (
    pd.read_csv(f"{root_path}{species_prefix}_{table_kind}.tsv", sep='\t', index_col=0)
    for table_kind in ('pval', 'fc')
)

In [None]:
# standardize gene format: human input is upper-cased and stripped of hyphens
if species == "Human":
    gene = gene.upper().replace('-','')
    input_gene = input_gene.upper().replace('-','')

# find gene synonym, if necessary
sig_genes = pval_df.index.tolist()
gene_info = {
    'Human': 'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Homo_sapiens.gene_info.gz',
    'Mouse': 'ftp://ftp.ncbi.nih.gov/gene/DATA/GENE_INFO/Mammalia/Mus_musculus.gene_info.gz'
}

# Tri-state flag read by every downstream cell:
#   False -> the input symbol itself is present in the signature data
#   True  -> the input was resolved through an unambiguous synonym
#   None  -> the gene could not be resolved; downstream cells skip their work
has_syn = False

if gene not in sig_genes:
    # The gene_info table is only downloaded when the direct lookup fails.
    gene_df = pd.read_csv(gene_info[species], sep='\t', compression='gzip')[['Symbol', 'Synonyms']]
    gene_map_ind = []
    gene_map_val = []
    for row in gene_df.itertuples():
        # Skip '-' entries and any value pandas parsed as missing (NaN is a
        # float and has no .split), which would otherwise abort the loop.
        if isinstance(row.Synonyms, str) and row.Synonyms != '-':
            for syn in row.Synonyms.split('|'):
                # entries of the form 'prefix:name' contribute the part after the first colon
                syn = syn.split(':')[1] if ':' in syn else syn
                gene_map_ind.append(syn)
                gene_map_val.append(row.Symbol)
    gene_map = pd.DataFrame.from_records(
        zip(gene_map_ind, gene_map_val),
        columns=['synonym', 'symbol']
    ).set_index('synonym')
    # drop synonyms that point at more than one symbol: they cannot be resolved unambiguously
    syn_counts = gene_map.index.value_counts()
    gene_map = gene_map.drop(syn_counts[syn_counts > 1].index)

    # Accept a synonym only when its official symbol is itself present in the
    # signature data; otherwise the downstream `pval_df.loc[gene]` would raise.
    # An explicit membership test replaces the former bare `except:`, which
    # also hid unrelated errors.
    if input_gene in gene_map.index and gene_map.loc[input_gene, 'symbol'] in pval_df.index:
        gene = gene_map.loc[input_gene, 'symbol']
        has_syn = True
        display(HTML(f"<div style='font-size:1.5rem;padding:1.5rem 0;'>{input_gene} not in signature data, using gene synonym {gene}.</div>"))
    else:
        display(HTML(f"<div style='font-size:2rem;padding:2rem 0;'>{input_gene} not found in signature data and no synonyms found either. Please check your spelling, or try again later with a synonym.</div>"))
        has_syn = None

In [None]:
# Build the per-signature table for the selected gene: one row per signature
# with its p-value, -log10(p-value) and fold change (column order matters for
# the tables rendered further down).
if has_syn is not None:
    comb_df = pd.DataFrame({
        'sig': pval_df.columns.tolist(),
        'pval': pval_df.loc[gene].tolist(),
    }).assign(
        logpv=lambda frame: -np.log10(frame['pval']),
        fc=fc_df.loc[gene].tolist(),
    )

In [None]:
# configure color scheme
if has_syn is not None:
    # Reversed sequential colormaps: smaller p-values map to darker shades.
    # The colormaps are read as attributes of `matplotlib.cm` because
    # `cm.get_cmap()` was deprecated in Matplotlib 3.7 and removed in 3.9.
    red_map = cm.Reds_r
    # NOTE(review): vmin is below 0, presumably so that p-values near 0 do not
    # use the very darkest end of the colormap -- confirm the intent.
    red_norm = colors.Normalize(vmin=-0.25, vmax=1)
    blue_map = cm.Blues_r
    blue_norm = colors.Normalize(vmin=-0.25, vmax=1)

    def map_color(fc, pv):
        """Return the hex color of one point of the volcano plot.

        Parameters
        ----------
        fc : float
            Fold change of the gene in the signature; its sign selects the hue
            (negative -> red, zero -> gray, anything else -> blue).
        pv : float
            P-value of the gene in the signature; selects the shade.

        Returns
        -------
        str
            Color as a '#rrggbb' hex string.
        """
        if fc < 0:
            return colors.to_hex(red_map(red_norm(pv)))
        elif fc == 0:
            return '#808080'
        else:
            return colors.to_hex(blue_map(blue_norm(pv)))

    # one pass over the two columns that are actually needed
    plot_colors = [map_color(fc, pv) for fc, pv in zip(comb_df['fc'], comb_df['pval'])]
    # significant signatures (p < 0.05) are drawn with a larger marker
    sizes = [12 if pv < 0.05 else 6 for pv in comb_df['pval']]

In [None]:
if has_syn is not None:
    # generate data source: x/y position the points, the remaining columns
    # feed the hover tooltip and the per-point color and size
    data_source = ColumnDataSource(
        data=dict(
            x = comb_df['fc'],
            y = comb_df['logpv'],
            sig = comb_df['sig'],
            pval = comb_df['pval'],
            fc = comb_df['fc'],
            colors = plot_colors,
            sizes = sizes
        )
    )
    # create hover tooltip ("@name" refers to a column of the data source)
    tools = [
        ("Signature", "@sig"),
        ("P-Value", "@pval"),
        ("Fold Change", "@fc"),
    ]
    # generate plot
    # `width`/`height` are used instead of `plot_width`/`plot_height`, which
    # were deprecated in Bokeh 2.4 and removed in Bokeh 3.0.
    plot = figure(
        width=700,
        height=500,
        tooltips=tools
    )
    plot.circle(
        'x', 'y',
        size='sizes',
        alpha=0.7,
        line_alpha=0,
        line_width=0.01,
        source=data_source,
        fill_color='colors',
        name='t2d_expression_volcano_plot'
    )
    plot.xaxis.axis_label = 'Fold Change'
    plot.yaxis.axis_label = '-log10(p-value)'
    # mention the user's original input when it was resolved through a synonym
    gene_label = f"{gene} (synonym {input_gene})" if has_syn else gene
    plot.title.text = f"Differential Expression of {gene_label} in Type 2 Diabetes Transcriptomics Signatures"
    plot.title.align = 'center'
    plot.title.text_font_size = '14px'

The volcano plot below positions each of the Type 2 Diabetes signatures according to the gene-specific fold change (x-position) and -log10(p-value) (y-position). **Red** points indicate signatures where the fold change of the chosen gene was < 0, **blue** points indicate fold change > 0, and **gray** points indicate fold change == 0. Signatures in which the specified gene was significantly differentially expressed (p < 0.05) in either direction are drawn with a larger point size, and smaller p-values are shown in darker colors.

Hover over any point to display the corresponding signature name, the p-value, and the fold change. 

Use the toolbar on the right side of the plot to pan, zoom, or save the plot.

In [None]:
# render the volcano plot, unless the gene could not be resolved
if has_syn is not None:
    show(plot)

In [None]:
if has_syn is not None:
    # create tables of significant results with links to GEO
    def geo_link(sig_name):
        """Return an HTML link to the GEO record of a signature.

        The accession is taken to be the part of `sig_name` before the first
        underscore, with any '* ' significance marker removed.
        """
        gse_id = sig_name.split('_')[0].replace('* ', '')
        geo_path = 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc='
        return f'<a target="_blank" href="{geo_path}{gse_id}">{gse_id}</a>'

    def _top_signatures(direction_mask, n=10):
        """Return the `n` rows of `comb_df` with the smallest p-values among
        those selected by `direction_mask`, formatted for display."""
        top_df = comb_df[direction_mask].sort_values(by='pval', ascending=True)[:n]
        top_df = top_df.drop(columns='logpv')
        # Prefix significant signatures with '* '. A list comprehension is used
        # rather than DataFrame.apply(axis=1) because the latter returns a
        # DataFrame for an empty selection, which cannot be assigned to a
        # single column (e.g. a gene with no up-regulated signatures).
        top_df['sig'] = [
            f"* {sig}" if pval < 0.05 else sig
            for sig, pval in zip(top_df['sig'], top_df['pval'])
        ]
        top_df = top_df.rename(columns={'sig': 'Signature', 'pval': 'P-value', 'fc': 'Fold Change'})
        top_df['Link to GEO Study'] = top_df['Signature'].apply(geo_link)
        return top_df

    up_comb_df = _top_signatures(comb_df['fc'] > 0)
    dn_comb_df = _top_signatures(comb_df['fc'] < 0)

In [None]:
# show the table of up-regulated signatures, unless the gene could not be resolved
if has_syn is not None:
    # mention the user's original input when it was resolved through a synonym
    gene_label = f"{gene} (synonym {input_gene})" if has_syn else gene
    # `padding:0.5rem` replaces the invalid CSS declaration `padding=0.5rem`,
    # which browsers ignore
    display(HTML(f'<div style="font-size:1.25rem;padding:0.5rem;"><b>Top 10 signatures where {gene_label} is up-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the GEO links in the table clickable
    display(HTML(up_comb_df.to_html(escape=False,index=False)))

In [None]:
# show the table of down-regulated signatures, unless the gene could not be resolved
if has_syn is not None:
    # mention the user's original input when it was resolved through a synonym
    gene_label = f"{gene} (synonym {input_gene})" if has_syn else gene
    # The header says "down-regulated" in both cases (the no-synonym branch
    # used to say "up-regulated"), and `padding:0.5rem` replaces the invalid
    # CSS declaration `padding=0.5rem`.
    display(HTML(f'<div style="font-size:1.25rem;padding:0.5rem;"><b>Top 10 signatures where {gene_label} is down-regulated</b></div>'))
    display(HTML('<div>Asterisk (*) denotes significance (p < 0.05)</div>'))
    # escape=False keeps the GEO links in the table clickable
    display(HTML(dn_comb_df.to_html(escape=False,index=False)))