In [None]:
import sys
sys.path.append("../..")
from IPython.display import display
%matplotlib inline

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
import numpy as np

import magine.data.tools as dt

from magine.plotting.heatmaps import  heatmap_from_array
from magine.plotting.wordcloud_tools import create_wordcloud
from magine.plotting.venn_diagram_maker import create_venn2, create_venn3
from magine.plotting.species_plotting import plot_species

In [None]:
# load the experimental data
from exp_data import exp_data


# Visualize experimental data


In [None]:
# Render three summary tables with progressively stricter options:
# defaults, sig=True, then sig=True together with unique=True.
for table_options in ({}, {'sig': True}, {'sig': True, 'unique': True}):
    display(exp_data.create_table_of_data(**table_options))

In [None]:
# Volcano plot of the label-free data set; save_name is 'lf_volcano'.
v_plot = exp_data.label_free.volcano_plot(save_name='lf_volcano')
# Histogram of the 'label_free' measurements; the second argument ('hist')
# is presumably the output name -- confirm against the magine API.
hist_plot = exp_data.create_histogram_measurements('label_free', 'hist')

In [None]:
# Work on a copy restricted to the label-free and RNA-seq rows, then pass the
# 'fold_change' column through magine's log2 normalization helper.
sources_of_interest = ['label_free', 'rna_seq']
data = exp_data.data.copy()
data = dt.log2_normalize_df(
    data[data['source'].isin(sources_of_interest)],
    'fold_change',
)

In [None]:
# One violin panel per measure (fold change, p-value) across sample ids,
# coloured by data source.
violin_grid_options = dict(
    x_vars=['sample_id'],
    y_vars=['fold_change', 'p_value'],
    hue='source',
    aspect=1.25,
    size=3.5,
)
g = sns.PairGrid(data, **violin_grid_options)
g.map(sns.violinplot, palette="pastel", split=True);
g.fig.savefig('violin_plot-lf_rna_over_time.png', dpi=300)

In [None]:
# Normalize a fresh copy of the label-free data so the cell can be re-run.
lf = exp_data.label_free.copy()
lf.log2_normalize_df(inplace=True)

# Fix the x-axis order so samples appear sorted by id.
lf_sample_order = sorted(lf['sample_id'].unique())

g = sns.PairGrid(
    lf,
    x_vars=['sample_id'],
    y_vars=['fold_change', 'p_value'],
    hue='source',
    aspect=1.25,
    size=3.5,
)
g.map(sns.violinplot, palette="pastel", split=True, order=lf_sample_order);
g.fig.savefig('violin_plot_lf.png', dpi=300)


In [None]:
rna_seq = exp_data.rna_seq.copy()

# Print the number of entries per sample: first the up_by_sample lists,
# then the down_by_sample lists.
for direction in ('up_by_sample', 'down_by_sample'):
    for sample_id, species in zip(rna_seq.sample_ids, getattr(rna_seq, direction)):
        print(sample_id, len(species))

exp_data.time_series_volcano(
    exp_data_type='rna_seq',
    save_name='rna_seq_volcano_over_time',
)

In [None]:
# Volcano plot of the RNA-seq copy created in the previous cell.
rna_seq.volcano_plot('rna_seq_volcano');

# Normalize a private copy instead of `rna_seq` itself. Normalizing in place
# made this cell non-idempotent: a second run would apply the transform again
# and the volcano plot above would then see already-transformed values.
rna_seq_log2 = rna_seq.copy()
rna_seq_log2.log2_normalize_df(inplace=True)

g = sns.PairGrid(
    rna_seq_log2,
    x_vars=['sample_id'],
    y_vars=['fold_change', 'p_value'],
    hue='source',
    aspect=1.25,
    size=4.5,
)

# Sort the sample ids so the x axis has a stable order.
g.map(sns.violinplot, palette="pastel", split=True,
      order=sorted(rna_seq_log2['sample_id'].unique()));
g.fig.savefig('violin_plot_rna_seq.png', dpi=300)

In [None]:
def create_pie(data, s_name):
    """Draw, save and show a two-wedge pie chart of measured vs. significant ids.

    Parameters
    ----------
    data : object
        Data set exposing ``id_list`` and ``sig.id_list``
        (e.g. ``exp_data.label_free``).
    s_name : str
        Prefix of the output file; the chart is written to
        ``'<s_name>_pie.png'`` relative to the working directory.

    Notes
    -----
    The wedge annotations are absolute counts, not percentages.
    """
    n_measured = len(data.id_list)
    n_significant = len(data.sig.id_list)
    total = n_measured + n_significant

    def _as_count(pct):
        # matplotlib hands autopct each wedge's percentage of the whole;
        # scale it back to the number of ids the wedge represents.
        return '{:.0f}'.format(pct * total / 100)

    fig = plt.figure(figsize=(3, 3))
    ax = fig.add_subplot(111)
    # Only the count annotations are restyled afterwards, so the wedge and
    # label handles returned by pie() are discarded.
    _, _, autotexts = ax.pie(
        [n_measured, n_significant],
        explode=(0.05, 0.05),
        # The original built these labels but never passed them to pie(),
        # leaving the wedges unidentified.
        labels=["Measured", "Significant"],
        textprops={'fontsize': 16},
        # A named helper replaces `lambda(p): ...`, whose parenthesized
        # parameter is a SyntaxError on Python 3.
        autopct=_as_count,
        shadow=True,
        startangle=140,
    )

    plt.setp(autotexts, size=20)
    plt.axis('equal')
    plt.savefig('{}_pie.png'.format(s_name), dpi=300, bbox_inches='tight')
    plt.show()
    plt.close()
# One pie chart per data set: (attribute on exp_data, output file prefix).
for dataset_attr, file_prefix in (
        ('label_free', 'lf'),
        ('rna', 'rna_seq'),
        ('silac', 'silac'),
        ('ph_silac', 'ph_silac'),
):
    create_pie(getattr(exp_data, dataset_attr), file_prefix)

In [None]:
# Independent working copies, so the edits made in later cells do not touch
# the objects held by exp_data.
met, proteins, rna_seq = (
    exp_data.compounds.copy(),
    exp_data.proteins.copy(),
    exp_data.rna.copy(),
)

In [None]:
# Filter the compounds with min_terms=4 over the sample_id columns
# (judging by the method name, a minimum count of significant samples).
met_sig = met.filter_by_minimum_sig_columns(
    index='identifier', columns='sample_id', min_terms=4,
)

# Row-clustered heatmap of fold change per sample.
fig = heatmap_from_array(
    met_sig,
    index='label',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=True,
    div_colors=True,
    num_colors=11,
    fig_size=(8, 12),
)

plt.savefig('metabolomics_clustered.png', dpi=300, bbox_inches='tight')

In [None]:
# Cap fold changes to the range [-10, 10] before plotting.
FOLD_CHANGE_CAP = 10
proteins.loc[proteins['fold_change'] > FOLD_CHANGE_CAP, 'fold_change'] = FOLD_CHANGE_CAP
proteins.loc[proteins['fold_change'] < -FOLD_CHANGE_CAP, 'fold_change'] = -FOLD_CHANGE_CAP

# NOTE(review): the filter is indexed by 'label' while the heatmap below is
# indexed by 'identifier' (the metabolite cell does the reverse) -- confirm
# this is intended.
proteins_sig = proteins.filter_by_minimum_sig_columns(
    index='label', columns='sample_id', min_terms=3,
)

fig = heatmap_from_array(
    proteins_sig,
    index='identifier',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=True,
    div_colors=True,
    num_colors=11,
    fig_size=(8, 8),
)

fig.fig.savefig('proteins_clustered_time.png', dpi=250, bbox_inches='tight')

In [None]:
# Unclustered heatmap of the significant RNA measurements (figure not saved).
fig = heatmap_from_array(
    rna_seq.sig,
    index='identifier',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=False,
    div_colors=True,
    num_colors=21,
    fig_size=(8, 8),
)

In [None]:
# Overlap between the significant RNA ids and the significant protein ids.
create_venn2(
    exp_data.rna.sig.id_list,
    exp_data.proteins.sig.id_list,
    'RNA',
    'Protein',
    'venn_rna_and_protein',
);

In [None]:
# Significant ids from each proteomics platform.
# Note: `lf` is rebound here from the label-free data object used in the
# violin-plot cell to a plain id list.
lf, silac, phsilac = (
    exp_data.label_free.sig.id_list,
    exp_data.silac.sig.id_list,
    exp_data.ph_silac.sig.id_list,
)

create_venn3(
    lf, silac, phsilac,
    'LF', 'SILAC', 'ph-SILAC',
    'venn_proteomics',
);

In [None]:
# Significant ids from the two metabolomics data sets (HILIC and C18, the
# latter labelled 'RPLC' in the figure).
hilic, rplc = exp_data.HILIC.sig.id_list, exp_data.C18.sig.id_list

create_venn2(
    hilic,
    rplc,
    'HILIC',
    'RPLC',
    'venn_metabolomics',
);

In [None]:
# Plot three example species with the matplotlib backend.
# NOTE(review): 'timecoure' in save_name looks like a typo for 'timecourse';
# left unchanged because the output file name may be referenced elsewhere.
example_species = ['VDAC1', 'TRIM28', 'DKC1']
exp_data.species.plot_species(
    example_species,
    save_name='proteins_timecoure_example',
    plot_type='matplotlib',
    image_format='png',
);

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Number of significant RNA entries in each sample, followed by the sample ids.
for sample_species in exp_data.rna.sig.by_sample:
    print(len(sample_species))
print(exp_data.rna.sig.sample_ids)

In [None]:
from magine.enrichment.enrichr import Enrichr


# Enrichr wrapper instance shared by the enrichment cells below
# (used via e.run_samples and e.run).
e = Enrichr()

In [None]:
# Enrichment of the down-regulated significant RNA lists, one per sample,
# against the ENCODE/ChEA consensus transcription-factor library.
down_tf = e.run_samples(
    exp_data.rna.sig.down_by_sample,
    sample_ids=exp_data.rna.sig.sample_ids,
    database='ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
)

In [None]:
# Flag a term as significant when adjusted p < 0.05 and combined score > 1.
is_significant = (down_tf['adj_p_value'] < 0.05) & (down_tf['combined_score'] > 1)
down_tf['significant_flag'] = False
down_tf.loc[is_significant, 'significant_flag'] = True

# Filter with min_terms=1 over the sample_id columns, drop redundant terms,
# and rank what is left by combined score (highest first).
down_tf.filter_by_minimum_sig_columns(columns='sample_id', min_terms=1, inplace=True)
down_tf.remove_redundant(level='all', inplace=True)
down_tf.dist_matrix(level='all');
down_tf.sort_values('combined_score', ascending=False, inplace=True)

In [None]:
# First 15 rows of down_tf (sorted by descending combined_score in the previous cell).
down_tf.head(15)

# UBTF downregulation is necessary for cisplatin-induced apoptosis
*Here we show that displacement of UBF and ablation of rRNA synthesis are very early effects of cisplatin treatment, and that in the absence of cisplatin, elimination of UBF protein is sufficient to induce fully penetrant apoptotic cell death.*

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4695006/


In [None]:
# Run Enrichr on the genes behind the 'ubtf_encode' term and show the first 15 rows.
ubtf_genes = down_tf.term_to_genes('ubtf_encode')
df = e.run(ubtf_genes)
df.head(15)

In [None]:
# Heatmap of combined score per term and sample, with significance annotations.
fig = heatmap_from_array(
    down_tf,
    index='term_name',
    columns='sample_id',
    values='combined_score',
    convert_to_log=True,
    cluster_row=False,
    div_colors=True,
    annotate_sig=True,
    num_colors=11,
    fig_size=(8, 12),
)

In [None]:
# Number of up-regulated significant RNA entries in each sample.
for up_species in exp_data.rna.sig.up_by_sample:
    print(len(up_species))
    

In [None]:
# Enrichment of the up-regulated significant RNA lists, one per sample,
# against the ENCODE/ChEA consensus transcription-factor library.
up_tf = e.run_samples(
    exp_data.rna.sig.up_by_sample,
    sample_ids=exp_data.rna.sig.sample_ids,
    database='ENCODE_and_ChEA_Consensus_TFs_from_ChIP-X',
)

# Flag significant terms with the same rule as the down-regulated analysis:
# adjusted p < 0.05 and combined score > 1. This cell previously used
# `<= 0.05`, so the two directions were judged by different thresholds.
up_tf['significant_flag'] = False
crit = (up_tf['adj_p_value'] < 0.05) & (up_tf['combined_score'] > 1)
up_tf.loc[crit, 'significant_flag'] = True

# Filter with min_terms=1 over the sample_id columns, drop redundant terms,
# and rank what is left by combined score (highest first).
up_tf.filter_by_minimum_sig_columns(columns='sample_id', min_terms=1, inplace=True)
up_tf.remove_redundant(level='all', inplace=True)
up_tf.dist_matrix(level='all');
up_tf.sort_values('combined_score', inplace=True, ascending=False)

# A bare `up_tf.head(10)` in the middle of a cell is evaluated and thrown
# away; display() makes the table actually appear in the output.
display(up_tf.head(10))

# Heatmap of combined score per term and sample, with significance annotations.
fig = heatmap_from_array(up_tf, convert_to_log=True, cluster_row=False,
                         index='term_name', values='combined_score',
                         columns='sample_id', div_colors=True,
                         annotate_sig=True,
                         fig_size=(8, 12), num_colors=11)