In [None]:
import sys
sys.path.append("../..")
from IPython.display import display
%matplotlib inline

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr, spearmanr
import numpy as np

import magine.data.tools as dt

from magine.plotting.heatmaps import  heatmap_from_array
from magine.plotting.wordcloud_tools import create_wordcloud
from magine.plotting.venn_diagram_maker import create_venn2, create_venn3
from magine.plotting.species_plotting import plot_species

In [None]:
# load the experimental data
from exp_data import exp_data

# Visualize experimental data


In [None]:
# Show three summary tables built by exp_data.create_table_of_data: one with
# the default options, one with sig=True, and one with sig=True plus
# unique=True.  Every call is also handed its own save_name.
table_requests = (
    dict(save_name='table_measured'),
    dict(sig=True, save_name='table_sig_measured'),
    dict(sig=True, unique=True, save_name='table_sig_unique_measured'),
)
for table_kwargs in table_requests:
    display(exp_data.create_table_of_data(**table_kwargs))

In [None]:
# Volcano plot of the label-free measurements; 'lf_volcano' is passed as the
# save_name.
v_plot = exp_data.label_free.volcano_plot(save_name='lf_volcano')
# Histogram of the measurements for the 'label_free' source.
# NOTE(review): 'hist' is the second positional argument — presumably the
# output/save name; confirm against create_histogram_measurements.
hist_plot = exp_data.create_histogram_measurements('label_free', 'hist')

In [None]:
# Build `data`: a private copy of the full measurement table restricted to the
# 'label_free' and 'rna_seq' sources, with 'fold_change' passed through
# dt.log2_normalize_df.
all_measurements = exp_data.data.copy()
from_lf_or_rna = all_measurements['source'].isin(['label_free', 'rna_seq'])
data = dt.log2_normalize_df(all_measurements[from_lf_or_rna], 'fold_change')

In [None]:
# Violin plots of 'fold_change' and 'p_value' (one row each) against
# 'sample_id', coloured by 'source', then written to a 300-dpi PNG.
#
# `height` is the current name of the per-facet size argument: seaborn 0.9
# renamed `size` to `height`, and recent releases no longer accept `size` at
# all, so the old spelling raises TypeError there.
g = sns.PairGrid(data,
                 x_vars=['sample_id'],
                 y_vars=['fold_change', 'p_value'],
                 hue='source',
                 aspect=1.25, height=3.5)
# Trailing ';' suppresses the grid's repr in the cell output.
g.map(sns.violinplot, palette="pastel", split=True);
g.fig.savefig('violin_plot-lf_rna_over_time.png', dpi=300)

In [None]:
# Violin plot of normalised label-free fold change for each sample.
#
# Work on a copy, as the other cells that transform exp_data frames do, so the
# normalisation step can never write back into exp_data.label_free (which is
# read again below and by later cells).
lf = exp_data.label_free.copy()
lf = dt.log2_normalize_df(lf, 'fold_change')
# Sort the sample ids so the x-axis order is deterministic.
g = sns.violinplot(x="sample_id", y="fold_change", data=lf,
                   palette="Set3", order=sorted(lf['sample_id'].unique()))

plt.savefig("violin_plot.png", dpi=300)
# Show which 'source' values the label-free table contains.
display(exp_data.label_free['source'].unique())

In [None]:
# Take independent working copies of the compound, protein and RNA tables so
# that later cells can modify them (the protein fold changes are clamped
# further down) without touching the originals held by exp_data.
met, proteins, rna_seq = (
    dataset.copy()
    for dataset in (exp_data.compounds, exp_data.proteins, exp_data.rna)
)

In [None]:
# Row-clustered heatmap of compound fold changes, saved as a 300-dpi PNG.
# The compounds are first reduced with filter_by_minimum_sig_columns
# (min_terms=4 — presumably "significant in at least four samples"; confirm
# against the method's documentation).
met_sig = met.filter_by_minimum_sig_columns(
    index='identifier', columns='sample_id', min_terms=4
)
fig = heatmap_from_array(
    met_sig,
    index='label',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=True,
    div_colors=True,
    num_colors=11,
    fig_size=(8, 12),
)

# Saves whichever figure pyplot currently considers active.
plt.savefig('metabolomics_clustered.png', dpi=300, bbox_inches='tight')

In [None]:
# Row-clustered heatmap of protein fold changes, saved as a 250-dpi PNG.

# Cap fold changes to the range [-10, 10] before plotting.
proteins['fold_change'] = proteins['fold_change'].clip(lower=-10, upper=10)

# min_terms=3 — presumably "significant in at least three samples"; confirm
# against filter_by_minimum_sig_columns.
proteins_sig = proteins.filter_by_minimum_sig_columns(
    index='label', columns='sample_id', min_terms=3
)
fig = heatmap_from_array(
    proteins_sig,
    index='identifier',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=True,
    div_colors=True,
    num_colors=11,
    fig_size=(8, 8),
)

# The returned object exposes the underlying matplotlib figure as `.fig`.
fig.fig.savefig('proteins_clustered_time.png', dpi=250, bbox_inches='tight')

In [None]:
# Heatmap of the RNA-seq entries exposed by `rna_seq.sig`, without row
# clustering.  The figure is only displayed here, not written to disk.
fig = heatmap_from_array(
    rna_seq.sig,
    index='identifier',
    columns='sample_id',
    values='fold_change',
    convert_to_log=True,
    cluster_row=False,
    div_colors=True,
    num_colors=11,
    fig_size=(8, 8),
)

In [None]:
# Two-set Venn diagram comparing the `.sig.id_list` of the RNA data with that
# of the protein data, labelled 'RNA' and 'Protein'.
rna_sig_ids = exp_data.rna.sig.id_list
protein_sig_ids = exp_data.proteins.sig.id_list

# Trailing ';' keeps the call's return value out of the cell output.
create_venn2(rna_sig_ids, protein_sig_ids,
             'RNA', 'Protein',
             'venn_rna_and_protein');

In [None]:
# Three-set Venn diagram across the proteomics platforms, built from each
# platform's `.sig.id_list`.
# NOTE(review): `lf` was bound to the label-free data frame in an earlier
# cell; from here on it refers to an id list instead. Re-running the earlier
# violin-plot cell after this one is fine (it rebinds `lf`), but any cell that
# expects the frame must not run after this one.
lf = exp_data.label_free.sig.id_list
silac = exp_data.silac.sig.id_list
phsilac = exp_data.ph_silac.sig.id_list

# Arguments: the three id lists, their display labels, and 'venn_proteomics'
# (presumably the output name; confirm against create_venn3).
create_venn3(lf, silac, phsilac, 
             'LF', 'SILAC', 'ph-SILAC', 'venn_proteomics');

In [None]:
# Two-set Venn diagram across the metabolomics platforms, built from each
# platform's `.sig.id_list`.  The set labelled 'RPLC' comes from exp_data.C18.
hilic = exp_data.HILIC.sig.id_list
rplc = exp_data.C18.sig.id_list

# 'venn_metabolomics' is the last positional argument (presumably the output
# name; confirm against create_venn2).
create_venn2(hilic, rplc, 'HILIC', 'RPLC', 'venn_metabolomics');

In [None]:
# Plot three example species (VDAC1, TRIM28, DKC1) with the matplotlib
# plot type, requesting PNG output.
# NOTE(review): the save_name reads 'timecour' — probably meant 'timecourse'.
# Left unchanged because other material may reference the existing file name.
exp_data.plot_species(
    ['VDAC1', 'TRIM28', 'DKC1'], 
    plot_type='matplotlib', 
    save_name='proteins_timecour_example', 
    image_format='png'
);