# Phosphoproteomics GSEA and Heatmap

This notebook examines phosphosites that are significant in at least one cancer. Pan-cancer heatmaps are drawn in which circle size shows significance and color shows the difference in medians.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp

import cptac
import cptac.utils as u
import plot_utils as p

# Make heatmap of sites significant in multiple cancers

In [4]:
# Load the table of phosphosites that are significant in multiple cancers.
# The relative path uses forward slashes so that it resolves on Windows, macOS
# and Linux alike and the string literal contains no invalid backslash escapes.
mult_df = pd.read_csv('../Make_Tables/csv/mult_sig_heatmap_phospho.csv')
print('sig in mult cancers:', len(mult_df.Phospho.unique()))

sig in mult cancers: 3


In [5]:
# Derive the gene symbol of every phosphosite by stripping the trailing
# "_<site>" part(s) from its Phospho label (e.g. SOS1_S1161 -> SOS1).
site_suffix = r'(_[\w][\d]*[\w]?[\d]*[\w]?[\d]*)*$'
mult_df['Gene'] = mult_df['Phospho'].replace(site_suffix, '', regex=True)

# Distinct gene symbols, used as the Enrichr input below
prot_list = list(mult_df['Gene'].unique())
mult_df.head()

Unnamed: 0,Phospho,P_Value,Medians,Cancer,Gene
0,SOS1_S1161,5.5e-05,1.3025,LUAD,SOS1
1,MKNK2_S220,0.015097,1.3387,LUAD,MKNK2
2,NFKB2_S858,0.015097,1.40475,LUAD,NFKB2
3,MKNK2_S220,0.001348,0.947,EC,MKNK2
4,NFKB2_S858,0.001348,0.69285,EC,NFKB2


In [6]:
# Run Enrichr on the gene list against the Reactome_2016 gene-set library.
# NOTE(review): outdir='/Enrichr' is a root-level path, not a folder next to
# the notebook -- confirm that this is the intended output location.
prot_enr = gp.enrichr(
    gene_list=prot_list,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='/Enrichr',
)



In [7]:
# Display the first 7 rows of the enrichment result, key columns only
summary_cols = ['Term', 'Overlap', 'Adjusted P-value', 'Genes']
prot_enr.res2d[summary_cols].head(7)

Unnamed: 0,Term,Overlap,Adjusted P-value,Genes
0,Interleukin-1 processing Homo sapiens R-HSA-44...,1/7,1.0,NFKB2
1,IkBA variant leads to EDA-ID Homo sapiens R-HS...,1/7,0.802991,NFKB2
2,Signaling by Interleukins Homo sapiens R-HSA-4...,2/392,0.578659,SOS1;NFKB2
3,IKBKG deficiency causes anhidrotic ectodermal ...,1/8,0.458829,NFKB2
4,EGFR Transactivation by Gastrin Homo sapiens R...,1/9,0.412926,SOS1
5,SHC-related events triggered by IGF1R Homo sap...,1/9,0.344105,SOS1
6,Signal attenuation Homo sapiens R-HSA-74749,1/10,0.327703,SOS1


In [8]:
# Circle heatmap of the multi-cancer sites: circles are driven by P_Value and
# colors by Medians, with phosphosites on the x axis and cancers on the y axis.
# (A save_png='....png' argument was left disabled in the original call.)
p.plotCircleHeatMap(
    mult_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Phospho',
    y_axis='Cancer',
    plot_height=300,
    plot_width=700,
    legend_min=.01,
    legend_max=0.05,
    font_size=14,
)

# GSEA of sites significant in one cancer

In [None]:
# Load the table of phosphosites that are significant in at least one cancer.
# The path mirrors the multi-cancer table loaded earlier (same ../Make_Tables/csv
# folder) and uses forward slashes so it is portable and free of invalid
# backslash escapes.
sig_df = pd.read_csv('../Make_Tables/csv/sig_pval_heatmap_phospho.csv')
print('sig in one cancer:', len(sig_df.Phospho.unique()))

In [None]:
# Gene symbol = Phospho label with its trailing "_<site>" part(s) removed
sig_df['Gene'] = sig_df['Phospho'].replace(
    r'(_[\w][\d]*[\w]?[\d]*[\w]?[\d]*)*$', '', regex=True
)

# Distinct gene symbols found in the table
prot_list = list(sig_df['Gene'].unique())
sig_df.head()

In [None]:
# Run Enrichr on the unique genes of this section against Reactome_2016.
# The gene list is prot_list (built in the previous cell); the earlier version
# referenced a variable that is only created further down the notebook, which
# fails with a NameError on a fresh kernel (Restart & Run All).
# NOTE(review): outdir='/Enrichr' is a root-level path -- confirm it is intended.
prot_enr = gp.enrichr(
    gene_list=prot_list,
    description='Tumor_partition',
    gene_sets='Reactome_2016',
    outdir='/Enrichr',
)

In [None]:
# Display the first 15 rows of the enrichment result, key columns only
summary_cols = ['Term', 'Overlap', 'Adjusted P-value', 'Genes']
prot_enr.res2d[summary_cols].head(15)

In [None]:
# List every enriched term so that one can be picked by its row label
prot_enr.res2d.loc[:, 'Term']

# Make heatmap of sites significant in one cancer

In [None]:
# Choose one enriched term by its row label in the result table and split its
# semicolon-separated gene string into a list.
i = 4
genes = prot_enr.res2d['Genes'][i]
g_list = genes.split(';')

print(prot_enr.res2d['Term'][i])
print('Num genes:', len(g_list))

In [None]:
# Restrict the table to significant sites (P_Value < 0.05) and then to a
# single cancer, collecting that cancer's genes in `one`.
# NOTE(review): the Cancer labels visible in the multi-cancer table are 'LUAD'
# and 'EC'; confirm that 'Endo' really occurs in sig_df['Cancer'], otherwise
# one_df is empty.
c = 'Endo' #'Luad'
sig_df = sig_df.loc[sig_df['P_Value'] < 0.05]
one_df = sig_df.loc[sig_df['Cancer'] == c]
one = list(one_df.Gene)

# Kept as the last expression so the row count is actually displayed; placed
# mid-cell its value was silently discarded.
len(one_df)

In [None]:
# Keep the significant rows (P_Value < 0.05) whose gene belongs to the selected
# term. Both conditions are evaluated on sig_df and combined into one mask, so
# the mask and the frame it filters always share the same index (the earlier
# version filtered plot_df with a mask computed on sig_df).
get = sig_df['Gene'].isin(g_list)
plot_df = sig_df.loc[get & (sig_df['P_Value'] < 0.05)]

In [None]:
# Circle heatmap of the selected term's sites: circles are driven by P_Value
# and colors by Medians, with phosphosites on the x axis and cancers on the y axis.
# (A save_png='....png' argument was left disabled in the original call.)
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Phospho',
    y_axis='Cancer',
    plot_height=400,
    plot_width=1500,
    legend_min=.01,
    legend_max=0.05,
)