# Phosphoproteomics GSEA and Heatmap

This notebook examines phosphosites that are significant in at least one cancer. Pan-cancer heatmaps are created in which circle size shows significance and color shows the difference in medians.

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u
import plot_utils as p

# Significant in multiple cancers

In [112]:
# Load the table of phosphosites that are significant in multiple cancers.
# Forward slashes keep the relative path portable: the previous backslash
# separators only resolve on Windows, and '\c' / '\m' are invalid string
# escapes that newer Python versions warn about.
mult_df = pd.read_csv('Make_Tables/csv/mult_sig_heatmap_phospho.csv')
print('sig in mult cancers:', len(mult_df.Phospho.unique()))

sig in mult cancers: 12


In [113]:
# Derive a gene-level column by stripping the trailing site token(s) from each
# phosphosite ID (e.g. 'DNMBP_S1436' -> 'DNMBP'), then collect the distinct genes.
site_suffix_pattern = r'(_[\w][\d]*[\w]?[\d]*[\w]?[\d]*)*$'
mult_df['Gene'] = mult_df['Phospho'].replace(site_suffix_pattern, '', regex=True)
prot_list = mult_df['Gene'].unique().tolist()
mult_df.head()

Unnamed: 0,Phospho,P_Value,Medians,Cancer,Gene
0,DNMBP_S1436,0.000425,1.6316,Luad,DNMBP
1,SOS1_S1161,0.000547,1.3025,Luad,SOS1
2,PARP4_S1186,0.0092,1.542,Luad,PARP4
3,KIAA1217_S1568,0.022649,1.6913,Luad,KIAA1217
4,PDAP1_S170,0.025903,-1.01535,Luad,PDAP1


In [114]:
# Run an Enrichr query against the Reactome_2016 gene-set library for the genes
# in prot_list; the result object is kept so its res2d table can be inspected.
# NOTE(review): outdir is an absolute path at the filesystem root - confirm
# that this is the intended output location.
prot_enr = gp.enrichr(
    gene_list=prot_list,
    gene_sets='Reactome_2016',
    description='Tumor_partition',
    outdir='/Enrichr',
)



In [115]:
# Preview the first five rows of the enrichment results table.
prot_enr.res2d.head(5)

Unnamed: 0,Term,Overlap,P-value,Adjusted P-value,Old P-value,Old Adjusted P-value,Odds Ratio,Combined Score,Genes,Gene_set
0,Interleukin-1 processing Homo sapiens R-HSA-44...,1/7,0.004193,1.0,0,0,238.095238,1303.413145,NFKB2,Reactome_2016
1,IkBA variant leads to EDA-ID Homo sapiens R-HS...,1/7,0.004193,1.0,0,0,238.095238,1303.413145,NFKB2,Reactome_2016
2,IKBKG deficiency causes anhidrotic ectodermal ...,1/8,0.004791,1.0,0,0,208.333333,1112.724603,NFKB2,Reactome_2016
3,Type I hemidesmosome assembly Homo sapiens R-H...,1/9,0.005388,1.0,0,0,185.185185,967.32767,PLEC,Reactome_2016
4,EGFR Transactivation by Gastrin Homo sapiens R...,1/9,0.005388,1.0,0,0,185.185185,967.32767,SOS1,Reactome_2016


# Significant in one cancer

In [117]:
# Load the table of significant phosphosites with their p-values and medians.
# Forward slashes keep the relative path portable: the previous backslash
# separators only resolve on Windows, and '\c' / '\s' are invalid string
# escapes that newer Python versions warn about.
sig_df = pd.read_csv('Make_Tables/csv/sig_pval_heatmap_phospho.csv')
print('sig in one cancer:', len(sig_df.Phospho.unique()))

sig in one cancer: 889


In [118]:
# Derive a gene-level column by stripping the trailing site token(s) from each
# phosphosite ID (e.g. 'EEF1B2_S8' -> 'EEF1B2'), then collect the distinct genes.
site_suffix_pattern = r'(_[\w][\d]*[\w]?[\d]*[\w]?[\d]*)*$'
sig_df['Gene'] = sig_df['Phospho'].replace(site_suffix_pattern, '', regex=True)
prot_list = sig_df['Gene'].unique().tolist()
sig_df.head()

Unnamed: 0,Phospho,P_Value,Medians,Cancer,Gene
0,EEF1B2_S8,0.000107,2.1124,Luad,EEF1B2
1,DNMBP_S1436,0.000425,1.6316,Luad,DNMBP
2,CTBP2_S905,0.00045,1.2298,Luad,CTBP2
3,SOS1_S1161,0.000547,1.3025,Luad,SOS1
4,XPO5_S416,0.000726,2.657,Luad,XPO5


In [170]:
# Enrichr query for the genes that are significant in a single cancer type.
# The gene list is built in this cell because `one` was otherwise only defined
# by a cell further down the notebook, so a fresh Restart & Run All stopped here
# with a NameError. The filter mirrors that later cell: p-value below 0.05 and
# Cancer equal to `c`; one entry is kept per phosphosite row (not de-duplicated).
c = 'Endo'
one = list(sig_df.loc[(sig_df['P_Value'] < 0.05) & (sig_df['Cancer'] == c), 'Gene'])

# NOTE(review): outdir is an absolute path at the filesystem root - confirm
# that this is the intended output location.
prot_enr = gp.enrichr(gene_list = one, description='Tumor_partition', gene_sets='Reactome_2016', 
                       outdir='/Enrichr')

In [171]:
# Show only the headline columns for the first 15 rows of the enrichment table.
summary_cols = ['Term', 'Overlap', 'Adjusted P-value', 'Genes']
prot_enr.res2d.loc[:, summary_cols].head(15)

Unnamed: 0,Term,Overlap,Adjusted P-value,Genes
0,Signaling by Rho GTPases Homo sapiens R-HSA-19...,26/367,3.9e-05,ARHGAP5;FGD1;ARHGAP35;AKAP13;XPO1;ABL1;SRGAP2;...
1,Rho GTPase cycle Homo sapiens R-HSA-194840,14/122,0.000113,ARHGEF12;ARHGEF18;ARHGAP5;ARHGAP35;FGD1;VAV2;B...
2,Interactions of Rev with host cellular protein...,8/35,0.000172,RANBP2;NUP214;NPM1;XPO1;TPR;NUP35;NUP153;NUP98
3,SUMO E3 ligases SUMOylate target proteins Homo...,12/96,0.000173,NUP214;RANBP2;TOP2B;MDC1;TPR;NUP35;NUP153;NUP9...
4,SUMOylation Homo sapiens R-HSA-2990846,12/101,0.000242,NUP214;RANBP2;MDC1;TOP2B;TPR;NUP35;NUP153;NUP9...
5,Cell Cycle Homo sapiens R-HSA-1640170,30/566,0.000316,MDC1;GOLGA2;PCM1;XPO1;TPR;RBBP8;E2F4;TP53BP1;B...
6,NEP/NS2 Interacts with the Cellular Export Mac...,7/30,0.000351,RANBP2;NUP214;XPO1;TPR;NUP35;NUP153;NUP98
7,Export of Viral Ribonucleoproteins from Nucleu...,7/31,0.00039,RANBP2;NUP214;XPO1;TPR;NUP35;NUP153;NUP98
8,"Cell Cycle, Mitotic Homo sapiens R-HSA-69278",26/462,0.000371,GOLGA2;PCM1;XPO1;TPR;E2F4;BTRC;SKP2;CLASP1;CDK...
9,Signaling by Robo receptor Homo sapiens R-HSA-...,7/32,0.000393,ENAH;ABL1;NCK2;SRGAP2;SOS1;CLASP1;CLASP2


In [163]:
# Display the Term column of the enrichment results (pandas truncates long output).
prot_enr.res2d.loc[:, 'Term']

0      Signaling by Rho GTPases Homo sapiens R-HSA-19...
1             Rho GTPase cycle Homo sapiens R-HSA-194840
2      Interactions of Rev with host cellular protein...
3      SUMO E3 ligases SUMOylate target proteins Homo...
4                 SUMOylation Homo sapiens R-HSA-2990846
                             ...                        
741    Extracellular matrix organization Homo sapiens...
742    Metabolism of lipids and lipoproteins Homo sap...
743    Metabolism of amino acids and derivatives Homo...
744    GPCR downstream signaling Homo sapiens R-HSA-3...
745                Metabolism Homo sapiens R-HSA-1430728
Name: Term, Length: 746, dtype: object

# Make Heatmap

In [152]:
# Choose one row of the enrichment table by its index label and split that
# row's semicolon-separated gene string into a list for the heatmap.
i = 4
pathway = prot_enr.res2d.loc[i]
genes = pathway['Genes']
g_list = genes.split(';')
print(pathway['Term'])
print('Num genes:', len(g_list))

SUMO E3 ligases SUMOylate target proteins Homo sapiens R-HSA-3108232
Num genes: 17


In [168]:
# Restrict the significant phosphosites to a single cancer type.

c = 'Endo' #'Luad'
# Keep only rows below the 0.05 p-value cutoff. sig_df is rebound on purpose
# (later cells read it); re-running this filter yields the same frame.
sig_df = sig_df.loc[sig_df['P_Value'] < 0.05]
one_df = sig_df.loc[sig_df['Cancer'] == c]
# One gene entry per phosphosite row (not de-duplicated).
one = list(one_df.Gene)
# The row count goes last so the notebook actually displays it; as a mid-cell
# expression its value was silently discarded.
len(one_df)

In [169]:
# Rows to plot: phosphosites whose gene belongs to the selected pathway (g_list)
# and whose p-value is below 0.05. Both conditions are evaluated on sig_df and
# combined into one mask, rather than indexing the already-subset frame with a
# mask built from the larger frame (which depended on implicit index alignment).
get = sig_df['Gene'].isin(g_list)
plot_df = sig_df.loc[get & (sig_df['P_Value'] < 0.05)]

In [155]:
# Draw the circle heatmap: phosphosites on the x axis, cancers on the y axis,
# with P_Value passed as the circle variable and Medians as the color variable.
# A `save_png` argument was previously left commented out on this call.
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Phospho',
    y_axis='Cancer',
    plot_height=400,
    plot_width=1500,
    legend_min=.01,
    legend_max=0.05,
)