Phospho for Interacting Proteins

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

In [2]:
gbm = cptac.Gbm()
endo = cptac.Endometrial()

Checking that endometrial index is up-to-date...



                                                

In [3]:
ip = u.get_interacting_proteins('PTEN')
len(ip)

28

In [4]:
gene = 'PTEN'

# Gbm

In [5]:
mut_type_gbm = gbm.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = gbm.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
prot_and_mutations = gbm.reduce_multiindex(prot_and_mutations, levels_to_drop = 3, flatten = True)

ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_gbm = mut_type_gbm[['Mutation']] 
merged = ip_df.join(mut_type_gbm) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()
del_wt
prot_and_mutations



Name,AKT1_phosphoproteomics_S122S124S126_S*GS*PS*DNSGAEEMEVSLAK,AKT1_phosphoproteomics_S122S129_S*GSPSDNS*GAEEMEVSLAK,AKT1_phosphoproteomics_S124_SGS*PSDNSGAEEMEVSLAK,AKT1_phosphoproteomics_S124S126_SGS*PS*DNSGAEEMEVSLAK,AKT1_phosphoproteomics_S124S126S129_SGS*PS*DNS*GAEEMEVSLAK,AKT1_phosphoproteomics_S124S129_SGS*PSDNS*GAEEMEVSLAK,AKT1_phosphoproteomics_S126_SGSPS*DNSGAEEMEVSLAK,AKT1_phosphoproteomics_S126S129_SGSPS*DNS*GAEEMEVSLAK,AKT1_phosphoproteomics_S129_SGSPSDNS*GAEEMEVSLAK,AKT1_phosphoproteomics_T450_YFDEEFTAQMITIT*PPDQDDSMECVDSER,...,USP7_phosphoproteomics_S49T54_ITQNPVINGNVALS*DGHNT*AEEDMEDDTSWR,USP7_phosphoproteomics_S963_IIGVHQEDELLECLS*PATSR,USP7_phosphoproteomics_T54_ITQNPVINGNVALSDGHNT*AEEDMEDDTSWR,USP7_phosphoproteomics_Y143S149S151_IINY*RDDEKS*FS*RR,XIAP_phosphoproteomics_S361T363_TPS*LT*RR,XIAP_phosphoproteomics_S406_S*LEVLVADLVNAQK,PTEN_Mutation__,PTEN_Location__,PTEN_Mutation_Status__,Sample_Status__
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00104,,,-0.266735,,-0.003699,,-0.278996,-0.258196,-0.023265,,...,,-0.232297,-0.669862,,0.066926,,[Missense_Mutation],[p.G251D],Single_mutation,Tumor
C3L-00365,,,,-0.126213,0.167336,,-0.444445,-0.306907,0.249322,,...,-0.628643,-0.133168,0.074903,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3L-00674,,,-0.099868,0.415818,-0.025736,,-0.442646,0.283930,0.080429,0.595095,...,,0.251479,0.878238,-0.126044,,-0.346832,[Missense_Mutation],[p.C136Y],Single_mutation,Tumor
C3L-00677,,,-0.474102,0.157080,0.081324,-0.256534,-0.261877,-0.069383,,,...,,0.476500,0.726599,,,-0.443770,[Missense_Mutation],[p.R173H],Single_mutation,Tumor
C3L-01040,,,0.213932,,0.081431,,-0.187680,0.093658,0.149175,,...,,0.228913,0.294451,,-0.155544,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-03183,0.135839,,-0.048229,,0.333319,,,0.073602,0.435285,,...,-0.104482,-0.721475,-0.144328,,,,[Missense_Mutation],[p.D252G],Single_mutation,Tumor
C3N-03184,,,,0.029779,0.562808,0.375545,-0.122466,-0.212794,0.082460,,...,-0.311864,0.048687,,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-03186,,,0.190067,0.204214,-0.020094,0.208449,,-0.129185,,-0.293418,...,,-0.024489,0.244612,,,0.455009,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-03188,-0.212662,,-0.288704,,0.050538,,,-0.330554,0.085500,,...,-0.083619,0.086609,-0.803688,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [6]:
cols = list(del_wt.columns[:-1])

g = u.wrap_ttest(del_wt, 'Mutation', cols,return_all = True, correction_method = 'fdr_bh')
g_pval = g.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Gbm_P_Value'})
g_pval

  **kwargs)
  ret = ret.dtype.type(ret / rcount)
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  reject = pvals_sorted <= ecdffactor*alpha
  pvals_corrected[pvals_corrected>1] = 1
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Phosphoproteomics,Gbm_P_Value
0,PTK2_phosphoproteomics_T660Y661_YMEDST*Y*YK,0.000220
1,SLC9A3R1_phosphoproteomics_S280_EALAEAALES*PRP...,0.001065
2,PIK3R1_phosphoproteomics_S83_KIS*PPTPK,0.001357
3,EGFR_phosphoproteomics_S1042S1045_TPLLSSLSATS*...,0.001429
4,AKT1_phosphoproteomics_S124S126S129_SGS*PS*DNS...,0.001594
...,...,...
338,SLC9A3R1_phosphoproteomics_S288T293_S*ASSDT*SE...,
339,SLC9A3R1_phosphoproteomics_S291S294S299_SASS*D...,
340,USP13_phosphoproteomics,
341,USP7_phosphoproteomics_Y143S149S151_IINY*RDDEK...,


In [7]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "Wildtype_Tumor"]
del_med = d.median()
wt_med = wt.median()

gbm_d = {}

for phos in cols:
    dif_gbm = wt_med[phos] - del_med[phos]
    gbm_d[phos] = dif_gbm

gbm_df = pd.DataFrame.from_dict(gbm_d, orient='index', columns=['Gbm_Median'])
gbm_df = gbm_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
gbm_df

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


Unnamed: 0,Phosphoproteomics,Gbm_Median
0,AKT1_phosphoproteomics_S122S124S126_S*GS*PS*DN...,-0.278248
1,AKT1_phosphoproteomics_S122S129_S*GSPSDNS*GAEE...,
2,AKT1_phosphoproteomics_S124_SGS*PSDNSGAEEMEVSLAK,0.098392
3,AKT1_phosphoproteomics_S124S126_SGS*PS*DNSGAEE...,-0.264459
4,AKT1_phosphoproteomics_S124S126S129_SGS*PS*DNS...,-0.299351
...,...,...
338,USP7_phosphoproteomics_S963_IIGVHQEDELLECLS*PATSR,0.197240
339,USP7_phosphoproteomics_T54_ITQNPVINGNVALSDGHNT...,-0.073719
340,USP7_phosphoproteomics_Y143S149S151_IINY*RDDEK...,
341,XIAP_phosphoproteomics_S361T363_TPS*LT*RR,-0.041787


In [8]:
g_merged = g_pval.merge(gbm_df, on='Phosphoproteomics',how='outer')
g_merged = g_merged.replace(to_replace = r'_phosphoproteomics', value = '', regex = True)
g_merged = g_merged.replace(to_replace = r'_[A-Z*]*$', value = '', regex = True)
g_merged

Unnamed: 0,Phosphoproteomics,Gbm_P_Value,Gbm_Median
0,PTK2_T660Y661,0.000220,-0.764033
1,SLC9A3R1_S280,0.001065,0.728675
2,PIK3R1_S83,0.001357,0.580407
3,EGFR_S1042S1045,0.001429,-0.349234
4,AKT1_S124S126S129,0.001594,-0.299351
...,...,...,...
338,SLC9A3R1_S288T293,,
339,SLC9A3R1_S291S294S299,,-0.559956
340,USP13,,
341,USP7_Y143S149S151,,


# Endo

In [9]:
#Endo ttest

mut_type_gbm = endo.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = endo.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
prot_and_mutations = endo.reduce_multiindex(prot_and_mutations, flatten = True)
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_gbm = mut_type_gbm[['Mutation']] 
merged = ip_df.join(mut_type_gbm) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Nonsense_Mutation','Frame_Shift_Ins','Frame_Shift_Del']
get = merged['Mutation'].isin(compare)
trunc_wt = merged[get]
trunc_wt['Mutation'] = np.where(
                trunc_wt['Mutation'] == 'Wildtype_Tumor', 'Wildtype_Tumor', 'Truncation')
trunc_wt['Mutation'].value_counts()
#trunc_wt
prot_and_mutations

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Name,AKT1_phosphoproteomics_S122,AKT1_phosphoproteomics_S124,AKT1_phosphoproteomics_S126,AKT1_phosphoproteomics_S129,AKT1_phosphoproteomics_T450,CSNK2A1_phosphoproteomics,CSNK2A2_phosphoproteomics_S18,CSNK2A2_phosphoproteomics_S21,EGFR_phosphoproteomics_S1026,EGFR_phosphoproteomics_S1037,...,XIAP_phosphoproteomics_S261,XIAP_phosphoproteomics_S40,XIAP_phosphoproteomics_S402,XIAP_phosphoproteomics_S406,XIAP_phosphoproteomics_S427,XIAP_phosphoproteomics_S430,PTEN_Mutation_,PTEN_Location_,PTEN_Mutation_Status_,Sample_Status_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
C3L-00006,,-0.04605,0.14810,-0.04605,,,,,-0.227,,...,,,-0.437,,,,"[Missense_Mutation, Nonsense_Mutation]","[p.R130Q, p.R233*]",Multiple_mutation,Tumor
C3L-00008,-0.2500,-0.26300,-0.12100,-0.18550,-0.00876,,0.0107,,,,...,,,,0.116,,-1.90,[Missense_Mutation],[p.G127R],Single_mutation,Tumor
C3L-00032,,0.21400,0.05050,0.03195,,,,,,,...,,,,,,,[Nonsense_Mutation],[p.W111*],Single_mutation,Tumor
C3L-00090,,-0.00300,-0.02820,0.04540,,,,-0.07190,,,...,-0.311,,0.000,-0.203,,,[Missense_Mutation],[p.R130G],Single_mutation,Tumor
C3L-00098,,-0.18965,-0.03930,-0.07715,-0.30100,,,,,,...,,,1.710,0.424,0.614,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
C3N-01520,,-0.35400,-0.30800,-0.17250,,,,-0.00115,,,...,-0.519,,0.163,-0.333,,,"[Frame_Shift_Ins, Frame_Shift_Del]","[p.D268Gfs*30, p.N323Mfs*21]",Multiple_mutation,Tumor
C3N-01521,,-0.16350,-0.09890,-0.12995,0.00000,,,,,,...,,,-0.277,-0.511,-0.267,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-01537,0.0241,0.02410,-0.01185,-0.04780,,,,1.00000,-0.321,,...,,,,,,,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor
C3N-01802,0.3130,-0.71750,-0.12300,-0.38450,-0.00828,,0.3740,,,,...,,,,-1.630,,1.15,[Wildtype_Tumor],[No_mutation],Wildtype_Tumor,Tumor


In [10]:
cols = list(trunc_wt.columns[:-1])

e_pval = u.wrap_ttest(trunc_wt, 'Mutation', cols, return_all = True,correction_method = 'fdr_bh')
e_pval = e_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'En_P_Value'})
e_pval

  **kwargs)
  ret = ret.dtype.type(ret / rcount)
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  reject = pvals_sorted <= ecdffactor*alpha
  pvals_corrected[pvals_corrected>1] = 1
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Phosphoproteomics,En_P_Value
0,CSNK2A2_phosphoproteomics_S21,0.001495
1,PTEN_phosphoproteomics_S475,0.009049
2,TP53_phosphoproteomics_S315,0.009797
3,PTK2_phosphoproteomics_S807,0.010277
4,ROCK1_phosphoproteomics_S704,0.024254
...,...,...
223,SHC1_phosphoproteomics_T380,
224,SLC9A3R1_phosphoproteomics_S162,
225,SLC9A3R1_phosphoproteomics_S46,
226,XIAP_phosphoproteomics_S40,


In [11]:
t = trunc_wt[trunc_wt.Mutation == "Truncation"]
wt = trunc_wt[trunc_wt.Mutation == "Wildtype_Tumor"]
trunc_med = t.median()
wt_med = wt.median()

en_d = {}

for phos in cols:
    dif = wt_med[phos] - trunc_med[phos]
    en_d[phos] = dif

en_df = pd.DataFrame.from_dict(en_d, orient='index', columns=['En_Median'])
en_df = en_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
en_df

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


Unnamed: 0,Phosphoproteomics,En_Median
0,AKT1_phosphoproteomics_S122,0.07530
1,AKT1_phosphoproteomics_S124,0.11230
2,AKT1_phosphoproteomics_S126,0.11370
3,AKT1_phosphoproteomics_S129,0.04810
4,AKT1_phosphoproteomics_T450,-0.02238
...,...,...
223,XIAP_phosphoproteomics_S40,
224,XIAP_phosphoproteomics_S402,0.20150
225,XIAP_phosphoproteomics_S406,-0.37250
226,XIAP_phosphoproteomics_S427,0.28950


In [12]:
e_merged = e_pval.merge(en_df, on='Phosphoproteomics',how='outer')
e_merged

Unnamed: 0,Phosphoproteomics,En_P_Value,En_Median
0,CSNK2A2_phosphoproteomics_S21,0.001495,0.839075
1,PTEN_phosphoproteomics_S475,0.009049,0.393800
2,TP53_phosphoproteomics_S315,0.009797,0.157500
3,PTK2_phosphoproteomics_S807,0.010277,0.263150
4,ROCK1_phosphoproteomics_S704,0.024254,0.299950
...,...,...,...
223,SHC1_phosphoproteomics_T380,,
224,SLC9A3R1_phosphoproteomics_S162,,
225,SLC9A3R1_phosphoproteomics_S46,,-0.178760
226,XIAP_phosphoproteomics_S40,,


# Ovarian

In [13]:
o = cptac.Ovarian()

                                            

In [14]:
gene = 'PTEN'

mut_type = o.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = o.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
prot_and_mutations = o.reduce_multiindex(prot_and_mutations, levels_to_drop = 3, flatten=True)
prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns #FIX, 
# dropped 328 cols!  
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type = mut_type[['Mutation']] 
merged = ip_df.join(mut_type) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()
print(len(del_wt.columns))
del_wt = del_wt.dropna(axis = 'columns', how='all')
del_wt.head()




157




Name,AKT1_phosphoproteomics_S124_R.SGS*PSDNSGAEEMEVSLAKPK.H,AKT1_phosphoproteomics_S126_R.SGSPS*DNSGAEEMEVSLAK.P,AKT1_phosphoproteomics_S126_R.SGSPS*DNSGAEEMEVSLAKPK.H,AKT1_phosphoproteomics_S129_R.SGSPSDNS*GAEEMEVSLAKPK.H,AKT1_phosphoproteomics_T450_I.TIT*PPDQDDSMECVDSER.R,AKT1_phosphoproteomics_T450_Q.MITIT*PPDQDDSMECVDSER.R,CSNK2A2_phosphoproteomics_S18_R.VYAEVNS*LR.S,EGFR_phosphoproteomics_S1057_R.NGLQS*CPIKEDSFLQR.Y,EGFR_phosphoproteomics_S1064_R.NGLQSCPIKEDS*FLQR.Y,EGFR_phosphoproteomics_S1104_P.AGS*VQNPVYHNQPLNPAPSR.D,...,SLC9A3R1_phosphoproteomics_T293_R.SASSDT*SEELNSQDSPPK.Q,TP53_phosphoproteomics_S315_K.RALPNNTSSS*PQPK.K,TP53_phosphoproteomics_S315_R.ALPNNTSSS*PQPK.K,USP13_phosphoproteomics_S630_R.GLQPGEEELPDIS*PPIVIPDDSKDR.L,USP7_phosphoproteomics_S18_K.AGEQQLS*EPEDMEMEAGDTDDPPR.I,XIAP_phosphoproteomics_S402_K.IQISGS*NYK.S,XIAP_phosphoproteomics_S87_R.KVS*PNCR.F,XIAP_phosphoproteomics_T356_R.TT*EKTPSLTR.R,XIAP_phosphoproteomics_T359_R.TTEKT*PSLTR.R,Mutation
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01OV007,,-2.336,-1.332,-1.191,,,,,,-2.419,...,-4.115,,,,,,,,,Wildtype_Tumor
01OV017,,-2.022,-0.989,-1.169,,,,,,-2.061,...,-3.693,,,,,,,,,Deletion
01OV018,,,-1.503,,-2.25,,,,,,...,,,,,,,,,,Wildtype_Tumor
01OV023,-0.739,,,-1.371,-3.195,-2.372,-1.771,,,-1.967,...,,,-3.017,,,,,,-2.914,Deletion
01OV026,-1.454,,,-2.094,,,,,-2.278,-2.915,...,-4.686,,-1.976,,,,-0.869,-0.348,,Wildtype_Tumor


In [15]:
cols = list(del_wt.columns[:-1])

o_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True, correction_method = 'fdr_bh')
o_pval = o_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Ov_P_Value'})
o_pval

  **kwargs)
  ret = ret.dtype.type(ret / rcount)
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  reject = pvals_sorted <= ecdffactor*alpha
  pvals_corrected[pvals_corrected>1] = 1
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Phosphoproteomics,Ov_P_Value
0,PIK3R1_phosphoproteomics_Y470_R.LYEEY*TR.T,0.002950
1,PIK3R3_phosphoproteomics_Y219_R.LYEEY*TR.T,0.002950
2,NEDD4_phosphoproteomics_S884_R.GKTS*LDTSNDLGPL...,0.004646
3,EGFR_phosphoproteomics_S1104_P.AGS*VQNPVYHNQPL...,0.007877
4,EGFR_phosphoproteomics_S991_R.MHLPS*PTDSNFYR.A,0.029326
...,...,...
148,SLC9A3R1_phosphoproteomics_S280_R.EALAEAALES*P...,
149,SLC9A3R1_phosphoproteomics_S288_R.S*ASSDTSEELN...,
150,SLC9A3R1_phosphoproteomics_S291S302_R.SASS*DTS...,
151,XIAP_phosphoproteomics_S87_R.KVS*PNCR.F,


In [16]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "Wildtype_Tumor"]
del_med = d.median()
wt_med = wt.median()

ov_d = {}

for phos in cols:
    dif = wt_med[phos] - del_med[phos]
    ov_d[phos] = dif

o_df = pd.DataFrame.from_dict(ov_d, orient='index', columns=['Ov_Median'])
o_df = o_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
o_df

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


Unnamed: 0,Phosphoproteomics,Ov_Median
0,AKT1_phosphoproteomics_S124_R.SGS*PSDNSGAEEMEV...,-0.2670
1,AKT1_phosphoproteomics_S126_R.SGSPS*DNSGAEEMEV...,0.0810
2,AKT1_phosphoproteomics_S126_R.SGSPS*DNSGAEEMEV...,-0.3165
3,AKT1_phosphoproteomics_S129_R.SGSPSDNS*GAEEMEV...,-0.1040
4,AKT1_phosphoproteomics_T450_I.TIT*PPDQDDSMECVD...,0.2370
...,...,...
148,USP7_phosphoproteomics_S18_K.AGEQQLS*EPEDMEMEA...,0.3790
149,XIAP_phosphoproteomics_S402_K.IQISGS*NYK.S,-1.1640
150,XIAP_phosphoproteomics_S87_R.KVS*PNCR.F,-0.4010
151,XIAP_phosphoproteomics_T356_R.TT*EKTPSLTR.R,0.8270


In [17]:
o_merged = o_pval.merge(o_df, on='Phosphoproteomics',how='outer')
o_merged = o_merged.replace(to_replace = r'_phosphoproteomics', value = '', regex = True)
o_merged = o_merged.replace(to_replace = r'_[A-Z.*]*$', value = '', regex = True)
o_merged

Unnamed: 0,Phosphoproteomics,Ov_P_Value,Ov_Median
0,PIK3R1_Y470,0.002950,-0.6640
1,PIK3R3_Y219,0.002950,-0.6640
2,NEDD4_S884,0.004646,-0.4495
3,EGFR_S1104,0.007877,-0.6180
4,EGFR_S991,0.029326,-1.0500
...,...,...,...
148,SLC9A3R1_S280,,-0.3780
149,SLC9A3R1_S288,,-0.5650
150,SLC9A3R1_S291S302,,-0.3350
151,XIAP_S87,,-0.4010


# Breast

In [18]:
b = cptac.Brca()

                                         

In [19]:
mut_type = b.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = b.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
#prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
prot_and_mutations = b.reduce_multiindex(prot_and_mutations, levels_to_drop = 3)
prot_and_mutations = b.reduce_multiindex(prot_and_mutations, levels_to_drop = 2, flatten = True)
prot_and_mutations

ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type = mut_type[['Mutation']] 
merged = ip_df.join(mut_type) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    80
Deletion          24
Name: Mutation, dtype: int64

In [20]:
cols = list(del_wt.columns[:-1])

b_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True, correction_method = 'fdr_bh')
b_pval = b_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Brca_P_Value'})
b_pval

  reject = pvals_sorted <= ecdffactor*alpha
  pvals_corrected[pvals_corrected>1] = 1
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Phosphoproteomics,Brca_P_Value
0,MAST2_phosphoproteomics_S74S81,0.003703
1,MAGI3_phosphoproteomics_S1407,0.005574
2,SLC9A3R1_phosphoproteomics_S170,0.007694
3,ROCK1_phosphoproteomics_S2,0.008402
4,CSNK2A2_phosphoproteomics_S18,0.011835
...,...,...
136,USP7_phosphoproteomics_S963,0.973910
137,MAST2_phosphoproteomics_S92,0.983045
138,INPP4B_phosphoproteomics,
139,PIK3CB_phosphoproteomics,


In [21]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "Wildtype_Tumor"]
del_med = d.median()
wt_med = wt.median()

b_d = {}

for phos in cols:
    dif = wt_med[phos] - del_med[phos]
    b_d[phos] = dif

b_df = pd.DataFrame.from_dict(b_d, orient='index', columns=['Brca_Median'])
b_df = b_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
b_df

  return np.nanmean(a, axis, out=out, keepdims=keepdims)


Unnamed: 0,Phosphoproteomics,Brca_Median
0,AKT1_phosphoproteomics_M446T450,0.51865
1,AKT1_phosphoproteomics_S124S126S129,-0.37585
2,AKT1_phosphoproteomics_S126,0.13445
3,AKT1_phosphoproteomics_S126S129,-0.16085
4,AKT1_phosphoproteomics_S129,0.02820
...,...,...
136,USP7_phosphoproteomics_S49T54,-0.43095
137,USP7_phosphoproteomics_S752,0.09245
138,USP7_phosphoproteomics_S963,0.03725
139,XIAP_phosphoproteomics_S402,-0.26415


In [22]:
b_merged = b_pval.merge(b_df, on='Phosphoproteomics',how='outer')
b_merged = b_merged.replace(to_replace = r'_phosphoproteomics', value = '', regex = True)
b_merged

Unnamed: 0,Phosphoproteomics,Brca_P_Value,Brca_Median
0,MAST2_S74S81,0.003703,-0.96550
1,MAGI3_S1407,0.005574,-1.31335
2,SLC9A3R1_S170,0.007694,1.33170
3,ROCK1_S2,0.008402,0.91895
4,CSNK2A2_S18,0.011835,-0.66225
...,...,...,...
136,USP7_S963,0.973910,0.03725
137,MAST2_S92,0.983045,0.23720
138,INPP4B,,
139,PIK3CB,,


# Colon

In [None]:
col = cptac.Colon()

Formatting dataframes...                  

In [None]:
mut_type = col.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = col.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
#prot_and_mutations = col.reduce_multiindex(prot_and_mutations, levels_to_drop = 3)
prot_and_mutations = col.reduce_multiindex(prot_and_mutations, levels_to_drop = 2, flatten = True)
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type = mut_type[['Mutation']] 
merged = ip_df.join(mut_type) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()
prot_and_mutations

In [None]:
cols = list(del_wt.columns[:-1])

c_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True, correction_method = 'fdr_bh')
c_pval = c_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Colon_P_Value'})
c_pval

In [None]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "Wildtype_Tumor"]
del_med = d.median()
wt_med = wt.median()

d = {}

for phos in cols:
    dif = wt_med[phos] - del_med[phos]
    d[phos] = dif

c_df = pd.DataFrame.from_dict(d, orient='index', columns=['Colon_Median'])
c_df = c_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
c_df

In [None]:
c_merged = c_pval.merge(c_df, on='Phosphoproteomics',how='outer')
c_merged = c_merged.replace(to_replace = r'_phosphoproteomics', value = '', regex = True)
c_merged

# Head and Neck

In [None]:
h = cptac.Hnscc()

In [None]:
mut_type = h.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = h.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'phosphoproteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
prot_and_mutations = h.reduce_multiindex(prot_and_mutations, levels_to_drop = 3)
prot_and_mutations = h.reduce_multiindex(prot_and_mutations, levels_to_drop = 2, flatten=True)
prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns
# FIX - 17 duplicates

ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type = mut_type[['Mutation']] 
merged = ip_df.join(mut_type) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()
del_wt.head()

In [None]:
cols = list(del_wt.columns[:-1])

h_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
h_pval = h_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Hnscc_P_Value'})
h_pval

In [None]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "Wildtype_Tumor"]
del_med = d.median()
wt_med = wt.median()

h_d = {}

for phos in cols:
    dif = wt_med[phos] - del_med[phos]
    h_d[phos] = dif

h_df = pd.DataFrame.from_dict(h_d, orient='index', columns=['Hnscc_Median'])
h_df = h_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
h_df

In [None]:
h_merged = h_pval.merge(h_df, on='Phosphoproteomics',how='outer')
h_merged = h_merged.replace(to_replace = r'_phosphoproteomics', value = '', regex = True)
h_merged

# Lung

In [None]:
l = cptac.Luad()

In [None]:
mut_type = l.get_genotype_all_vars(gene)
mut_type = mut_type.rename(columns={'PTEN':'cnv'})

# different code because no somatic mutation data for pten (can't join to somatic mutations)
phos = l.get_phosphoproteomics(tissue_type='tumor')
ip_phos = phos[['PDGFRB', 'PIK3R2', 'PIK3C3','SLC9A3R1',
     'USP13','PIK3CA','TP53','EGFR','PTK2','USP7','XIAP','PTEN','PREX2','MAST2','PIK3CD',
    'ROCK1','NEDD4','AKT1', 'MVP', 'MAGI3','CSNK2A2', 'MDM2', 'CSNK2A1']] 
# 'PIK3CB' 'INPP4B' 'SH1' 'PRK3R1' 'PIK3R3' not in index
ip_phos = l.reduce_multiindex(ip_phos, levels_to_drop =3)
ip_phos = l.reduce_multiindex(ip_phos, levels_to_drop = 2, flatten = True)

merged = ip_phos.join(mut_type) # checked and there is 110 tumor samples for lung

compare = ['No_Mutation','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
print(del_wt['Mutation'].value_counts())
del_wt.columns[:-2]

In [None]:
cols = list(del_wt.columns[:-2])

l_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True, correction_method = 'fdr_bh')
l_pval = l_pval.rename(columns = {'Comparison': 'Phosphoproteomics','P_Value': 'Luad_P_Value'})
l_pval 

In [None]:
d = del_wt[del_wt.Mutation == "Deletion"]
wt = del_wt[del_wt.Mutation == "No_Mutation"]
del_med = d.median()
wt_med = wt.median()

l_d = {}

for phos in cols:
    dif = wt_med[phos] - del_med[phos]
    l_d[phos] = dif

l_df = pd.DataFrame.from_dict(l_d, orient='index', columns=['Luad_Median'])
l_df = l_df.reset_index().rename(columns={'index':'Phosphoproteomics'})
l_df

In [None]:
l_merged = l_pval.merge(l_df, on='Phosphoproteomics',how='outer')
l_merged

# Merge

In [None]:
n = g_merged.merge(h_merged, on='Proteomics',how='outer')
n = n.merge(l_merged, on='Proteomics',how='outer')
n= n.merge(b_merged, on='Proteomics',how='outer')
n = n.merge(o_merged, on='Proteomics',how='outer')
n = n.merge(e_merged, on='Proteomics',how='outer')
all_df = n.merge(c_merged, on='Proteomics',how='outer')



In [None]:
# Keep only genes significant in > 1 cancer
cols = ['Gbm_P_Value','Hnscc_P_Value','Luad_P_Value','Brca_P_Value','Ov_P_Value','En_P_Value','Colon_P_Value']

bc = .05/28

t = []

for c in cols:
    df = all_df[all_df[c] <= bc]
    t.append(list(df.Proteomics))
    
flat_list = [item for sublist in t for item in sublist]
sig = set(flat_list) # remove duplicates



bool_df = all_df['Proteomics'].isin(sig)
sig_df = all_df[bool_df]
sig_df

Combine

In [None]:
sig_df.to_csv('at_least_one_significant_interacting.csv')

In [None]:
stdev = ip_df.std()

In [None]:
stdev = stdev.to_frame()

In [None]:
stdev.loc[stdev[0] > .2]

In [None]:
d_stdev = stdev.std()
d_stdev

In [None]:
a = stdev.hist(bins = 20)
plt.title("Standard Deviations of Pancancer CNV deletions and amplifications of PTEN on Interacting Proteins\n")
plt.xlabel('StDeV')
plt.ylabel('Count')

In [None]:
plt.rcParams['figure.figsize']=(10,8)
sns.set(font_scale = 1.3)
a = sns.distplot(stdev, bins = 20)

a.set_title("Standard Deviation Distribution for Pancancer PTEN Mutation Effect on Proteomics\n\n (Median of proteomics with Wildtype PTEN - Median of proteomics with Mutated PTEN)")
a.set(xlabel = 'StDeV', ylabel = 'Frequency')

Mean

In [None]:
mean_df = stdev.mean()

In [None]:
a = mean_df.hist(bins = 20)
plt.title("Distribution of the Mean between Cancers for PTEN Mutation Effect on all Proteins\n\n (Median of proteomics with Wildtype PTEN - Median of proteomics with Mutated PTEN)")
plt.xlabel('Mean')
plt.ylabel('Count')

In [None]:
m_df = stdev.median()

In [None]:
a = m_df.hist(bins = 20)
plt.title("Distribution of the Medians between Cancers for PTEN Mutation Effect on all Proteins\n\n (Median of proteomics with Wildtype PTEN - Median of proteomics with Mutated PTEN)")
plt.xlabel('Median')
plt.ylabel('Count')