Interacting Proteins

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import sys
sys.path.append('C:\\Users\\brittany henderson\\GitHub\\WhenMutationsDontMatter\\')
import plot_utils as p

In [2]:
gbm = cptac.Gbm()
endo = cptac.Endometrial()

Checking that index is up-to-date...



                                    

In [5]:
ip = u.get_interacting_proteins('PTEN')
len(ip)

28

In [6]:
gene = 'PTEN'

In [7]:
mut_type_gbm = gbm.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = gbm.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_gbm = mut_type_gbm[['Mutation']] 
merged = ip_df.join(mut_type_gbm) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Deletion          81
Wildtype_Tumor    16
Name: Mutation, dtype: int64

In [10]:
cols = list(del_wt.columns[:-1])

g_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
g_pval

Unnamed: 0,Comparison,P_Value
0,PTEN_proteomics,1.219523e-07
1,PIK3CD_proteomics,0.001030815
2,PIK3C3_proteomics,0.001225949
3,USP7_proteomics,0.004017792
4,PIK3R1_proteomics,0.004282296
5,MAST2_proteomics,0.01260293
6,INPP4B_proteomics,0.01455578
7,SLC9A3R1_proteomics,0.02487273
8,EGFR_proteomics,0.04010871
9,USP13_proteomics,0.05478559


Endo

In [11]:
#Endo ttest
mut_type_en = endo.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = endo.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_en = mut_type_en[['Mutation']] 
merged = ip_df.join(mut_type_en) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    15
Deletion           3
Name: Mutation, dtype: int64

Ovarian

In [12]:
o = cptac.Ovarian()

                                    

In [14]:
mut_type_o = o.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = o.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = o.reduce_multiindex(prot_and_mutations, levels_to_drop = 1)
prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_o = mut_type_o[['Mutation']] 
merged = ip_df.join(mut_type_o) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    46
Deletion          20
Name: Mutation, dtype: int64

In [16]:
cols = list(del_wt.columns[:-1])

o_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
o_pval

  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Comparison,P_Value
0,PTEN_proteomics,0.000909
1,PIK3R2_proteomics,0.005865
2,PIK3CA_proteomics,0.008335
3,PIK3CB_proteomics,0.050313
4,PREX2_proteomics,0.060296
5,PTK2_proteomics,0.080941
6,MAGI3_proteomics,0.150601
7,MAST2_proteomics,0.16486
8,PIK3R1_proteomics,0.219977
9,PDGFRB_proteomics,0.231014


Breast

In [17]:
b = cptac.Brca()

                                    

In [18]:
mut_type_b = b.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = b.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = b.reduce_multiindex(prot_and_mutations, levels_to_drop = 1)
prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_b = mut_type_b[['Mutation']] 
merged = ip_df.join(mut_type_b) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    80
Deletion          24
Name: Mutation, dtype: int64

In [20]:
cols = list(del_wt.columns[:-1])

b_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
b_pval

  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Comparison,P_Value
0,PTEN_proteomics,4e-06
1,SHC1_proteomics,0.014172
2,EGFR_proteomics,0.016614
3,TP53_proteomics,0.068427
4,PIK3R1_proteomics,0.164475
5,MAST2_proteomics,0.213636
6,CSNK2A2_proteomics,0.262209
7,INPP4B_proteomics,0.29597
8,USP13_proteomics,0.3256
9,PTK2_proteomics,0.353546


Colon

In [21]:
col = cptac.Colon()

                                    

In [22]:
mut_type_c = col.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = col.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_c = mut_type_c[['Mutation']] 
merged = ip_df.join(mut_type_c) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    76
Deletion          25
Name: Mutation, dtype: int64

In [23]:
cols = list(del_wt.columns[:-1])

c_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
c_pval

  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Comparison,P_Value
0,PTEN_proteomics,0.012872
1,SHC1_proteomics,0.030107
2,PIK3C3_proteomics,0.116419
3,INPP4B_proteomics,0.118135
4,PIK3CD_proteomics,0.1312
5,CSNK2A1_proteomics,0.144425
6,MVP_proteomics,0.182782
7,USP7_proteomics,0.237441
8,PDGFRB_proteomics,0.250276
9,CSNK2A2_proteomics,0.326372


Head and Neck

In [24]:
h = cptac.Hnscc()

                                    

In [26]:
mut_type_h = h.get_genotype_all_vars(gene)

# merge cnv with genotype all mut type
prot_and_mutations = h.join_omics_to_mutations(
    mutations_genes = [gene], omics_df_name = 'proteomics', omics_genes = ip)
prot_and_mutations = prot_and_mutations.loc[:,~prot_and_mutations.columns.duplicated()] # drop duplicated columns
prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples
ip_df = prot_and_mutations.iloc[:,:-4] #drop mutation, location cols
mut_type_h = mut_type_h[['Mutation']] 
merged = ip_df.join(mut_type_h) # merge mutation col from function (includes cnv)

# Keep two values to compare
compare = ['Wildtype_Tumor','Deletion']
get = merged['Mutation'].isin(compare)
del_wt = merged[get]
del_wt['Mutation'].value_counts()



Wildtype_Tumor    87
Deletion          18
Name: Mutation, dtype: int64

In [33]:
cols = list(del_wt.columns[:-1])

h_pval = u.wrap_ttest(del_wt, 'Mutation', cols, return_all = True)
h_pval

  **kwargs)
  ret = ret.dtype.type(ret / rcount)
  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)
  reject = pvals <= alphacBonf
  pvals_corrected[pvals_corrected>1] = 1


Unnamed: 0,Comparison,P_Value
0,PTEN_proteomics,2.3e-05
1,INPP4B_proteomics,0.000676
2,PIK3CA_proteomics,0.001677
3,USP13_proteomics,0.005645
4,PIK3CD_proteomics,0.014016
5,EGFR_proteomics,0.017719
6,SLC9A3R1_proteomics,0.031385
7,TP53_proteomics,0.031801
8,CSNK2A1_proteomics,0.040586
9,USP7_proteomics,0.064384


Combine

In [52]:
df = g_pval.join(o_pval, rsuffix = '_ov')
df = df.join(b_pval, rsuffix = '_brca')
df = df.join(c_pval, rsuffix = '_col')
df = df.join(h_pval, rsuffix = '_hnscc')
drop_cols = ['Comparison_ov','Comparison_brca','Comparison_col','Comparison_hnscc']
df = df.drop(drop_cols, axis = 1)
df = df.rename(columns = {'P_Value': 'P_Value_gbm'})
df

Unnamed: 0,Comparison,P_Value_gbm,P_Value_ov,P_Value_brca,P_Value_col,P_Value_hnscc
0,PTEN_proteomics,1.219523e-07,0.000909,4e-06,0.012872,2.3e-05
1,PIK3CD_proteomics,0.001030815,0.005865,0.014172,0.030107,0.000676
2,PIK3C3_proteomics,0.001225949,0.008335,0.016614,0.116419,0.001677
3,USP7_proteomics,0.004017792,0.050313,0.068427,0.118135,0.005645
4,PIK3R1_proteomics,0.004282296,0.060296,0.164475,0.1312,0.014016
5,MAST2_proteomics,0.01260293,0.080941,0.213636,0.144425,0.017719
6,INPP4B_proteomics,0.01455578,0.150601,0.262209,0.182782,0.031385
7,SLC9A3R1_proteomics,0.02487273,0.16486,0.29597,0.237441,0.031801
8,EGFR_proteomics,0.04010871,0.219977,0.3256,0.250276,0.040586
9,USP13_proteomics,0.05478559,0.231014,0.353546,0.326372,0.064384


In [53]:
df.to_csv('del_vs_wt_pvals.csv')

In [26]:
ip_df.to_csv('cnv_del_vs_amp_interacting.csv')