# KRAS: Effects of Mutation on Interacting Proteins

<b>Standard imports for playing with and plotting data frames.</b>

In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns

<b>Import CPTAC data</b>

In [2]:
import CPTAC

Loading Clinical Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Data...

 ******PLEASE READ******


In [3]:
# Fetch the three CPTAC tables used throughout the notebook.
# Later cells select columns from each of them by gene / protein name.

# Somatic mutation table; a single gene column is pulled from it to split samples
somatic_mutations = CPTAC.get_somatic()
# Protein abundance table; its columns are looked up by protein name
proteomics = CPTAC.get_proteomics()
# Phosphoproteomics table; its columns are selected by regex on the column label
phos = CPTAC.get_phosphoproteomics()

### List of proteins that interact with KRAS (according to UniProt)

In [67]:
# Gene whose mutation status is used to split samples into two groups
gene = 'KRAS'

# Candidate interaction partners; each name is looked up as a column of the
# proteomics table in the analysis below
protList = [
    'EGFR',
    'KRAS',
    'ARAF',
    'ERBB2',
    'MAP2K1',
    'MAPK1',
    'BRAF',
    'MAPK3',
    'RAF1',
    'SOS1',
    'PIK3CA',
]

### Proteome abundance of proteins that interact with KRAS

In [72]:
# Collects the proteins whose abundance differs significantly between groups
sigList = []

# Bonferroni-adjusted threshold: 0.05 divided by the number of listed proteins
pcutoff = 0.05 / len(protList)

# Single-column frame holding the mutation status of `gene` for each sample
genedf = somatic_mutations[gene].to_frame()

# For every interacting protein, compare its abundance between samples where
# `gene` is mutated and samples where it is not
for protein in protList:
    # Proteins without a proteomics column are skipped
    if protein not in proteomics.columns:
        continue

    # Give the abundance column a distinct label so it cannot collide with the
    # mutation column when the protein and the gene share a name
    proteindfName = protein + " protein"
    proteindf = proteomics[protein].to_frame(name=proteindfName)

    # Put both columns in one frame (aligned on the index) and drop every row
    # that is missing either value
    cross = genedf.add(proteindf, fill_value=0).dropna(axis=0)

    # Assumes the mutation column is 1 for mutated and 0 for wild-type — TODO confirm
    is_mutated = cross[gene] == 1.0
    is_wildtype = cross[gene] == 0.0
    mutated = cross.loc[is_mutated]
    wt = cross.loc[is_wildtype]

    # Two-sample t-test on abundance; element 1 of the result is the p-value
    ttest = scipy.stats.ttest_ind(mutated[proteindfName], wt[proteindfName])
    pvalue = ttest[1]
    if pvalue <= pcutoff:
        sigList.append(protein)
        print("Test for " + protein + ": ")
        print(ttest)

### List of significantly affected proteins

In [73]:
# Proteins that passed the Bonferroni-corrected t-test in the loop above
print(sigList)

[]


### Phosphoproteome abundance of interacting proteins

In [74]:
# Each name is used as a regular expression against the phosphoproteomics
# column labels, so a less specific name (e.g. one with its trailing number
# removed) captures more phosphorylation sites.
# NOTE(review): no trailing numbers are actually removed here — this list is
# currently identical to the one used for the proteomics test. Confirm intent.
protList = [
    'EGFR',
    'KRAS',
    'ARAF',
    'ERBB2',
    'MAP2K1',
    'MAPK1',
    'BRAF',
    'MAPK3',
    'RAF1',
    'SOS1',
    'PIK3CA',
]

In [76]:
# Start from a fresh single-column frame with the mutation status of `gene`,
# so re-running this cell does not accumulate columns from a previous run
genedf = somatic_mutations[gene].to_frame()
sigPhosResults = []

# Append every phosphosite column whose label matches one of the listed names.
# `filter(regex=...)` performs an unanchored search, so a name also selects
# columns of other proteins whose label merely contains it.
for protein in protList:
    sites = phos.filter(regex=protein)
    genedf = genedf.add(sites, fill_value=0)

# Split samples by mutation status
# (assumes the mutation column is 1 for mutated, 0 for wild-type — TODO confirm)
mutated = genedf.loc[genedf[gene] == 1.0]
wt = genedf.loc[genedf[gene] == 0.0]

# Bonferroni correction over the tests that are actually run: one per
# phosphosite column. The mutation column is never tested, so it must not be
# counted in the denominator. max(..., 1) keeps the cutoff at 0.05 (and avoids
# dividing by zero) when no site matched.
siteList = [loc for loc in genedf.columns if loc != gene]
pcutoff = 0.05 / max(len(siteList), 1)

# Test each site one by one and print significant results
for loc in siteList:
    # Drop samples with no measurement at this site before testing
    mutsitedf = mutated[[gene, loc]].dropna()
    wtsitedf = wt[[gene, loc]].dropna()

    # Element 1 of the result is the p-value; a NaN p-value (too few samples)
    # fails the comparison and is therefore skipped
    ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
    if ttest[1] <= pcutoff:
        sigPhosResults.append(loc)
        print('Results for ' + loc + ': ')
        print(ttest)

Results for ARAF-S260: 
Ttest_indResult(statistic=5.1539316473499825, pvalue=1.665516443552024e-06)
Results for ARAF-T256: 
Ttest_indResult(statistic=3.638629367252144, pvalue=0.0005162594349961674)
Results for SOS1-S1229: 
Ttest_indResult(statistic=3.937102285266522, pvalue=0.00018474397689668083)
Results for TRAFD1-S480: 
Ttest_indResult(statistic=3.9254242658334673, pvalue=0.0002471818737979513)


### List of significantly affected phosphorylation sites

In [29]:
# Phosphorylation sites that passed the Bonferroni-corrected t-test in the loop above
print(sigPhosResults)

[]
