   # TP53: Effects of Mutation on Pathway Proteins

<b>Standard imports for playing with and plotting data frames.</b>

In [3]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns

<b>Import CPTAC data</b>

In [4]:
import CPTAC

Loading Dictionary...
Loading Clinical Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available
rapidly after generation for community research use. The embargo
allows exploring and utilizing the data, but the data may not be in a
publication until July 1, 2019. Please see
https://proteomics.cancer.gov/data-portal/about/data-use-agreement or
enter CPTAC.embargo() to open the webpage for more details.


In [5]:
somatic_mutations = CPTAC.get_somatic()
proteomics = CPTAC.get_proteomics()
phos = CPTAC.get_phosphoproteomics()

### List of proteins that are affected by TP53 (somewhere along the pathway) according to String (https://string-db.org/cgi/network.pl?taskId=2nm4wTXCr5PB)

In [6]:
gene = 'TP53'
protList = ['BAX', 'FAS', 'BCL2', 'CREBBP', 'CDK2', 'MDM2', 'CDKN2A', 'CDKN1A', 'ATM', 'MDM4']

### Proteome abundance

In [7]:
sigList = [];

#Bonferroni correction since we are testing multiple proteins at once
pcutoff = 0.05/len(protList)

for protein in protList: 
    if protein in proteomics.columns:
        cross = CPTAC.compare_mutations(proteomics, protein, gene)
        cross = cross[["Mutation", protein]].dropna(axis=0)
        mutated = cross.loc[cross["Mutation"] != "Wildtype"]
        wt = cross.loc[cross["Mutation"] == "Wildtype"]
        ttest = scipy.stats.ttest_ind(mutated[protein], wt[protein])
        if(ttest[1] <= pcutoff):
            sigList.append(protein)
            print("Test for " + protein + ": ")
            print(ttest) 

Test for FAS: 
Ttest_indResult(statistic=-3.26032866320367, pvalue=0.0013995805482746378)
Test for CREBBP: 
Ttest_indResult(statistic=-2.8593468853476574, pvalue=0.004899792425790758)
Test for CDK2: 
Ttest_indResult(statistic=4.771265982463181, pvalue=4.574953009415814e-06)
Test for CDKN2A: 
Ttest_indResult(statistic=5.274874978385382, pvalue=5.392665622801732e-07)


### List of significantly affected proteins

In [8]:
print(sigList)

['FAS', 'CREBBP', 'CDK2', 'CDKN2A']


### Phosphoproteome abundance of pathway proteins

In [10]:
#Specify gene of interest
gene = 'TP53'
genedf = somatic_mutations[gene].to_frame()
sigPhosResults = [];

#Build the dataframe with all the phosphorylation sites on the proteins listed above
for protein in protList:
    sites = phos.filter(regex=protein)
    genedf = genedf.add(sites, fill_value=0)

mutated = genedf.loc[genedf[gene] == 1.0]
wt = genedf.loc[genedf[gene] == 0.0]

#Bonferroni correction for all the sites we are testing at once
pcutoff = 0.05 / len(genedf.columns)

#Test each location one by one
for loc in genedf.columns:
    if not loc == gene:
        mutsitedf = mutated[[gene, loc]].dropna()
        wtsitedf = wt[[gene, loc]].dropna()
        ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
        if(ttest[1] <= pcutoff):
            sigPhosResults.append(loc)
            print('Results for ' + loc + ': ')
            print(ttest)
 

Results for BCL2L12-S273: 
Ttest_indResult(statistic=3.6925070055844387, pvalue=0.0004158661622105229)
Results for CREBBP-T974: 
Ttest_indResult(statistic=-3.9777257442589176, pvalue=0.0002018327186438462)


### List of significantly affected phosphorylation sites

In [11]:
print(sigPhosResults)

['BCL2L12-S273', 'CREBBP-T974']
