   # TP53: Effects of Mutation on Interacting Proteins

<b>Standard imports for playing with and plotting data frames.</b>

In [1]:
import pandas as pd
import numpy as np
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns

<b>Import CPTAC data</b>

In [2]:
import CPTAC

Loading Dictionary...
Loading Clinical Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available
rapidly after generation for community research use. The embargo
allows exploring and utilizing the data, but the data may not be in a
publication until July 1, 2019. Please see
https://proteomics.cancer.gov/data-portal/about/data-use-agreement or
enter CPTAC.embargo() to open the webpage for more details.


In [3]:
somatic_mutations = CPTAC.get_somatic()
proteomics = CPTAC.get_proteomics()
phos = CPTAC.get_phosphoproteomics()

### List of proteins that interact with TP53 (according to Uniprot and String)

In [4]:
gene = 'TP53'
protList = ['AXIN1', 'EP300', 'HRMT1L2', 'CARM1', 'TAF1', 'ING4', 'CABLES1', 
            'TP73', 'HIPK1', 'HIPK2', 'TP53INP1', 'TP53BP', 'WWOX', 'HCV', 'USP7', 'SYVN1', 'HSP90AB1',
           'CHD8', 'ARMC10', 'BANP', 'CDKN2AIP', 'NUAK1', 'STK11', 'LKB1', 'UHRF2', 'E4F1', 'YWHAZ',
           'MAML1', 'MKRN1', 'PML', 'MDM2', 'FBXO42', 'ATM', 'PP2R2A', 'AURKA', 'DAXX', 'BRD7', 'TRIM24',
           'L3MBTL1', 'GRK5', 'CAK', 'PTK2B', 'PYK2', 'MDM2', 'PRKCG', 'PPIF', 'KAT6A', 'UBC9', 'ZNF385B',
           'ZNF385A', 'ANKRD2', 'RFFL', 'RNF34', 'MTA1', 'COP1', 'CCAR2', 'MORC3', 'POU4F2', 'AFG1L', 'UBD',
           'TAF6', 'HPV', 'UL123', 'E1B-55K', 'BAX', 'FAS', 'BCL2', 'CREBBP', 'CDK2', 'MDM2', 'CDKN2A', 
            'CDKN1A', 'ATM', 'MDM4']

### t-test for proteome abundance of these proteins when TP53 is mutated
<b>Note: We are only interested in looking at proteins whose abundance is significantly affected by TP53 mutation, and we must do a Bonferroni correction on our p-value</b>

In [5]:
pcutoff = 0.05 / len(protList)

In [6]:
significantResults = [];
for protein in protList: 
    if protein in proteomics.columns:
        cross = CPTAC.compare_mutations(proteomics, protein, gene)
        cross = cross[["Mutation", protein]].dropna(axis=0)
        mutated = cross.loc[cross["Mutation"] == "Missense_Mutation"]
        wt = cross.loc[cross["Mutation"] == "Wildtype"]
        ttest = scipy.stats.ttest_ind(mutated[protein], wt[protein])
        if(ttest[1] <= pcutoff):
            significantResults.append(protein)
            print("Test for " + protein + ": ")
            print(ttest) 

Test for CABLES1: 
Ttest_indResult(statistic=-4.24590117102032, pvalue=4.113425690389564e-05)
Test for HSP90AB1: 
Ttest_indResult(statistic=4.690922135620032, pvalue=6.788129419108358e-06)
Test for CHD8: 
Ttest_indResult(statistic=4.424235399110169, pvalue=2.0266453529471308e-05)
Test for STK11: 
Ttest_indResult(statistic=-3.5211494383287114, pvalue=0.0006019563427740366)
Test for UHRF2: 
Ttest_indResult(statistic=-4.171494692888455, pvalue=5.495017036347648e-05)
Test for AURKA: 
Ttest_indResult(statistic=5.024970740616274, pvalue=3.00775411680297e-06)
Test for PPIF: 
Ttest_indResult(statistic=3.5713662057749924, pvalue=0.0004986381519426147)
Test for AFG1L: 
Ttest_indResult(statistic=-3.5029592690884357, pvalue=0.0006455455480986071)
Test for CDKN2A: 
Ttest_indResult(statistic=3.867258345930768, pvalue=0.0001779669053930629)


In [31]:
test = CPTAC.compare_mutations(proteomics, 'CABLES1', 'TP53')[['CABLES1', 'Mutation']]
test2 = CPTAC.compare_mutations(proteomics, 'HSP90AB1', 'TP53')['HSP90AB1'].to_frame()
test = test.merge(test2, left_index=True, right_index=True)

### List of significantly affected proteins

In [7]:
print(significantResults)

['CABLES1', 'HSP90AB1', 'CHD8', 'STK11', 'UHRF2', 'AURKA', 'PPIF', 'AFG1L', 'CDKN2A']


### Build the dataframe for plotting

In [None]:
plotdf = CPTAC.compare_mutations(proteomics, si)
for protein in significantResults:
    

## Phosphoproteome abundance of interacting proteins

### This is the same list as before, with some of the end numbers of the proteins removed; less specificity in protein names captures more potentially significant results in the phosphoproteomic data

In [8]:
phosProtList = ['AXIN', 'EP', 'HRMT', 'CARM', 'TAF', 'ING', 'CABLES', 
            'TP73', 'HIPK', 'TP53INP', 'TP53BP', 'WWOX', 'HCV', 'USP', 'SYVN', 'HSP90AB1',
           'CHD', 'ARMC', 'BANP', 'CDKN', 'NUAK', 'STK', 'LKB', 'UHRF', 'E4F', 'YWHAZ',
           'MAML', 'MKRN', 'PML', 'MDM', 'FBXO', 'ATM', 'PP2R', 'AURKA', 'DAXX', 'BRD', 'TRIM',
           'L3MBTL', 'GRK', 'CAK', 'PTK', 'PYK', 'PRKCG', 'PPIF', 'KAT', 'UBC', 'ZNF',
            'ANKRD', 'RFFL', 'RNF', 'MTA', 'COP', 'CCAR', 'MORC', 'POU4F', 'AFG1L', 'UBD',
           'TAF', 'HPV', 'UL1', 'E1B', 'BAX', 'FAS', 'BCL2', 'CREBBP', 'CDK2', 'MDM', 'CDKN', 'ATM']

In [9]:
significantPhosResults = [];

#Bonferroni correction for all the sites we are testing at once
#pcutoff = 0.05 / len(genedf.columns)

#Test each protein one by one and print significant results
for pattern in phosProtList:
    if(len(testResults) > 0):
        for result in testResults:
            print(result)

NameError: name 'testResults' is not defined

### List of significantly affected phosphorylation sites

In [None]:
print(sigPhosResults)