# Examine trans effect of TP53 Hotspot mutation in Ovarian Cancer on Phosphoproteomics

### Specifically examine Phosphorylation

#### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import cptac
import cptac.utils as ut

import seaborn as sns
import matplotlib.pyplot as plt

#Use Ovarian cancer object
co = cptac.Ovarian()



                                    

## Use parse hotspot utility to analyze TP53 hotspot mutation in Ovarian cancer

#### Get somatic mutation data from cptac

In [2]:
somatic_mutations = co.get_somatic_mutation()

#### Use parse hotspot utility

In [3]:
vis, binary, detailed, dictionary = ut.parse_hotspot('../../HotspotClusterFiles/ovarian_somatic_38.maf.3D_Proximity.pairwise.recurrence.l0.r10.clusters', somatic_mutations)

In [4]:
vis

Unnamed: 0,hotspot_id,patients_within
0,ADH1A,2.0
1,CDCA2,3.0
2,CPS1,2.0
3,EIF4A1,3.0
4,FGF18,3.0
5,GCA,2.0
6,GOT1,2.0
7,HNRNPM,3.0
8,KDM2A,2.0
9,KHK,2.0


As we can see, TP53 has 5 hotspots in Ovarian cancer

#### Select only TP53 Hotspots

In [5]:
#There are 5 TP53 hotspots
TP53_Hotspots = detailed[['TP53','TP53_1','TP53_2','TP53_3','TP53_4']]
TP53_Hotspots.head()

Unnamed: 0_level_0,TP53,TP53_1,TP53_2,TP53_3,TP53_4
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S002,No,No,No,No,No
S006,No,No,No,No,No
S007,No,Yes_HS,No,No,No
S009,No,Yes,No,No,No
S011,No,Yes,No,No,No


#### Make binary column for samples that have a TP53 Hotspot Mutation

In [6]:
TP53_Hotspots['Hotspot_Mutation'] = ''
for ind, row in TP53_Hotspots.iterrows():
    for col in TP53_Hotspots.columns:
        if row[col] == 'Yes_HS':
            TP53_Hotspots.loc[ind,'Hotspot_Mutation'] = 'TP53_Hotspot_Mutation'
        if row[col] == 'Yes':
            TP53_Hotspots.loc[ind,'Hotspot_Mutation'] = 'TP53_Hotspot_Mutation'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


#### Grab only the binary yes/no hotspot mutation

In [7]:
binary_hotspots = TP53_Hotspots['Hotspot_Mutation']

#### Get Proteomic data, append Binary hotspot mutation column

In [8]:
#Use join omics to mutations
proteomics_df = co.join_omics_to_mutations(omics_df_name='phosphoproteomics', mutations_genes='TP53')

#Remove multiindex from df
proteomics_df = co.reduce_multiindex(proteomics_df, flatten=True)

#Remove Non-Tumor Samples
proteomics_df = proteomics_df.loc[proteomics_df['Sample_Status'] == 'Tumor']

#Remove columns that are not needed
proteomics_df = proteomics_df.drop(['TP53_Location','Sample_Status'],axis=1)
proteomics_df['TP53_Hotspot_Mutation'] = binary_hotspots

#Show df
proteomics_df.head()



Name,AAAS_phosphoproteomics_S495_R.FS*PVLGR.A_NP_001166937,AAAS_phosphoproteomics_S495_R.FS*PVLGR.A_NP_056480,AAGAB_phosphoproteomics_S311_K.AFWMAIGGDRDEIEGLSS*DEEH.-_NP_001258815,AAGAB_phosphoproteomics_S311_K.AFWMAIGGDRDEIEGLSS*DEEH.-_NP_078942,AAK1_phosphoproteomics_S18_R.REQGGSGLGS*GSSGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S20_R.EQGGSGLGSGS*SGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S20_R.REQGGSGLGSGS*SGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S21_R.EQGGSGLGSGSS*GGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S624_K.VGSLTPPSS*PK.T_NP_055726,AAK1_phosphoproteomics_S637_R.ILS*DVTHSAV.F_NP_055726,...,ZZZ3_phosphoproteomics_S113_R.RQTEPVS*PVLKR.I_NP_056349,ZZZ3_phosphoproteomics_S135_R.SEAPNSSEEDS*PIKSDK.E_NP_056349,ZZZ3_phosphoproteomics_S314_F.SETQSSLRDS*EEEVDVVGDSSASK.E_NP_056349,ZZZ3_phosphoproteomics_S391_R.AAPTRGS*PTK.N_NP_056349,ZZZ3_phosphoproteomics_S89_R.GLS*SSEKDNIER.Q_NP_056349,ZZZ3_phosphoproteomics_S90_R.GLSS*SEKDNIER.Q_NP_056349,ZZZ3_phosphoproteomics_S91_R.GLSSS*EKDNIER.Q_NP_056349,TP53_Mutation,TP53_Mutation_Status,TP53_Hotspot_Mutation
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S002,,,,,,,,,,,...,,,-2.533,,,,-1.32,[Frame_Shift_Del],Single_mutation,
S006,,,,,,,,,,,...,,,-3.276,,,,-0.264,[Frame_Shift_Del],Single_mutation,
S007,,,,,,,,,,,...,,,,-0.847,,,,[Missense_Mutation],Single_mutation,TP53_Hotspot_Mutation
S009,-1.137,-1.137,,,,,,,,,...,,,,-0.455,,,-1.129,[Missense_Mutation],Single_mutation,TP53_Hotspot_Mutation
S011,-0.987,-0.987,,,,,,-2.269,-3.363,,...,,,,,,,-1.301,[Frame_Shift_Ins],Single_mutation,TP53_Hotspot_Mutation


#### Classify every sample according to TP53 Mutation Status. Use: Wildtype, Hotspot_Missense, Other_Missense, Truncation

In [9]:
for ind, row in proteomics_df.iterrows():
    #Classify Hotspot Missense Mutations
    if row['TP53_Hotspot_Mutation'] == 'TP53_Hotspot_Mutation':
        proteomics_df.loc[ind, 'TP53_Categorical'] = 'Hotspot_Missense'
        
    #Classify Truncation Mutations
    if 'Frame_Shift_Ins' in row['TP53_Mutation'] or 'Frame_Shift_Del' in row['TP53_Mutation'] or 'Nonsense_Mutation' in row['TP53_Mutation']:
        proteomics_df.loc[ind, 'TP53_Categorical'] = 'Truncation'
        
    #Classify Wildtype
    if row['TP53_Mutation_Status'] == 'Wildtype_Tumor':
        proteomics_df.loc[ind, 'TP53_Categorical'] = 'Wildtype'
        
    #Classify Other Missense Mutations
    if row['TP53_Hotspot_Mutation'] != 'TP53_Hotspot_Mutation':
        if 'Missense_Mutation' in row['TP53_Mutation'] or 'In_Frame_Del' in row['TP53_Mutation'] or 'In_Frame_Ins' in row['TP53_Mutation'] or 'Splice_Site' in row['TP53_Mutation']:
            proteomics_df.loc[ind, 'TP53_Categorical'] = 'Other_Missense'

#Drop columns we no longer need, display df
proteomics_df = proteomics_df.drop(['TP53_Mutation', 'TP53_Mutation_Status', 'TP53_Hotspot_Mutation'], axis = 1)
proteomics_df

Name,AAAS_phosphoproteomics_S495_R.FS*PVLGR.A_NP_001166937,AAAS_phosphoproteomics_S495_R.FS*PVLGR.A_NP_056480,AAGAB_phosphoproteomics_S311_K.AFWMAIGGDRDEIEGLSS*DEEH.-_NP_001258815,AAGAB_phosphoproteomics_S311_K.AFWMAIGGDRDEIEGLSS*DEEH.-_NP_078942,AAK1_phosphoproteomics_S18_R.REQGGSGLGS*GSSGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S20_R.EQGGSGLGSGS*SGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S20_R.REQGGSGLGSGS*SGGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S21_R.EQGGSGLGSGSS*GGGGSTSGLGSGYIGR.V_NP_055726,AAK1_phosphoproteomics_S624_K.VGSLTPPSS*PK.T_NP_055726,AAK1_phosphoproteomics_S637_R.ILS*DVTHSAV.F_NP_055726,...,ZZZ3_phosphoproteomics_S113_R.QTEPVS*PVLKR.I_NP_056349,ZZZ3_phosphoproteomics_S113_R.RQTEPVS*PVLK.R_NP_056349,ZZZ3_phosphoproteomics_S113_R.RQTEPVS*PVLKR.I_NP_056349,ZZZ3_phosphoproteomics_S135_R.SEAPNSSEEDS*PIKSDK.E_NP_056349,ZZZ3_phosphoproteomics_S314_F.SETQSSLRDS*EEEVDVVGDSSASK.E_NP_056349,ZZZ3_phosphoproteomics_S391_R.AAPTRGS*PTK.N_NP_056349,ZZZ3_phosphoproteomics_S89_R.GLS*SSEKDNIER.Q_NP_056349,ZZZ3_phosphoproteomics_S90_R.GLSS*SEKDNIER.Q_NP_056349,ZZZ3_phosphoproteomics_S91_R.GLSSS*EKDNIER.Q_NP_056349,TP53_Categorical
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S002,,,,,,,,,,,...,,,,,-2.533,,,,-1.320,Truncation
S006,,,,,,,,,,,...,,,,,-3.276,,,,-0.264,Truncation
S007,,,,,,,,,,,...,,,,,,-0.847,,,,Hotspot_Missense
S009,-1.137,-1.137,,,,,,,,,...,-0.336,,,,,-0.455,,,-1.129,Hotspot_Missense
S011,-0.987,-0.987,,,,,,-2.269,-3.363,,...,-0.770,,,,,,,,-1.301,Truncation
S012,-1.577,-1.577,,,,-2.316,-1.694,-2.355,,,...,,,,,,,,,-1.127,Wildtype
S013,,,,,,,,,,,...,-0.516,,,,,,,,-0.501,Hotspot_Missense
S015,,,,,-2.938,,-2.606,-1.825,,,...,,,-1.612,,,,,,,Hotspot_Missense
S016,-1.271,-1.271,,,,,,-2.366,-3.525,,...,-4.139,,,,,,,,-2.752,Truncation
S019,-0.439,-0.439,,,,,,,,,...,0.315,,,,,-1.304,,,-0.347,Wildtype


#### Split dataframe into 4 categorical dataframes

In [10]:
hotspot_df = proteomics_df.loc[proteomics_df['TP53_Categorical'] == 'Hotspot_Missense']
truncation_df = proteomics_df.loc[proteomics_df['TP53_Categorical'] == 'Truncation']
other_missense_df = proteomics_df.loc[proteomics_df['TP53_Categorical'] == 'Other_Missense']
wt_df = proteomics_df.loc[proteomics_df['TP53_Categorical'] == 'Wildtype']

#### Use wrap t-test to compare Hotspot_Missense with Truncation

In [11]:
hotspot_missense_truncation_df = pd.concat([hotspot_df, truncation_df])
comp_cols = list(hotspot_missense_truncation_df.columns)
comp_cols.remove('TP53_Categorical')
results = ut.wrap_ttest(hotspot_missense_truncation_df, comparison_columns=comp_cols, label_column='TP53_Categorical')
results

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,Comparison,P_Value
0,HOXA5_phosphoproteomics_S95_R.YSQPATSTHS*PQPDP...,4.649234e-08


#### Use wrap t-test to compare Hotspot_Missense with Wildtype

In [12]:
hotspot_missense_wildtype_df = pd.concat([hotspot_df, wt_df])
comp_cols = list(hotspot_missense_wildtype_df.columns)
comp_cols.remove('TP53_Categorical')
results = ut.wrap_ttest(hotspot_missense_wildtype_df, comparison_columns=comp_cols, label_column='TP53_Categorical')
results

No significant comparisons.


#### Use wrap t-test to compare Hotspot_Missense with Other Missense

In [13]:
hotspot_missense_other_missense_df = pd.concat([hotspot_df, other_missense_df])
comp_cols = list(hotspot_missense_other_missense_df.columns)
comp_cols.remove('TP53_Categorical')
results = ut.wrap_ttest(hotspot_missense_other_missense_df, comparison_columns=comp_cols, label_column='TP53_Categorical')
results

No significant comparisons.


### Conclusions:

There were not significant comparisons, after adjusting for multiple testing using a bonferroni correction.