# Examine the trans effects of TP53 hotspot mutations in ovarian cancer

#### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import cptac
import cptac.utils as ut

# Instantiate the cptac ovarian dataset object; later cells query `ov` for the
# somatic mutation table and for the proteomics/mutation join.
ov = cptac.Ovarian()



                                    

## Use parse hotspot utility to analyze TP53 hotspot mutation in Ovarian cancer

#### Get somatic mutation data from cptac

In [2]:
# Somatic mutation table for the ovarian cohort; passed to ut.parse_hotspot in the next step.
somatic_mutations = ov.get_somatic_mutation()

#### Run the `parse_hotspot` utility on the somatic mutations

In [3]:
# Relative path to the hotspot cluster file that parse_hotspot reads.
hotspot_cluster_path = '../../HotspotClusterFiles/ovarian_somatic_38.maf.3D_Proximity.pairwise.recurrence.l0.r10.clusters'

# parse_hotspot returns four objects; only `detailed` is used in the cells below.
vis, binary, detailed, dictionary = ut.parse_hotspot(hotspot_cluster_path, somatic_mutations)

#### Select only TP53 Hotspots

In [5]:
# Keep only the TP53 hotspot columns of the detailed hotspot table.
# The explicit .copy() makes TP53_Hotspots an independent DataFrame, so adding
# a column to it later does not trigger pandas' SettingWithCopyWarning.
TP53_Hotspots = detailed[['TP53','TP53_1','TP53_2','TP53_3','TP53_4']].copy()
TP53_Hotspots.head()

Unnamed: 0_level_0,TP53,TP53_1,TP53_2,TP53_3,TP53_4
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S002,No,No,No,No,No
S006,No,No,No,No,No
S007,No,Yes_HS,No,No,No
S009,No,Yes,No,No,No
S011,No,Yes,No,No,No
S012,No,No,No,No,No
S013,No,Yes,No,No,No
S015,No,Yes,No,No,No
S016,No,Yes,No,No,No
S019,No,No,No,No,No


#### Make binary column for samples that have a TP53 Hotspot Mutation

In [6]:
# Flag every sample that carries at least one TP53 hotspot mutation, i.e. any
# of its columns holds the value 'Yes_HS'.
# The comparison is vectorized rather than looping over rows with iterrows(),
# and .assign() returns a new DataFrame instead of writing into a slice of
# `detailed`, so no SettingWithCopyWarning is raised.
has_hotspot = TP53_Hotspots.eq('Yes_HS').any(axis=1)
TP53_Hotspots = TP53_Hotspots.assign(Hotspot_Mutation=has_hotspot)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [7]:
# Pull out the True/False hotspot flag as a standalone Series; it is attached
# to the proteomics table in the next step.
binary_hotspots = TP53_Hotspots['Hotspot_Mutation']

#### Get Proteomic data, append Binary hotspot mutation column

In [8]:
# Join the proteomics table with the TP53 mutation annotations.
tp53_joined = ov.join_omics_to_mutations(omics_df_name='proteomics', mutations_genes='TP53')

# Keep tumor samples only. The Sample_Status column is addressed with a
# two-part key because the joined frame's column labels are tuples here.
is_tumor = tp53_joined[('Sample_Status', np.nan)] == 'Tumor'
tumor_only = tp53_joined.loc[is_tumor]

# Discard the mutation/status annotation columns that came with the join.
annotation_cols = [
    'TP53_Mutation',
    'TP53_Location',
    'TP53_Mutation_Status',
    'Sample_Status',
]
proteomics_df = tumor_only.drop(columns=annotation_cols)

# Attach the hotspot flag; pandas aligns the Series to the frame by row index.
proteomics_df['TP53_Hotspot_Mutation'] = binary_hotspots

# Collapse the multi-level column labels into single flat names.
proteomics_df = ov.reduce_multiindex(proteomics_df, flatten=True)
proteomics_df

  new_axis = axis.drop(labels, errors=errors)


Name,A1BG_proteomics_NP_570602,A2M_proteomics_NP_000005,A2ML1_proteomics_NP_653271,AAAS_proteomics_NP_056480,AACS_proteomics_NP_076417,AAGAB_proteomics_NP_078942,AAK1_proteomics_NP_055726,AAMDC_proteomics_NP_078960,AAMP_proteomics_NP_001078,AAR2_proteomics_NP_001258803,...,ZSCAN32_proteomics_NP_001271456,ZSWIM8_proteomics_NP_001229416,ZW10_proteomics_NP_004715,ZWILCH_proteomics_NP_060445,ZWINT_proteomics_NP_008988,ZYG11B_proteomics_NP_078922,ZYX_proteomics_NP_001010972,ZZEF1_proteomics_NP_055928,ZZZ3_proteomics_NP_056349,TP53_Hotspot_Mutation_
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
S002,0.133634,0.765120,-0.778849,0.118236,0.112740,0.054942,0.313055,-0.786270,-0.384680,0.064017,...,-0.323516,0.074209,0.232999,-0.195793,,-0.145053,0.590217,0.019414,,False
S006,-0.432786,-0.461906,-0.533497,0.041800,0.604114,0.351294,0.546605,0.603935,-0.235374,0.059084,...,0.032004,0.016276,0.587911,0.574339,,0.075401,-0.437564,-0.208831,,False
S007,-0.013943,0.065842,,-0.010514,0.541935,0.169826,-0.023032,-0.057549,-0.397627,0.199581,...,0.002237,0.206993,0.111037,1.020321,,-0.245864,0.008411,0.130504,-0.367433,True
S009,1.234203,1.512082,-0.703050,0.181152,0.087719,0.226861,-0.100766,-0.516022,-0.456512,-0.257629,...,,-0.529948,0.034713,0.149523,-0.313370,-0.507473,0.221818,0.278313,-1.008998,False
S011,-0.482872,0.343872,-1.150936,0.315155,0.097235,-0.160208,-0.125165,-0.768776,-0.372368,0.450086,...,-0.752819,-0.181027,0.179612,0.007854,-0.106049,0.377063,-0.295592,0.011997,-0.236003,False
S012,-0.956518,-0.312811,-0.556404,-0.069265,-0.268706,-0.145965,0.376177,-1.283498,-0.086017,0.330957,...,,-0.165247,0.331122,-0.046395,-0.925683,0.085888,-0.143662,0.506730,-1.023969,False
S013,-0.102617,-0.338799,-0.052664,0.176517,-0.400002,-0.199432,0.009326,-0.334037,-0.281129,0.209304,...,0.029621,0.043402,0.258654,-0.812751,-0.517359,0.285841,-0.264405,-0.589593,0.172219,False
S015,-1.318564,-0.492472,,0.722331,0.702353,-0.519647,-0.568089,0.162200,0.624114,1.022120,...,-1.478972,-0.273801,0.266843,-0.058109,0.271219,0.316924,-0.933430,-0.050844,-0.199283,False
S016,-0.343398,-0.523344,-1.139941,0.440155,-0.290471,0.099976,-0.021885,-0.059720,-0.288203,0.270428,...,0.861205,0.105740,0.247778,0.176014,-0.318478,0.251725,0.017359,0.066301,-0.208623,False
S019,0.336833,0.063341,0.623607,0.347631,0.522685,0.159752,0.286359,-0.260604,-0.451290,-0.146330,...,,-0.383400,0.425142,-1.087164,-0.412575,-0.466996,0.000623,0.036598,-0.659354,False


#### Use wrap_ttest to compare each protein's abundance between samples with and without a TP53 hotspot mutation

In [9]:
# Test every column except the hotspot label itself, using the label column
# to split the samples into the two groups.
label_col = 'TP53_Hotspot_Mutation_'
compar_cols = proteomics_df.columns.tolist()
compar_cols.remove(label_col)

wrap_results = ut.wrap_ttest(
    proteomics_df,
    comparison_columns=compar_cols,
    label_column=label_col,
)

  **kwargs)
  ret = ret.dtype.type(ret / rcount)


No significant comparisons.
