## Notebook examining proteins listed in this paper as indicators of G2/M phase

https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5650473/

#### Listed Proteins

CDC20, AURKA, KIFC1, CCNB1, ANLN, HMMR, KIF20A, AURKB, CCNB2, RRM2, PLK1, RACGAP1, CENPF and PRC1

#### Library Imports

In [1]:
import pandas as pd
import numpy as np
import scipy.stats

import seaborn as sns
import matplotlib.pyplot as plt

import cptac
import cptac.utils as ut

en = cptac.Endometrial()

                                    

DataVersionNotInstalledError: Data version 2.1 is not installed. To install, run "cptac.download(dataset='endometrial', version='2.1')".

#### Specify Mutation Gene, Create List of Proteins

In [None]:
mut_gene = 'RB1'
prots = ['CDC20','AURKA','KIFC1','CCNB1','ANLN','HMMR','KIF20A','AURKB','CCNB2','RRM2','PLK1','RACGAP1','CENPF','PRC1']

#### Compare proteomics of the listed proteins with TP53 Mutation

In [None]:
#Join TP53 Mutations to proteomic data for given proteins
prot_df = en.join_omics_to_mutations(mutations_genes=mut_gene, omics_genes=prots, omics_df_name='proteomics')

#Keep only Tumor Samples
prot_df = prot_df.loc[prot_df['Sample_Status'] == 'Tumor']

#Create binary classification of Mutated/Wildtype
for ind, row in prot_df.iterrows():
    if row[mut_gene + '_Mutation_Status'] == 'Wildtype_Tumor':
        prot_df.loc[ind, mut_gene + '_Mutation_Binary'] = 'Wildtype'
    else:
        prot_df.loc[ind, mut_gene + '_Mutation_Binary'] = 'Mutated'
        
#Drop unneeded columns
prot_df = prot_df.drop([mut_gene+'_Mutation', mut_gene+'_Location', mut_gene+'_Mutation_Status','Sample_Status'], axis=1)
prot_df.head()

#### Determine significant comparisons when adjusting for multiple testing

In [None]:
comp_cols = list(prot_df.columns)
comp_cols.remove(mut_gene+'_Mutation_Binary')
results = ut.wrap_ttest(df=prot_df, label_column=mut_gene+'_Mutation_Binary', comparison_columns=comp_cols, verbose=True)

#### Show results that were statistically significant

In [None]:
results

#### Generate plot of these proteins

In [None]:
#Specify dimensions and settings
a4_dims = (40.4, 16.54)
fig, ax = plt.subplots(figsize=a4_dims)
plotdf = pd.melt(prot_df, id_vars=['TP53_Mutation_Binary'], value_vars=comp_cols)
plotdf = plotdf.replace(to_replace=r'_proteomics', value='', regex=True)
my_pal = {"Wildtype": "blue", "Mutated": "orange"}

#Make boxplot
plot = sns.boxplot(data=plotdf, y='value', x='Name',hue='TP53_Mutation_Binary', hue_order=['Wildtype','Mutated']).set_title("Proteomics vs. TP53 Mutation",fontsize = '40')

#Overlay stripplot
plot = sns.stripplot(data=plotdf, y='value', x='Name',hue='TP53_Mutation_Binary', hue_order=['Wildtype','Mutated'], dodge=True, jitter=True, color='.3')

#Change Style and label sizes
plot.tick_params(labelsize=20)
plot.set_xlabel('')
plot.set_ylabel('Protein Abundance', fontsize='20')
plot.set_title('Proteomic abundance of 14 proteins known to be high during M phase vs. TP53 mutation status in Endometrial Cancer', fontsize = '35')
sns.set_style("white")

#Adjust the Legend
handles, labels = ax.get_legend_handles_labels()
plt.legend(handles[0:2],['Wildtype','Mutated'], title='TP53 Status', fontsize='20', frameon=True)
plot.get_legend().get_title().set_fontsize('25')