# Final Figures for PIK3CA Story
### Phosphorylation levels

### Standard Imports

In [1]:
import pandas as pd
import numpy as np
import re
import scipy.stats
import statsmodels.stats.multitest
import matplotlib.pyplot as plt
import seaborn as sns
import json
import operator

#import the Endometrial data from CPTAC package
import CPTAC.Endometrial as CPTAC

Welcome to the CPTAC data service package. This import contains
information about the package. In order to access a specific data set,
import a CPTAC subfolder by either 'import CPTAC.DataName' or 'from
CPTAC import DataName'.
Loading Endometrial CPTAC data:
Loading Dictionary...
Loading Clinical Data...
Loading Proteomics Data...
Loading Transcriptomics Data...
Loading CNA Data...
Loading Phosphoproteomics Data...
Loading Somatic Mutation Data...

 ******PLEASE READ******
CPTAC is a community resource project and data are made available
rapidly after generation for community research use. The embargo
allows exploring and utilizing the data, but the data may not be in a
publication until July 1, 2019. Please see
https://proteomics.cancer.gov/data-portal/about/data-use-agreement or
enter embargo() to open the webpage for more details.


### Get phosphorylation data and somatic mutations from CPTAC

In [2]:
# Somatic mutation table; the next code cell filters it on its 'Gene' and
# 'Location' columns and reads its 'Clinical_Patient_Key' column.
somatic = CPTAC.get_somatic()
# Phosphoproteomics data; passed as the first argument to
# CPTAC.compare_mutations in the next code cell.
phos = CPTAC.get_phosphoproteomics()

### Make Phosphoproteomics box plot for AKT1, AKT2, PTEN, PIK3CA
##### Show data for hotspot mutations vs all other mutations or wildtype

In [11]:
# Build `plotdf`: one row per tumor sample, a 'Mutation' label
# ('Hotspot Mutation' vs 'Non-Hotspot Mutation or Wildtype') and one column per
# phosphorylation site of the proteins in `protList`.

# Use PIK3CA as gene; AKT1, AKT2, PTEN, and PIK3CA as proteins.
gene = 'PIK3CA'
protList = ['AKT1', 'AKT2', 'PTEN', 'PIK3CA']

# Mutations present in the hotspot according to our Hotspot3D output.
hotspot_mutations = ['p.E545A', 'p.E545K', 'p.E545V', 'p.Q546P', 'p.Q546R', 'p.E542K']
mutated_hotspot = somatic.loc[(somatic['Location'].isin(hotspot_mutations)) & (somatic['Gene'] == gene)]
hotspot_patients = mutated_hotspot['Clinical_Patient_Key']

# Get the PIK3CA mutation dataframe and keep only the 'Mutation' column of the
# tumor samples. A single .loc call plus .copy() gives an independent frame, so
# the relabelling below never writes into a slice of the CPTAC result.
plotdf = CPTAC.compare_mutations(phos, gene)
plotdf = plotdf.loc[plotdf['Patient_Type'] == 'Tumor', ['Mutation']].copy()

# Add the phosphorylation-site columns for every protein in the list. All site
# columns of one protein are joined at once instead of one join per column.
non_site_columns = ('Mutation', 'Patient_Type')
for protein in protList:
    proteindf = CPTAC.compare_mutations(phos, protein, gene)
    proteindf = proteindf.loc[proteindf['Patient_Type'] == 'Tumor']
    site_columns = [col for col in proteindf.columns if col not in non_site_columns]
    plotdf = plotdf.join(proteindf[site_columns])

# Classify every sample as hotspot vs. everything else. Membership of the row
# index in `hotspot_patients` is tested in one vectorized step rather than by
# writing to the frame while iterating over it with iterrows().
is_hotspot = plotdf.index.isin(hotspot_patients.values)
plotdf['Mutation'] = np.where(is_hotspot, 'Hotspot Mutation', 'Non-Hotspot Mutation or Wildtype')

print(plotdf)

                              Mutation  AKT1-S122  AKT1-S124  AKT1-S126  \
S001                  Hotspot Mutation        NaN   -0.04605   0.148100   
S002  Non-Hotspot Mutation or Wildtype    -0.2500   -0.26300  -0.121000   
S003                  Hotspot Mutation        NaN    0.21400   0.050500   
S004  Non-Hotspot Mutation or Wildtype        NaN   -0.36800   0.320000   
S005  Non-Hotspot Mutation or Wildtype        NaN   -0.00300  -0.028200   
S006  Non-Hotspot Mutation or Wildtype        NaN   -0.18965  -0.039300   
S007  Non-Hotspot Mutation or Wildtype     0.2170    0.16500   0.228000   
S008  Non-Hotspot Mutation or Wildtype        NaN    0.08425  -0.020100   
S009                  Hotspot Mutation        NaN   -0.04670   0.575000   
S010  Non-Hotspot Mutation or Wildtype        NaN    0.09905   0.245000   
S011  Non-Hotspot Mutation or Wildtype        NaN   -0.16100  -0.298000   
S012  Non-Hotspot Mutation or Wildtype        NaN   -0.08310  -0.134100   
S013  Non-Hotspot Mutatio