# PIK3CA: Effects of Mutation

<b> Standard imports for playing with and plotting data frames. </b>

In [21]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
#import altair as alt

#alt.renderers.enable('notebook')


<b> Import CPTAC </b>

In [22]:
import CPTAC

In [23]:
somatic_mutations = CPTAC.get_somatic()
proteomics = CPTAC.get_proteomics()
phos = CPTAC.get_phosphoproteomics()

<b> List of proteins that interact with PIK3R1 (according to STRING) </b>

In [40]:
protList = ['IRS1', 'IRS2', 'AKT1', 'AKT2', 'P55G', 'ERBB3', 'P85A', 'MRAS', 'PTEN', 'RPS6KB1','PI3KR1']

<b> Proteome abundance of proteins that interact with PIK3R1 </b>

In [41]:
pcutoff = 0.05 / len(protList)
gene = 'PIK3CA'

sigResults = [];
for protein in protList: 
    if protein in proteomics.columns:
        cross = CPTAC.merge_mutations(proteomics, protein, gene)
        cross = cross[["Mutation", protein]].dropna(axis=0)
        mutated = cross.loc[cross["Mutation"] != "Wildtype"]
        wt = cross.loc[cross["Mutation"] == "Wildtype"]
        ttest = scipy.stats.ttest_ind(mutated[protein], wt[protein])
        if(ttest[1] <= pcutoff):
            sigResults.append(protein)
            print("Test for " + protein + ": ")
            print(ttest) 

Test for IRS1: 
Ttest_indResult(statistic=-3.624921787264356, pvalue=0.00040553855122213927)
Test for IRS2: 
Ttest_indResult(statistic=-4.305242968682799, pvalue=3.4083077260524325e-05)
Test for AKT2: 
Ttest_indResult(statistic=-4.5103219547783135, pvalue=1.3713846510248371e-05)
Test for PTEN: 
Ttest_indResult(statistic=-4.112896462551923, pvalue=6.679197651007237e-05)


<b> Print the list of proteins with significant results </b>

In [26]:
print(sigResults)

['IRS1', 'IRS2', 'AKT2', 'PTEN']


### Phosphoproteome abundance of interacting proteins

In [37]:
phosProtList = ['IRS1', 'AKT1', 'AKT2', 'P55G', 'ERBB3', 'P85A', 'MRAS', 'PTEN', 'RPS6KB1']

In [39]:
#Specify gene of interest
gene = 'PIK3CA'
genedf = somatic_mutations[gene].to_frame()
sigPhosResults = [];

#Build the dataframe with all the phosphorylation sites on the proteins listed above
for protein in phosProtList:
    sites = phos.filter(regex=protein)
    genedf = genedf.add(sites, fill_value=0)

mutated = genedf.loc[genedf[gene] == 1.0]
wt = genedf.loc[genedf[gene] == 0.0]

#Bonferroni correction for all the sites we are testing at once
pcutoff = 0.05 / len(genedf.columns)

#Test each location one by one and print significant results
for loc in genedf.columns:
    if not loc == gene:
        
        mutsitedf = mutated[[gene, loc]].dropna()
        wtsitedf = wt[[gene, loc]].dropna()
        
        ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
        if(ttest[1] <= pcutoff):
            sigPhosResults.append(loc)
            print('Results for ' + loc + ': ')
            print(ttest)   

KeyError: 'PIK3CA'