   # PIK3R1: Effects of Mutation

<b>Standard imports for playing with and plotting data frames.</b>

In [23]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
import re as re
import statsmodels.stats.multitest
#import altair as alt

#alt.renderers.enable('notebook')

<b>Import CPTAC data</b>

In [16]:
import CPTAC

In [17]:
somatic_mutations = CPTAC.get_somatic()
proteomics = CPTAC.get_proteomics()
phos = CPTAC.get_phosphoproteomics()

### List of proteins that interact with PIK3R1 (according to Uniprot)

In [18]:
protList = ['ABI1', 'ABL2', 'AR', 'CBL', 'EGFR', 'ERBB2', 'ERBB3', 'ESR1', 'FASLG', 'FGFR1', 'RRAS', 'NRAS', 'PTEN', 'IRS1', 'IRS2', 'KIT', 'HRAS', 'IRS1', 'PIK3R1', 'PIK3CA', 'PIK3CD' 'RPS6KB1', 'AKT1', 'MRAS']

### Proteome abundance of proteins that interact with PIK3R1

In [19]:
gene = 'PIK3CA'
tested = [];
p_vals = [];
for protein in protList: 
    if protein in proteomics.columns:
        tested.append(protein)
        cross = CPTAC.compare_mutations(proteomics, protein, gene)
        cross = cross[:100]
        cross = cross[["Mutation", protein]].dropna(axis=0)
        mutated = cross.loc[cross["Mutation"] != "Wildtype"]
        wt = cross.loc[cross["Mutation"] == "Wildtype"]
        ttest = scipy.stats.ttest_ind(mutated[protein], wt[protein])
        p_vals.append(ttest[1])

### List of significantly affected proteins

In [20]:
print(sigList)

[]


### Phosphoproteome abundance of interacting proteins

In [21]:
phosProtList = ['ABI1', 'ABL2', 'EGFR', 'ERBB', 'ESR', 'FASLG', 'FGFR', 'RAS', 'PTEN', 'KIT', 'HRAS', 'IRS', 'PIK3R', 'PIK3CA', 'PIK3CD' 'RPS6KB', 'AKT', 'MRAS']

In [24]:
sites = phos.columns
p_values = []
site_names = []


alpha = 0.05
p_value = alpha/len(phosProtList)



for protein in phosProtList:
    pattern = re.compile(protein)
    isInList = filter(pattern.search, sites)
    if next(isInList, None) is not None:
        phosphositesdf = CPTAC.compare_mutations(phos, protein, gene)
        phosphositesdf = phosphositesdf.loc[phosphositesdf['Patient_Type'] == 'Tumor'].drop('Patient_Type', axis = 1)
        for site in phosphositesdf.columns:
            if (site is not 'Mutation'):
                sitedf = CPTAC.compare_mutations(phos, site, gene)
                mutateddf = sitedf.loc[sitedf['Mutation'] != 'Wildtype'].dropna(axis=0)
                wtdf = sitedf.loc[sitedf['Mutation'] == 'Wildtype'].dropna(axis=0)
                ttest = scipy.stats.ttest_ind(mutateddf[site], wtdf[site])
                p_values.append(ttest[1])
                site_names.append(site)     
                
areSignificant = statsmodels.stats.multitest.fdrcorrection(p_values)[0]
pvalues = statsmodels.stats.multitest.fdrcorrection(p_values)[1]
significant_sites = np.array(site_names)[np.array(areSignificant)]
significant_pvalues = np.array(pvalues)[np.array(areSignificant)]

significant_vals = dict(zip(significant_sites, significant_pvalues))

print(significant_sites)
print(significant_pvalues)
print(significant_vals)

['ABI1-T200' 'EGFR-S1166' 'ERBB2-S1151' 'ESRP1-S543' 'ESRP2-S563'
 'GORASP2-T415' 'GPRASP2-S328' 'GPRASP2-S330' 'GPRASP2-S512' 'GRASP-S94'
 'RASAL2-S804' 'RASAL3-S228' 'RASGRF2-S746' 'RASGRF2-S852' 'RASGRP2-S117'
 'RASGRP2-S576' 'RASGRP2-S578' 'RASGRP2-T399' 'RASIP1-S328' 'RASIP1-S331'
 'RASIP1-S41' 'RASIP1-S419' 'RASL11A-S217' 'RASSF2-S145' 'RASSF4-S141'
 'RASSF8-S129' 'IRS1-S1005' 'IRS1-S1100' 'IRS1-S1101' 'IRS1-S1134'
 'IRS1-S270' 'IRS1-S323' 'IRS1-S330' 'IRS1-S374' 'IRS1-S413' 'IRS1-S415'
 'IRS1-S419' 'IRS1-S421' 'IRS1-S527' 'IRS1-S531' 'IRS1-S629' 'IRS1-S672'
 'IRS2-S1100' 'IRS2-S391' 'IRS2-S560' 'IRS2-S577' 'IRS2-S915' 'IRS2-S973'
 'PIK3R4-S853' 'PIK3CA-S312' 'PIK3CA-T313' 'AKT1-S126' 'AKT3-T447']
[8.09119151e-03 8.76162852e-03 4.27201453e-02 1.97540831e-03
 1.07542253e-03 3.19593408e-02 2.39221732e-03 1.78812120e-02
 1.07542253e-03 1.69880434e-02 8.76162852e-03 3.08331866e-02
 1.34638584e-02 6.19370923e-03 8.09119151e-03 5.31322175e-05
 1.22086873e-04 7.38571999e-04 1.07542253e-

### List of significantly affected phosphorylation sites

In [None]:
print(sigPhosResults)