   # PIK3R1: Effects of Mutation

<b>Standard imports for playing with and plotting data frames.</b>

In [30]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

# Select Altair's 'notebook' renderer for chart display in this session.
alt.renderers.enable('notebook')

RendererRegistry.enable('notebook')

<b>Import CPTAC data</b>

In [31]:
import CPTAC

In [5]:
# Load the three CPTAC tables used by the analyses below.
# NOTE(review): judging from later usage, each is a DataFrame sharing a sample
# index -- confirm against the CPTAC package documentation.

# Indexed by gene name below; its values are compared against 1.0 / 0.0.
somatic_mutations = CPTAC.get_somatic()
# Looked up by protein name below (one column per protein).
proteomics = CPTAC.get_proteomics()
# Columns are selected below by regex on the protein name (e.g. 'IRS2-S1100').
phos = CPTAC.get_phosphoproteomics()

### List of proteins that interact with PIK3R1 (according to UniProt)

In [23]:
# Proteins to test; both the proteomics and the phosphoproteomics cells below
# iterate over this list, and its length sets the proteomics p-value cutoff.
protList = ['RRAS', 'NRAS', 'PTEN', 'IRS2', 'HRAS', 'IRS1', 'PIK3R1', 'RPS6KB1', 'AKT1', 'MRAS']

### Proteome abundance of proteins that interact with PIK3R1

In [32]:
# Compare protein abundance between PIK3R1-mutated and wild-type samples,
# running one two-sample t-test per protein in protList.
gene = 'PIK3R1'
sigList = []

# Bonferroni-style threshold: 0.05 divided by the number of entries in protList.
# (Temporarily setting pcutoff to 1 prints every computable test result.)
pcutoff = 0.05 / len(protList)

genedf = somatic_mutations[gene].to_frame()
for protein in protList:
    # Proteins with no proteomics column are skipped without a message.
    if protein not in proteomics.columns:
        continue

    proteindfName = protein + " protein"
    proteindf = proteomics[protein].to_frame(name=proteindfName)

    # add(..., fill_value=0) is used as an index-aligned join: the two frames
    # share no columns, so each value is added to a filled-in 0, while cells
    # missing on both sides stay NaN and are then removed by dropna.
    cross = genedf.add(proteindf, fill_value=0).dropna(axis=0)
    mutated = cross[cross[gene] == 1.0]
    wt = cross[cross[gene] == 0.0]

    ttest = scipy.stats.ttest_ind(mutated[proteindfName], wt[proteindfName])
    pvalue = ttest[1]
    # Written as "<=" so that a NaN p-value (test not computable) is not reported.
    if pvalue <= pcutoff:
        sigList.append(protein)
        print(f"Test for {protein}: ")
        print(ttest)

Test for IRS2: 
Ttest_indResult(statistic=-4.387580404733772, pvalue=3.20389610243306e-05)


### List of significantly affected proteins

In [26]:
# Proteins whose t-test p-value was at or below pcutoff in the cell above.
print(sigList)

['IRS2']


### Phosphoproteome abundance of interacting proteins

In [27]:
# Name fragments intended for selecting phosphosite columns.
# NOTE(review): this list is not referenced anywhere in the visible notebook;
# the phosphoproteomics cell that follows iterates over protList instead.
# Confirm which list is intended before relying on the results.
phosProtList = ['PIK3R', 'IRS', 'RUFY', 'RASD', 'APPL', 'HRAS', 'KRAS',
           'FAM83', 'PI3K', 'AKT']

In [28]:
# Compare phosphosite abundance between PIK3R1-mutated and wild-type samples,
# running one two-sample t-test per phosphosite column.
gene = 'PIK3R1'
genedf = somatic_mutations[gene].to_frame()
sigPhosResults = []

# Attach every phosphosite column whose name matches an entry of protList.
# add(..., fill_value=0) is used as an index-aligned join: new columns are
# added to a filled-in 0, and cells missing on both sides stay NaN.
# NOTE(review): the regex is an unanchored substring match, so a pattern can
# also pick up sites of other proteins whose names contain it -- confirm this
# is intended.
# NOTE(review): this iterates protList, not the phosProtList defined in the
# preceding cell -- confirm which list is intended.
for protein in protList:
    sites = phos.filter(regex=protein)
    genedf = genedf.add(sites, fill_value=0)

mutated = genedf.loc[genedf[gene] == 1.0]
wt = genedf.loc[genedf[gene] == 0.0]

# Bonferroni correction: divide by the number of tests actually run, i.e. the
# number of phosphosite columns. The mutation column itself is never tested,
# so it must not be counted. max(..., 1) avoids a division by zero when no
# site matched.
siteCols = [col for col in genedf.columns if col != gene]
pcutoff = 0.05 / max(len(siteCols), 1)

for loc in siteCols:
    # Drop samples with no measurement for this site before testing.
    mutsitedf = mutated[[gene, loc]].dropna()
    wtsitedf = wt[[gene, loc]].dropna()
    ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
    # A NaN p-value (test not computable) fails this comparison and is skipped.
    if ttest[1] <= pcutoff:
        sigPhosResults.append(loc)
        print('Results for ' + loc + ': ')
        print(ttest)
 

Results for IRS2-S1100: 
Ttest_indResult(statistic=-4.019029388226416, pvalue=0.0001505481884237998)
Results for IRS2-S679: 
Ttest_indResult(statistic=-3.5414593756235186, pvalue=0.0007181350946428204)
Results for IRS2-S731: 
Ttest_indResult(statistic=-4.2075513087547245, pvalue=8.333919696760065e-05)


### List of significantly affected phosphorylation sites

In [29]:
# Phosphosite columns whose t-test p-value was at or below pcutoff in the cell above.
print(sigPhosResults)

['IRS2-S1100', 'IRS2-S679', 'IRS2-S731']
