# KRAS: Effects of Mutation

<b> Standard imports for playing with and plotting data frames. </b>

In [9]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
import altair as alt

alt.renderers.enable('notebook')

RendererRegistry.enable('notebook')

<b> Import CPTAC </b>

In [10]:
import CPTAC

In [11]:
somatic_mutations = CPTAC.get_somatic()
proteomics = CPTAC.get_proteomics()
phos = CPTAC.get_phosphoproteomics()

<b> List of proteins that interact with KRAS (according to STRING) </b>

In [12]:
protList = ['EGRF', 'PIK3CA', 'ERBB2', 'ARAF', 'BRAF', 'MAPK1', 'MAPK3', 'MAP2K1', 'SOS1', 'RAF1']

### Proteome abundance of proteins that interact with KRAS

In [13]:
gene = 'KRAS'
sigList = [];

pcutoff = 0.05/len(protList)
print(pcutoff)
pcutoff = 1


genedf = somatic_mutations[gene].to_frame()
for protein in protList:
    if protein in proteomics.columns:
        proteindf = proteomics[protein].to_frame()
        proteindfName = protein + " protein"
        proteindf.columns = [proteindfName]
        cross = genedf.add(proteindf, fill_value=0).dropna(axis=0)
        mutated = cross.loc[cross[gene] == 1.0]
        wt = cross.loc[cross[gene] == 0.0]
        ttest = scipy.stats.ttest_ind(mutated[proteindfName], wt[proteindfName])
        if ttest[1] <= pcutoff:
            sigList.append(protein)
            print("Test for " + protein + ": ")
            print(ttest)

0.005
Test for PIK3CA: 
Ttest_indResult(statistic=-0.7641503920005918, pvalue=0.44661359816697277)
Test for ERBB2: 
Ttest_indResult(statistic=-0.17062703303108165, pvalue=0.864868819822983)
Test for ARAF: 
Ttest_indResult(statistic=-0.3722176071624872, pvalue=0.7105345313274455)
Test for BRAF: 
Ttest_indResult(statistic=-0.6718701507432219, pvalue=0.5032471990259206)
Test for MAPK1: 
Ttest_indResult(statistic=0.2904176595402326, pvalue=0.7721103995980599)
Test for MAPK3: 
Ttest_indResult(statistic=0.3170397340391074, pvalue=0.7518878736991584)
Test for MAP2K1: 
Ttest_indResult(statistic=-0.4028611650203923, pvalue=0.687927857065211)
Test for SOS1: 
Ttest_indResult(statistic=0.2223721467553348, pvalue=0.824486974866286)
Test for RAF1: 
Ttest_indResult(statistic=2.6230493388008855, pvalue=0.010106929596048609)


### Phosphoproteome abundance of interacting proteins

In [14]:
phosProtList = ['EGRF', 'PIK3CA', 'ERBB', 'ARAF', 'BRAF', 'MAPK', 'MAPK', 'MAP2K', 'SOS', 'RAF']

In [15]:
gene = 'KRAS'
genedf = somatic_mutations[gene].to_frame()
sigPhosResults = [];

for protein in phosProtList:
    sites = phos.filter(regex=protein)
    genedf = genedf.add(sites, fill_value=0)

mutated = genedf.loc[genedf[gene] == 1.0]
wt = genedf.loc[genedf[gene] == 0.0]

pcutoff = 0.05 / len(genedf.columns)
print(pcutoff)
pcutoff = 1

for loc in genedf.columns:
    if not loc == gene:
        mutsitedf = mutated[[gene, loc]].dropna()
        wtsitedf = wt[[gene, loc]].dropna()
        ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
        if(ttest[1] <= pcutoff):
            sigPhosResults.append(loc)
            print('Results for ' + loc + ': ')
            print(ttest)
 

0.0006172839506172839
Results for ARAF-S175: 
Ttest_indResult(statistic=2.0930363361187676, pvalue=0.04007913981271484)
Results for ARAF-S189: 
Ttest_indResult(statistic=2.8717268608189164, pvalue=0.00500365376694708)
Results for ARAF-S260: 
Ttest_indResult(statistic=5.1539316473499825, pvalue=1.665516443552024e-06)
Results for ARAF-S272: 
Ttest_indResult(statistic=-0.07863367799951307, pvalue=0.9375856630651855)
Results for ARAF-S275: 
Ttest_indResult(statistic=-0.07863367799951307, pvalue=0.9375856630651855)
Results for ARAF-T184: 
Ttest_indResult(statistic=1.8602349698111367, pvalue=0.06611822401968898)
Results for ARAF-T256: 
Ttest_indResult(statistic=3.638629367252144, pvalue=0.0005162594349961674)
Results for BRAF-S151: 
Ttest_indResult(statistic=2.7908693118026804, pvalue=0.00647445466466053)
Results for BRAF-S365: 
Ttest_indResult(statistic=-0.5101650927252876, pvalue=0.6114962612343113)
Results for BRAF-S446: 
Ttest_indResult(statistic=0.20449767889983983, pvalue=0.83857545381