# PIK3R1: Effects of mutation on IRS2

<b> Standard imports for playing with and plotting data frames. </b>

In [3]:
import pandas as pd
import numpy as np
import scipy.stats
from scipy.stats import pearsonr
import matplotlib.pyplot as plt
import seaborn as sns
#import altair as alt

#alt.renderers.enable('notebook')

<b> Import CPTAC </b>

In [5]:
import CPTAC

Collecting CPTAC
[31m  Could not find a version that satisfies the requirement CPTAC (from versions: )[0m
[31mNo matching distribution found for CPTAC[0m


ModuleNotFoundError: No module named 'CPTAC'

In [None]:
# Load the three CPTAC tables that the rest of the notebook works from.
# NOTE(review): presumably each table has one row per sample, so that they can be
# aligned on their index in the cells below — confirm against the CPTAC package docs.

# Somatic mutation table; later cells select one column per gene (somatic_mutations[gene])
# and compare its values against 1.0 / 0.0.
somatic_mutations = CPTAC.get_somatic()
# Protein abundance table; later cells select one column per protein name.
proteomics = CPTAC.get_proteomics()
# Phosphoproteomics table; later cells pick its columns by regex on the column name.
phos = CPTAC.get_phosphoproteomics()

# Difference in protein abundance 

In [None]:
# Compare the abundance of each protein in protList between samples whose `gene`
# column equals 1.0 and samples where it equals 0.0, using an independent t-test.
# NOTE(review): assumes the somatic table encodes mutated as 1 and wild type as 0 — confirm.
gene = 'PIK3R1'
protList = ['IRS2']

# Bonferroni-corrected threshold: one t-test is run per entry of protList.
pcutoff = 0.05/len(protList)
print(pcutoff)

genedf = somatic_mutations[gene].to_frame()
for protein in protList:
    if protein not in proteomics.columns:
        # Say so instead of skipping silently: the plotting cell relies on `cross`
        # and `proteinfindName` having been set by this loop.
        print("Skipping " + protein + ": no such column in the proteomics table")
        continue

    # Suffix the column name so it can never coincide with a gene-named mutation column.
    proteinfindName = protein + "protein"
    proteindf = proteomics[protein].to_frame(name=proteinfindName)

    # The two frames have different column names, so nothing is actually summed here:
    # add(fill_value=0) aligns them on the index and carries each value through as
    # value + 0. Cells missing from both frames stay NaN, and dropna() then keeps
    # only the rows that have both a mutation status and an abundance value.
    cross = genedf.add(proteindf, fill_value=0).dropna(axis=0)

    mutated = cross.loc[cross[gene] == 1.0]
    wt = cross.loc[cross[gene] == 0.0]
    ttest = scipy.stats.ttest_ind(mutated[proteinfindName], wt[proteinfindName])
    print("Test for " + protein + ": ")
    print(ttest)

<b> Plot the data </b>

In [None]:
# Box plot of protein abundance per mutation status, with the individual samples
# drawn on top as a jittered strip plot.
# NOTE: the title names protList[0], while `cross` / `proteinfindName` hold whatever
# the last processed protein of the loop above left behind; the two only agree
# while protList has a single entry.
plot_title = gene + " gene mutation and " + protList[0] + " protein abundance"
axis_labels = {"xlabel": "Somatic Gene Mutation", "ylabel": "Proteomics"}

somatic_boxplot = sns.boxplot(x=gene, y=proteinfindName, data=cross)
somatic_boxplot.set_title(plot_title)
somatic_boxplot = sns.stripplot(x=gene, y=proteinfindName, data=cross, jitter=True, color=".3")
somatic_boxplot.set(**axis_labels)

# Difference in phosphoproteome abundance 

In [None]:
# For every IRS2 phosphosite, t-test its level between samples whose `gene` column
# equals 1.0 and samples where it equals 0.0, and collect the sites that pass a
# Bonferroni-corrected cutoff in sigResults.
# NOTE(review): assumes the somatic table encodes mutated as 1 and wild type as 0 — confirm.
gene = 'PIK3R1'
# Every phosphoproteomics column whose name matches the regex 'IRS2'.
sites = phos.filter(regex='IRS2')
sigResults = []

genedf = somatic_mutations[gene].to_frame()
# The mutation column and the site columns have different names, so nothing is
# actually summed: add(fill_value=0) aligns the frames on the index and carries
# each value through as value + 0 (cells missing from both frames stay NaN).
cross = genedf.add(sites, fill_value=0)
mutated = cross.loc[cross[gene] == 1.0]
wt = cross.loc[cross[gene] == 0.0]

# Only the phosphosite columns are tested; the mutation column is the grouping
# variable, so it must not count towards the Bonferroni denominator.
# Iterating in cross.columns order keeps the order of sigResults unchanged.
siteColumns = [col for col in cross.columns if col != gene]
# max(..., 1) avoids a ZeroDivisionError when the regex matched no column.
pcutoff = 0.05/max(len(siteColumns), 1)
print(pcutoff)

for loc in siteColumns:
    print(loc)
    # Drop samples with no measurement for this particular site before testing.
    mutsitedf = mutated[[gene, loc]].dropna()
    wtsitedf = wt[[gene, loc]].dropna()
    ttest = scipy.stats.ttest_ind(mutsitedf[loc], wtsitedf[loc])
    # ttest[1] is the p-value; a NaN p-value fails this comparison and is not recorded.
    if ttest[1] <= pcutoff:
        sigResults.append(loc)
        print('Results for ' + loc + ': ')
        print(ttest)

<b> Plot the data </b>

In [None]:
# Plot one significant phosphosite: a box plot per mutation status with the
# individual samples drawn on top as a jittered strip plot.
# x is the position in sigResults of the site to plot.
x = 0
if x < len(sigResults):
    site = sigResults[x]
    somatic_boxplot = sns.boxplot(data=cross, x=gene, y=site)
    # Draw on the same axes explicitly rather than relying on the current-axes state.
    sns.stripplot(data=cross, x=gene, y=site, jitter=True, color=".3", ax=somatic_boxplot)
    somatic_boxplot.set_title(gene + " gene mutation and " + site + " phosphorylation levels")
    somatic_boxplot.set(xlabel="Somatic Gene Mutation", ylabel="Phosphoproteomics")
else:
    # sigResults is empty (or shorter than x + 1) when too few sites passed the
    # cutoff; report it instead of failing with an IndexError.
    print("No significant phosphosite at position " + str(x) + "; nothing to plot")

<b> Conclusion: IRS2 protein and phosphoprotein abundance are lower when PIK3R1 is mutated </b>

Note: Previous studies show that IRS2 is involved in activating PI3K in the PI3K/AKT signaling pathway (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4140254/).