# Let's go through Brittany's code

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

# NOTE(review): `sys` is already imported a few lines above; this second import is redundant.
import sys
# NOTE(review): absolute, user-specific path. `plot_utils` is presumably resolved through
# this directory, so the import below likely fails on any other machine — TODO make the
# location relative or configurable.
sys.path.append('/Users/hannahboekweg/WhenMutationsDontMatter')
# `p` is not referenced in any of the cells visible in this section of the notebook.
import plot_utils as p



In [2]:
# Load the CPTAC endometrial dataset object; the cells below query it for PIK3CA
# genotype calls (get_genotype_all_vars) and proteomics (get_proteomics).
# NOTE(review): a later cell rebinds `endo` to a DataFrame, so cells that use `endo`
# as the dataset object break if they are re-run after that cell.
endo = cptac.Endometrial()

                                    

In [3]:
# Per-patient PIK3CA genotype table; the preview below shows the columns
# Mutation, Location and Mutation_Status, indexed by Patient_ID.
prot_and_mutations = endo.get_genotype_all_vars("PIK3CA")
prot_and_mutations.head()



Name,Mutation,Location,Mutation_Status
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00006,Missense_Mutation,p.E545K,Single_mutation
C3L-00008,Wildtype_Tumor,No_mutation,Wildtype_Tumor
C3L-00032,Missense_Mutation,p.E545K,Single_mutation
C3L-00090,Wildtype_Tumor,No_mutation,Wildtype_Tumor
C3L-00098,Amplification,Amplification,Single_mutation


In [4]:
# List every mutation category present, to decide which groups to compare.
prot_and_mutations['Mutation'].unique()

array(['Missense_Mutation', 'Wildtype_Tumor', 'Amplification',
       'Frame_Shift_Ins', 'In_Frame_Del', 'Deletion'], dtype=object)

In [5]:
# Keep only the two groups being compared: missense-mutated and wildtype tumors.
keep = ['Missense_Mutation', 'Wildtype_Tumor']
in_keep = prot_and_mutations['Mutation'].isin(keep)
miss_mutations = prot_and_mutations.loc[in_keep]

# Only a cell's last expression is rendered, so the counts on the next line are
# computed but not shown; the displayed result is the set of Mutation_Status values.
miss_mutations['Mutation'].value_counts()
miss_mutations['Mutation_Status'].unique()

array(['Single_mutation', 'Wildtype_Tumor', 'Multiple_mutation'],
      dtype=object)

In [6]:
# Presumably the proteins known to interact with PIK3CA (36 entries, per the next cell).
# NOTE(review): `ip` is not used again in the cells visible here.
ip = u.get_interacting_proteins('PIK3CA')

In [7]:
# Number of entries returned by get_interacting_proteins for PIK3CA.
len(ip)

36

In [13]:
# Gene under study; later cells use it to build column names and plot labels.
gene = 'PIK3CA'

In [14]:
# Step 3 - Reduce the genotype table to what the t-test needs from it: the group
# label only (the omics column is attached in a later cell).
columns_to_drop = ["Location", "Mutation_Status"]
mut_status = (
    miss_mutations
    .drop(columns=columns_to_drop)
    .dropna(axis=1, how='all')  # discard any column that is entirely NaN
)
# Computed but not rendered: only the cell's last expression is displayed.
mut_status['Mutation'].value_counts()
mut_status

Name,Mutation
Patient_ID,Unnamed: 1_level_1
C3L-00006,Missense_Mutation
C3L-00008,Wildtype_Tumor
C3L-00032,Missense_Mutation
C3L-00090,Wildtype_Tumor
C3L-00136,Wildtype_Tumor
...,...
C3N-01510,Missense_Mutation
C3N-01520,Missense_Mutation
C3N-01537,Wildtype_Tumor
C3N-01802,Wildtype_Tumor


In [15]:
# Fetch protein abundance so it can be joined to the mutation calls.
proteomics = endo.get_proteomics()

proteomics = proteomics[["PIK3CA"]]  # keep just the PIK3CA column

# Drop normal samples.
# NOTE(review): assumes normal-sample IDs end in a literal ".N" — confirm against the
# cptac sample-ID convention.
# str.contains(".N") interprets the pattern as a regex, where "." matches ANY character,
# so it also matches every "C3N-..." tumor ID and silently removes those patients.
# endswith() performs a literal suffix comparison instead.
is_normal = proteomics.index.str.endswith(".N")
# .copy() gives an independent frame, so adding a column to it later does not
# write into a slice of the filtered table.
proteomics = proteomics.loc[~is_normal].copy()
proteomics.head()

Name,PIK3CA
Patient_ID,Unnamed: 1_level_1
C3L-00006,-0.45
C3L-00008,-0.215
C3L-00032,-0.178
C3L-00090,-0.392
C3L-00098,0.287


In [16]:
#are the indices equal?
# (False in the recorded run, so the column assignment in the next cell relies on
# index alignment, and rows without a match are removed by the dropna that follows it.)
proteomics.index.equals(mut_status.index)

False

In [40]:
# Attach each patient's mutation label, aligned on the Patient_ID index.
# assign() returns a new frame rather than writing a column into `proteomics` in place,
# which can raise SettingWithCopyWarning when `proteomics` is a filtered slice.
# dropna() then removes every row that is missing either the abundance value or the label.
proteomics = proteomics.assign(Mutation=mut_status['Mutation']).dropna()
proteomics.head()

Name,PIK3CA,Mutation
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00006,-0.45,Missense_Mutation
C3L-00008,-0.215,Wildtype_Tumor
C3L-00032,-0.178,Missense_Mutation
C3L-00090,-0.392,Wildtype_Tumor
C3L-00136,-0.349,Wildtype_Tumor


In [46]:
# Build the t-test input: one omics column and one group-label column.
# rename() returns a new frame, so `proteomics` keeps its 'PIK3CA' / 'Mutation'
# columns untouched. (Binding `mut_status = proteomics` and then adding columns would
# only alias the same object and modify `proteomics` as a side effect.)
mut_status = proteomics.rename(columns={
    "PIK3CA": gene + "_proteomics",
    "Mutation": "binary_mutations",
})
mut_status.head()

Name,PIK3CA_proteomics,binary_mutations
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00006,-0.45,Missense_Mutation
C3L-00008,-0.215,Wildtype_Tumor
C3L-00032,-0.178,Missense_Mutation
C3L-00090,-0.392,Wildtype_Tumor
C3L-00136,-0.349,Wildtype_Tumor


In [62]:
# Run cptac's t-test helper on the formatted frame, passing 'binary_mutations' and
# the single omics column 'PIK3CA_proteomics'.
# NOTE(review): presumably compares abundance between the Missense_Mutation and
# Wildtype_Tumor groups — confirm against the cptac.utils.wrap_ttest documentation.
# No output was recorded for this cell.
u.wrap_ttest(mut_status, 'binary_mutations', ['PIK3CA_proteomics'])

In [None]:
# NOTE(review): duplicates the earlier `gene = 'PIK3CA'` cell; the plotting cell below reads `gene`.
gene = "PIK3CA"

In [None]:
def format_pval_annotation(pval_symbol, x1, x2, line_start = .05, line_height=.05, ax=None):
    """Draw a significance bracket with a centered text label above it.

    The bracket is a three-segment line: up from (x1, line_start), across at
    line_start + line_height, and back down to (x2, line_start). It is meant for
    manually placing p-value annotations on an existing plot.

    Parameters
    ----------
    pval_symbol : str
        Text drawn above the bracket (e.g. '* P-Value = 0.0004').
    x1, x2 : float
        x coordinates of the bracket's two legs.
    line_start : float, default .05
        y coordinate at which both legs start.
    line_height : float, default .05
        Height of the legs; the top of the bracket sits at line_start + line_height.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, the current axes (plt.gca()) is used, which
        is where plt.plot / plt.text would draw.

    Returns
    -------
    None
    """
    if ax is None:
        ax = plt.gca()

    y, h = line_start, line_height
    ax.plot([x1, x1, x2, x2],   # x coordinates of the four bracket corners
            [y, y+h, y+h, y],   # matching y coordinates: up, across, down
            lw=1.5, color= '.3')
    ax.text((x1+x2)*.5,         # horizontally centered between the two legs
            y+h, pval_symbol, horizontalalignment='center', verticalalignment='bottom', color = "black")



In [None]:
#cis
plt.rcParams['figure.figsize']=(11.7,8.5) #size of plot
sns.set(font_scale = 1.2)

# `proteomics` carries both the 'Mutation' group label and the PIK3CA abundance
# column built in the cells above; it is the only frame in this notebook with the
# columns this plot needs (the name `newdf` is never defined anywhere).
group_order = ['Wildtype_Tumor', 'Missense_Mutation']

boxplot = sns.boxplot(x='Mutation', y=gene,
                      order = group_order, data = proteomics, showfliers = False)
# The groups compared are wildtype vs. missense, so the title says "Missense Mutation".
boxplot.set_title('PIK3CA Missense Mutation Effect on '+gene+' Proteomic Abundance in Endometrial Tumors')
# Overlay the individual samples on top of the boxes.
boxplot = sns.stripplot(x='Mutation', y=gene, data = proteomics, jitter = True,
                        color = ".3", order = group_order, dodge = True)
boxplot.set(xlabel = "\nPIK3CA Wildtype/Missense", ylabel = gene)

# Bracket between the two groups (x positions 0 and 1), starting at y = 1.
# NOTE(review): the p-value text is hard-coded — confirm it matches the wrap_ttest result.
format_pval_annotation('* P-Value = 0.0004', 0, 1, 1)

plt.show()
plt.clf()
plt.close()

# This is from the meeting

Get the missense mutation hotspots.
Compare abundance in samples that have a mutation vs. the wildtype (these are our two buckets).
For every gene, do a t-test.
Start with PIK3CA and go through every gene.
Hotspot mutations:
E542K
E545K
H1047R

In [None]:
import cptac

In [None]:
# NOTE(review): loads the endometrial dataset a second time; an earlier cell already
# created the same object as `endo`.
en = cptac.Endometrial()

In [None]:
# Load the CPTAC colon dataset object.
col = cptac.Colon()

In [None]:
# PIK3CA genotype table for the endometrial cohort.
# NOTE(review): this rebinds `endo`, which earlier in the notebook is the dataset object
# (e.g. `endo.get_proteomics()`); re-running those earlier cells after this one will fail.
# TODO give this frame its own name.
endo = en.get_genotype_all_vars("PIK3CA")

In [None]:
# PIK3CA genotype table for the colon cohort.
colon = col.get_genotype_all_vars("PIK3CA")

In [None]:
# Colon rows whose Mutation value is "nonsynonymous SNV" — a different label from the
# "Missense_Mutation" value used for the endometrial table.
# NOTE(review): the variable name looks like a typo for `missense_colon`.
missense_colcon = colon.loc[colon['Mutation'] == "nonsynonymous SNV"]
missense_colcon

In [None]:
# Pull out all endometrial rows whose Mutation value is "Missense_Mutation".
missense = endo.loc[endo['Mutation'] == "Missense_Mutation"]
# Hotspots named in the meeting notes:
# E542K
# E535K  (NOTE(review): probably E545K — the genotype preview earlier shows p.E545K; confirm)
# H1047R
missense