In [1]:
import cptac
import cptac.utils as u
import seaborn as sns
import matplotlib.pyplot as plt

  import pandas.util.testing as tm


In [None]:
# Instantiate the three cptac dataset objects used in the sections below
# (Endometrial, Colon, Brca). Each later section reads genotype and
# proteomics tables from one of these objects.
en = cptac.Endometrial()
col = cptac.Colon()
br = cptac.Brca()

Loading dataframes.......           

In [None]:
# Gene whose proteomics column is looked up (prot[gene]) and tested in the
# endometrial, BRCA and colon sections below.
gene = 'PIK3CA'

In [None]:
#Prioritize hotspot mutations
# Pass the shared `gene` variable (instead of repeating the literal) so the
# genotype table always matches the gene used for the proteomics lookup.
# NOTE(review): the commonly cited PIK3CA hotspots are E542K, E545K and H1047R;
# 'E535K' may be a typo for 'E545K' -- confirm before changing, since it would
# alter which samples fall into each group.
endo = en.get_genotype_all_vars(gene, mutation_hotspot=["H1047R", 'E542K', 'E535K'])

In [None]:
# Keep only the rows labelled as a missense mutation or as a wildtype tumor.
missense = endo.loc[endo['Mutation'].isin(["Missense_Mutation", "Wildtype_Tumor"])]

# Now look at the cis effect for endometrial cancer 
(that is, the difference in PIK3CA protein abundance between Wildtype_Tumor and Missense_Mutation samples)

In [None]:
# Keep only the Mutation column and attach the gene's proteomics values.
# Selecting the column (rather than dropping 'Location' and 'Mutation_Status')
# keeps this cell re-runnable: a second run no longer raises KeyError on
# columns that were already dropped, and the proteomics column is rebuilt.
prot = en.get_proteomics() #get proteomic data
# assign() aligns prot[gene] to the rows of `missense` by index label.
missense = missense[['Mutation']].assign(**{gene+'_proteomics': prot[gene]})
missense

In [None]:
# Compare the gene+"_proteomics" column between the groups in "Mutation".
# NOTE(review): presumably a t-test, going by the helper's name, and
# return_all=True presumably reports the result even when it is not
# significant -- confirm against the cptac.utils documentation.
u.wrap_ttest(missense, "Mutation", [gene+"_proteomics"], return_all=True)

In [None]:
def format_pval_annotation(pval_symbol, x1, x2, line_start = .05, line_height=.05, ax=None):
    """Draw a bracket between two x positions and write a label above it.

    Used for manually placed p-value annotations on a plot.

    Parameters
    ----------
    pval_symbol : str
        Text written centered above the bracket.
    x1, x2 : float
        x coordinates of the bracket's two legs.
    line_start : float
        y coordinate at which both legs begin.
    line_height : float
        Length of each leg; the bracket's top edge is drawn at
        line_start + line_height.
    ax : matplotlib Axes, optional
        Axes to draw on. When omitted, the current pyplot axes are used,
        which matches the behaviour of the original plt.plot/plt.text calls.
    """
    if ax is None:
        ax = plt.gca()

    y, h = line_start, line_height
    # Four points trace the bracket: up the left leg, across the top, down the right leg.
    ax.plot([x1, x1, x2, x2],  # x coordinates of the four points
            [y, y+h, y+h, y],  # y coordinates of the four points
            lw=1.5, color='.3')
    # Label is centered horizontally between the legs and sits on the top edge.
    ax.text((x1+x2)*.5, y+h, pval_symbol,
            horizontalalignment='center', verticalalignment='bottom', color="black")



In [None]:
plt.rcParams['figure.figsize']=(11.7,8.5) #size of plot
sns.set(font_scale = 1.2)

# Both layers must use the same category order; without an explicit order the
# strip plot picks its own, so points could be drawn over the wrong box.
plot_order = ['Wildtype_Tumor', 'Missense_Mutation']

boxplot = sns.boxplot(x='Mutation', y=gene+'_proteomics',
                      order=plot_order,
                      data=missense,
                      showfliers=False)

# Spaces around the gene name were missing, which ran the title words together.
boxplot.set_title("PIK3CA missense effects on " + gene + " Proteomic Abundance in Endometrial")
boxplot = sns.stripplot(x='Mutation', y=gene+'_proteomics',
                        order=plot_order,
                        data=missense, jitter=True)
boxplot.set(xlabel="\nPIK3CA Wildtype_Tumor/Missense_Mutation", ylabel=gene+"_proteomics")

# The p-value text is entered by hand; update it if the t-test result changes.
format_pval_annotation('* P-value = .201', 0, 1, 1)

plt.show()
plt.clf()
plt.close()

# Cis effects for BRCA

In [None]:
#Prioritize hotspot mutations
# Pass the shared `gene` variable (instead of repeating the literal) so the
# genotype table always matches the gene used for the proteomics lookup.
# NOTE(review): the commonly cited PIK3CA hotspots are E542K, E545K and H1047R;
# 'E535K' may be a typo for 'E545K' -- confirm before changing, since it would
# alter which samples fall into each group.
brca = br.get_genotype_all_vars(gene, mutation_hotspot=["H1047R", 'E542K', 'E535K'])

In [None]:
# Keep only the rows labelled as a missense mutation or as a wildtype tumor.
brca_missense = brca.loc[brca['Mutation'].isin(["Missense_Mutation", "Wildtype_Tumor"])]

In [None]:
# Keep only the Mutation column and attach the gene's proteomics values.
# Selecting the column (rather than dropping 'Location' and 'Mutation_Status')
# keeps this cell re-runnable: a second run no longer raises KeyError on
# columns that were already dropped, and the proteomics column is rebuilt.
prot = br.get_proteomics() #get proteomic data
# assign() aligns prot[gene] to the rows of `brca_missense` by index label.
brca_missense = brca_missense[['Mutation']].assign(**{gene+'_proteomics': prot[gene]})
brca_missense

In [None]:
# Sanity check for the index-aligned join: every sample kept in brca_missense
# should have a row in the proteomics table.
# The previous form, index.equals(prot[gene]), compared an Index against a
# non-Index object, which pandas always reports as False.
brca_missense.index.isin(prot.index).all()

In [None]:
# Compare the gene+"_proteomics" column between the groups in "Mutation".
# NOTE(review): presumably a t-test, going by the helper's name, and
# return_all=True presumably reports the result even when it is not
# significant -- confirm against the cptac.utils documentation.
u.wrap_ttest(brca_missense, "Mutation", [gene+"_proteomics"], return_all=True)

In [None]:
plt.rcParams['figure.figsize']=(11.7,8.5) #size of plot
sns.set(font_scale = 1.2)

# Both layers must use the same category order; without an explicit order the
# strip plot picks its own, so points could be drawn over the wrong box.
plot_order = ['Wildtype_Tumor', 'Missense_Mutation']

boxplot = sns.boxplot(x='Mutation', y=gene+'_proteomics',
                      order=plot_order,
                      data=brca_missense,
                      showfliers=False)

# Spaces around the gene name were missing, which ran the title words together.
boxplot.set_title("PIK3CA missense effects on " + gene + " Proteomic Abundance in Brca")
boxplot = sns.stripplot(x='Mutation', y=gene+'_proteomics',
                        order=plot_order,
                        data=brca_missense, jitter=True)
boxplot.set(xlabel="\nPIK3CA Wildtype_Tumor/Missense_Mutation", ylabel=gene+"_proteomics")

# The p-value text is entered by hand; update it if the t-test result changes.
format_pval_annotation('* P-value = .689', 0, 1, 1)

plt.show()
plt.clf()
plt.close()

# Cis effect for Colon

In [None]:
#Prioritize hotspot mutations
# Pass the shared `gene` variable (instead of repeating the literal) so the
# genotype table always matches the gene used for the proteomics lookup.
# NOTE(review): the commonly cited PIK3CA hotspots are E542K, E545K and H1047R;
# 'E535K' may be a typo for 'E545K' -- confirm before changing, since it would
# alter which samples fall into each group.
colon = col.get_genotype_all_vars(gene, mutation_hotspot=["H1047R", 'E542K', 'E535K'])

In [None]:
# List the distinct mutation labels present in the colon table; the filter in
# the next cell selects "nonsynonymous SNV" here rather than "Missense_Mutation".
colon['Mutation'].unique()

In [None]:
# Keep only the rows labelled "nonsynonymous SNV" (the label this cell uses
# for the missense group) or "Wildtype_Tumor".
colon_missense = colon.loc[colon['Mutation'].isin(["nonsynonymous SNV", "Wildtype_Tumor"])]

In [None]:
# Keep only the Mutation column and attach the gene's proteomics values.
# Selecting the column (rather than dropping 'Location' and 'Mutation_Status')
# keeps this cell re-runnable: a second run no longer raises KeyError on
# columns that were already dropped, and the proteomics column is rebuilt.
prot = col.get_proteomics() #get proteomic data
# assign() aligns prot[gene] to the rows of `colon_missense` by index label.
colon_missense = colon_missense[['Mutation']].assign(**{gene+'_proteomics': prot[gene]})
colon_missense

In [None]:
# Compare the gene+"_proteomics" column between the groups in "Mutation".
# NOTE(review): presumably a t-test, going by the helper's name, and
# return_all=True presumably reports the result even when it is not
# significant -- confirm against the cptac.utils documentation.
u.wrap_ttest(colon_missense, "Mutation", [gene+"_proteomics"], return_all=True)

In [None]:
plt.rcParams['figure.figsize']=(11.7,8.5) #size of plot
sns.set(font_scale = 1.2)

# Both layers must use the same category order; without an explicit order the
# strip plot picks its own, so points could be drawn over the wrong box.
plot_order = ['Wildtype_Tumor', 'nonsynonymous SNV']

boxplot = sns.boxplot(x='Mutation', y=gene+'_proteomics',
                      order=plot_order,
                      data=colon_missense,
                      showfliers=False)

# Spaces around the gene name were missing, which ran the title words together.
boxplot.set_title("PIK3CA missense effects on " + gene + " Proteomic Abundance in Colon")
boxplot = sns.stripplot(x='Mutation', y=gene+'_proteomics',
                        order=plot_order,
                        data=colon_missense, jitter=True)
boxplot.set(xlabel="\nPIK3CA Wildtype_Tumor/nonsynonymous SNV", ylabel=gene+"_proteomics")

# The p-value text is entered by hand; update it if the t-test result changes.
format_pval_annotation('* P-value = .314', 0, 1, 1)

plt.show()
plt.clf()
plt.close()