In [1]:
import cptac
import cptac.utils as u
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

  import pandas.util.testing as tm


## Load cancer data

In [2]:
en = cptac.Endometrial()
col = cptac.Colon()
br = cptac.Brca()

                                                

## Look at the cis effect of the PIK3CA genes

In [3]:
gene = 'PIK3CA'

# Endometrial

## Pull out all the missense mutation and wildtype tumors 



In [4]:
endo = en.get_genotype_all_vars("PIK3CA")
endo_missense = endo.loc[endo['Mutation'] == "Missense_Mutation"]



## Pull out only hotspot mutations

In [5]:
hotspot = endo_missense[(endo_missense.Location.str.contains('E542K'))|(endo_missense.Location.str.contains('E545K'))|(endo_missense.Location.str.contains('H1047R'))]
# hotspot

## Get Wildtype Mutations

In [6]:
wildtype = endo.loc[endo['Mutation'] == "Wildtype_Tumor"]
result = pd.concat([hotspot, wildtype])

## Get proteomics for PIK3CA

In [7]:
prot = en.get_proteomics()
prot = prot[gene]

## Join proteomics to mutation

In [8]:
joined = result.join(prot)

## Drop unnecessary columns

In [9]:
endo_missense = joined.drop(columns=['Location', 'Mutation_Status'])
# endo_missense

# Brca

In [10]:
brca = br.get_genotype_all_vars("PIK3CA")
brca_missense = brca.loc[brca['Mutation'] == "Missense_Mutation"]

hotspot = brca_missense[(brca_missense.Location.str.contains('E542K'))|(brca_missense.Location.str.contains('E545K'))|(brca_missense.Location.str.contains('H1047R'))]

wildtype = brca.loc[brca['Mutation'] == "Wildtype_Tumor"]
result = pd.concat([hotspot, wildtype])

prot = br.get_proteomics()
prot = br.reduce_multiindex(prot, "Database_ID")
prot = prot[gene]
prot

joined = result.join(prot)

brca_missense = joined.drop(columns=['Location', 'Mutation_Status'])
brca_missense



AttributeError: 'Brca' object has no attribute 'reduce_multiindex'

# Colon

In [None]:
colon = col.get_genotype_all_vars("PIK3CA")
colon_missense = colon.loc[colon['Mutation'] == "nonsynonymous SNV"]
hotspot = colon_missense[(colon_missense.Location.str.contains('E542K'))|(colon_missense.Location.str.contains('E545K'))|(colon_missense.Location.str.contains('H1047R'))]

wildtype = colon.loc[colon['Mutation'] == "Wildtype_Tumor"]
result = pd.concat([hotspot, wildtype])

prot = col.get_proteomics()
prot = prot[gene]

joined = result.join(prot)

colon_missense = joined.drop(columns=['Location', 'Mutation_Status'])
colon_missense

# Final Graph

## Reformat dataframes

In [None]:
endo_missense = endo_missense.assign(cancer = 'Endometrial')
brca_missense = brca_missense.assign(cancer = 'Brca')
colon_missense = colon_missense.assign(cancer = 'Colon')
colon_missense["Mutation"] = colon_missense["Mutation"].replace("nonsynonymous SNV", "Missense_Mutation")

In [None]:
endo_missense.head()

In [None]:
brca_missense.head()

In [None]:
colon_missense.head(10)

## Join all the cancer data tables together.
This is the one we will use to make the graph

In [None]:
df = endo_missense.append(brca_missense)
df = df.append(colon_missense)
df

In [None]:
# pancancer cis plot
gene = 'PIK3CA'
plt.rcParams['figure.figsize']=(11.7,8.5) #size of plot
sns.set(font_scale = 1.2)

boxplot = sns.boxplot(x='cancer', y=gene, data = df, hue = 'Mutation',
                      hue_order = ["Wildtype_Tumor", "Missense_Mutation"], showfliers = False)    
boxplot.set_title('Pancancer cis Effect of PIK3CA Hotspot Mutations')
boxplot = sns.stripplot(x='cancer', y=gene, data = df, jitter = True, 
                           color = ".3", hue = 'Mutation', hue_order = ["Wildtype_Tumor", "Missense_Mutation"], dodge = True)
boxplot.set(xlabel = "\nPTEN Wildtype/Hotspot_Mutation", ylabel = gene+"_proteomics")

# format legend
handles, labels = boxplot.get_legend_handles_labels()
plt.legend(handles, ['Wildtype', 'Hotspot_Mutation'])



plt.show()
plt.clf()
plt.close()

In [None]:
fig = boxplot.get_figure()
fig.savefig("cis_effects_proteomics.png")