# Make Supplemental Figure: Endometrial Mutations

Create a CSV file for MutPlot, a web-based tool for creating lollipop plots, available here: https://bioinformaticstools.shinyapps.io/lollipop/
More information about MutPlot is available here: https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0215838

In [1]:
import pandas as pd

import cptac
import cptac.utils as u
import plot_utils as p

In [2]:
# Record which cptac release produced the outputs in this notebook
cptac_version = cptac.version()
print('cptac version:', cptac_version)

cptac version: 0.8.5


In [3]:
# Instantiate the cptac Endometrial object; later cells query it through `e`
e = cptac.Endometrial()

                                                

In [4]:
# Join PTEN proteomics with PTEN mutation calls for tumor samples.
# The truncation types are passed as `mutations_filter`, yet this cell's value
# counts still list Missense_Mutation and Splice_Site, so the filter alone does
# not restrict the table to truncations (presumably it only decides which
# mutation is reported per sample -- TODO confirm against the cptac docs).
truncation_types = ['Nonsense_Mutation','Frame_Shift_Del','Frame_Shift_Ins']
joined = e.join_omics_to_mutations(
    mutations_genes = 'PTEN',
    omics_df_name = 'proteomics',
    omics_genes = 'PTEN',
    mutations_filter = truncation_types,
    tissue_type = 'tumor',
)
# Discard tumors that carry no PTEN mutation
is_mutated = joined['PTEN_Mutation'] != 'Wildtype_Tumor'
prot_and_mutations = joined.loc[is_mutated]
# Tally the remaining samples by reported mutation type
prot_and_mutations['PTEN_Mutation'].value_counts()



Nonsense_Mutation    25
Frame_Shift_Del      22
Missense_Mutation    21
Frame_Shift_Ins       6
Splice_Site           1
Name: PTEN_Mutation, dtype: int64

Format df for MutPlot (lollipop plot creation)

In [5]:
# Reshape the joined table into the columns used for MutPlot:
# Sample_ID, Mutation_Type, Protein_Change and Hugo_Symbol.

# Drop unnecessary columns
drop_cols = ['PTEN_proteomics','PTEN_Mutation_Status','Sample_Status']
df = prot_and_mutations.drop(columns=drop_cols)
# Create Hugo_Symbol column (every row describes PTEN)
df['Hugo_Symbol'] = 'PTEN'
# Move Patient_ID out of the index so it can be renamed as a regular column
df = df.reset_index()
# Rename columns
df1 = df.rename(columns={'Patient_ID': 'Sample_ID', 'PTEN_Mutation': 'Mutation_Type',
                         'PTEN_Location':'Protein_Change'})
# Keep only truncation-type mutations. Each mask is built from the frame it
# filters, so the selection never depends on pandas realigning a boolean
# Series that was computed on a different frame.
df2 = df1.loc[df1['Mutation_Type'] != 'Missense_Mutation']
df3 = df2.loc[df2['Mutation_Type'] != 'Splice_Site']
# Tally what is left, by mutation type
df3.Mutation_Type.value_counts()

Nonsense_Mutation    25
Frame_Shift_Del      22
Frame_Shift_Ins       6
Name: Mutation_Type, dtype: int64

In [6]:
# Preview the first rows of the truncation-only table before exporting it
df3.head()

Name,Sample_ID,Mutation_Type,Protein_Change,Hugo_Symbol
0,C3L-00006,Nonsense_Mutation,p.R233*,PTEN
2,C3L-00032,Nonsense_Mutation,p.W111*,PTEN
5,C3L-00137,Nonsense_Mutation,p.Y180*,PTEN
7,C3L-00145,Frame_Shift_Ins,p.E242*,PTEN
8,C3L-00156,Nonsense_Mutation,p.E7*,PTEN


In [7]:
# Export the truncation table; use this file with MutPlot.
# The row index is written as well, since index=False is not passed.
mutplot_csv = 'Table_for_Sup_Fig_3.csv'
df3.to_csv(mutplot_csv)

Extra
* Alternative df that counts every truncation mutation, including cases where the same sample has multiple truncation-type mutations.

In [8]:
# Build a lollipop-plot table directly from the somatic mutation calls
# (counts all mutations - including multiple mutations in same sample).
# NOTE: this cell rebinds df, df1, df2 and df3 from the earlier formatting
# cell; re-run that cell before exporting its table again.
mut = e.get_somatic_mutation()
# Keep only the rows for PTEN
pten = mut.loc[mut['Gene'] == 'PTEN']
# Move Patient_ID out of the index so it can be renamed as a regular column
df = pten.reset_index()
df1 = df.rename(columns={'Patient_ID': 'Sample_ID','Gene':'Hugo_Symbol',
                         'Mutation': 'Mutation_Type','Location':'Protein_Change'})
# Drop the non-truncating types. Each mask is built from the frame it filters,
# so the selection never depends on pandas realigning a boolean Series that
# was computed on a different frame.
df2 = df1.loc[df1['Mutation_Type'] != 'Missense_Mutation']
df3 = df2.loc[df2['Mutation_Type'] != 'Splice_Site']
# Tally what is left, by mutation type
df3.Mutation_Type.value_counts()

Nonsense_Mutation    27
Frame_Shift_Del      26
Frame_Shift_Ins      11
Name: Mutation_Type, dtype: int64

In [9]:
# Optional: uncomment to export the all-mutations table as a tab-separated file
#df3.to_csv('non_prioritized table.csv', sep='\t')