In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats

import cptac
import cptac.algorithms as al

In [44]:
# Helper: join one gene's omics values with its mutations, keep tumor samples, and subset them by mutation class.

def get_missence_truncation_comparison(cancer_object, specific_omics, gene,
                                       mutation_group='T'):
    """Return tumor-sample omics rows for samples carrying one class of mutation in `gene`.

    Mutation labels are collapsed into two classes: 'M' (missense-like:
    missense and in-frame/nonframeshift changes) and 'T' (truncation-like:
    frameshift, nonsense/stop and splice-site changes). A sample with several
    mutations in the gene belongs to every class that any of them maps to.

    Parameters
    ----------
    cancer_object
        Dataset object providing `append_mutations_to_omics`, `get_mutations`
        and `get_cancer_type`.
    specific_omics
        Omics table; only its `.name` attribute is read, and it is passed on
        as `omics_df_name`.
    gene : str
        Gene whose mutations and omics values are selected.
    mutation_group : {'T', 'M'}, default 'T'
        Which mutation class to return. The default keeps the original
        behaviour of returning the truncation group.

    Returns
    -------
    pandas.DataFrame
        Rows of the joined omics/mutation table whose 'Sample_Status' is
        'Tumor' and whose index appears among the 'Sample_ID' values of the
        selected mutation class.

    Raises
    ------
    ValueError
        If `mutation_group` is neither 'M' nor 'T'.
    """
    if mutation_group not in ('M', 'T'):
        raise ValueError(
            "mutation_group must be 'M' (missense) or 'T' (truncation), got %r"
            % (mutation_group,))

    # Omics values joined with the gene's mutation columns, tumor samples only.
    omics_and_mutations = cancer_object.append_mutations_to_omics(
        mutation_genes=gene, omics_df_name=specific_omics.name, omics_genes=gene)
    tumors = omics_and_mutations.loc[omics_and_mutations['Sample_Status'] == 'Tumor']

    # One row per (sample, mutation); reset_index exposes the sample index as a column.
    somatic_mutations = cancer_object.get_mutations().reset_index()
    gene_df = somatic_mutations.loc[somatic_mutations['Gene'] == gene]

    # The colon dataset uses a different mutation vocabulary from the others.
    if cancer_object.get_cancer_type() == 'colon':
        missence_truncation_groups = {
            'frameshift substitution': 'T', 'frameshift deletion': 'T',
            'frameshift insertion': 'T', 'stopgain': 'T', 'stoploss': 'T',
            'nonsynonymous SNV': 'M', 'nonframeshift insertion': 'M',
            'nonframeshift deletion': 'M', 'nonframeshift substitution': 'M'}
    else:
        missence_truncation_groups = {
            'In_Frame_Del': 'M', 'In_Frame_Ins': 'M', 'Missense_Mutation': 'M',
            'Frame_Shift_Del': 'T', 'Nonsense_Mutation': 'T', 'Splice_Site': 'T',
            'Frame_Shift_Ins': 'T', 'Nonstop_Mutation': 'T'}

    # Classify on the 'Mutation' column only, so a value in some other column
    # that happens to equal a mutation label is never rewritten.
    selected_labels = [label for label, group in missence_truncation_groups.items()
                       if group == mutation_group]
    in_group = gene_df['Mutation'].isin(selected_labels)
    group_samples = gene_df.loc[in_group, 'Sample_ID'].unique()

    return tumors.loc[tumors.index.isin(group_samples)]


In [8]:
# Threshold handed to al.get_frequently_mutated as `cutoff`
# (presumably the minimum fraction of samples mutated - confirm in cptac docs).
desired_cutoff = 0.1

# Endometrial dataset object; the later cells reuse `en_object`.
en_object = cptac.Endometrial()

endometrial_freq_mut = al.get_frequently_mutated(en_object, cutoff=desired_cutoff)

# Report how many genes were returned and preview the first rows.
print(
    '\n\nNumber of Frequently Mutated Genes:',
    len(endometrial_freq_mut),
    '\n',
    endometrial_freq_mut.head(),
)

[Kmatting dataframes...linear data.....

Number of Frequently Mutated Genes: 232 
      Gene  Unique_Samples_Mut  Missence_Mut  Truncation_Mut
0  ABCA12            0.147368      0.094737        0.073684
1  ABCA13            0.115789      0.105263        0.042105
2  ACVR2A            0.105263      0.010526        0.094737
3  ADGRG4            0.136842      0.126316        0.021053
4  ADGRV1            0.115789      0.094737        0.052632


In [9]:
# Pick the omics table to analyze. get_missence_truncation_comparison reads
# only its `.name` to tell the dataset which omics table to join with mutations.
omics = en_object.get_proteomics()
# Alternatives: uncomment exactly one to run the same analysis on another omics type.
#omics = en_object.get_transcriptomics()
#omics = en_object.get_phosphoproteomics()
#omics = en_object.get_acetylproteomics()

In [42]:
# Tumor samples carrying a truncation-class ARID1A mutation, with the selected omics values.
# NOTE(review): this cell's execution count (42) is lower than the function cell's (44),
# so the saved output may come from an earlier definition - restart and run all to confirm.
mutation_type_comparison = get_missence_truncation_comparison(en_object, omics, 'ARID1A')
mutation_type_comparison

Unnamed: 0_level_0,ARID1A_proteomics,ARID1A_Mutation,ARID1A_Location,ARID1A_Mutation_Status,Sample_Status
Sample_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
S002,-1.41,"[Nonsense_Mutation, Frame_Shift_Del]","[p.Q403*, p.D1850Tfs*33]",Multiple_mutation,Tumor
S008,0.0622,[Frame_Shift_Del],[p.T541Pfs*77],Single_mutation,Tumor
S012,-1.61,[Nonsense_Mutation],[p.R1989*],Single_mutation,Tumor
S014,-1.54,"[Nonsense_Mutation, Missense_Mutation]","[p.Q557*, p.I1117F]",Multiple_mutation,Tumor
S018,0.195,[Nonsense_Mutation],[p.R1989*],Single_mutation,Tumor
S019,-0.209,[Frame_Shift_Ins],[p.D1850Gfs*4],Single_mutation,Tumor
S023,0.149,"[Nonsense_Mutation, Nonsense_Mutation]","[p.C1827*, p.R1989*]",Multiple_mutation,Tumor
S024,-0.992,"[Nonsense_Mutation, Frame_Shift_Ins, Missense_...","[p.Y471*, p.H1581Qfs*22, p.G2087R]",Multiple_mutation,Tumor
S026,-0.113,[Nonsense_Mutation],[p.R750*],Single_mutation,Tumor
S030,-1.19,[Frame_Shift_Del],[p.A339Lfs*24],Single_mutation,Tumor
