# Count samples with a PTEN deletion and another PTEN mutation

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import cptac
import cptac.utils as u
import plot_utils as p

In [2]:
# Record which cptac release produced the outputs in this notebook.
cptac_version = cptac.version()
print('cptac version:', cptac_version)

cptac version: 0.8.6


In [3]:
import warnings
# Suppress every warning for the rest of the session. No category or module
# is given, so this also hides deprecation warnings raised by pandas/cptac.
warnings.filterwarnings('ignore')

# Step 1: Create df with Mutation Types

For each cancer type, create a DataFrame that records the PTEN mutation type of each sample.

First, load in the cancer data sets from cptac.

In [4]:
# Instantiate one cptac dataset object per cancer type. The short names
# below are referenced by later cells (cancer_obj and the endometrial cells).
en = cptac.Endometrial()
hn = cptac.Hnscc()
# NOTE(review): `l` (LUAD) is loaded but not referenced by any later cell
# visible in this notebook -- confirm whether it is still needed.
l = cptac.Luad()
ls = cptac.Lscc()
o = cptac.Ovarian()
# The ccRCC dataset is intentionally left disabled.
#c = cptac.Ccrcc()
col = cptac.Colon()
g = cptac.Gbm()
b = cptac.Brca()

                                                

In [5]:
def get_samples_mult_mutations(gen_vars_df, somatic_mut_df, cnv_mut, cancer, gene='PTEN'):
    """Print and return the samples that carry both a CNV event and a somatic mutation.

    Parameters
    ----------
    gen_vars_df : pandas.DataFrame
        Indexed by sample. Must contain a 'Mutation' column; rows whose value
        equals `cnv_mut` are treated as carrying the CNV event.
    somatic_mut_df : pandas.DataFrame
        Indexed by sample. Must contain a '<gene>_Mutation' column; every row
        whose value is not 'Wildtype_Tumor' is treated as mutated.
    cnv_mut : str
        Value of the 'Mutation' column to select (e.g. 'Deletion').
    cancer : str
        Label used only in the printed summary.
    gene : str, default 'PTEN'
        Gene whose '<gene>_Mutation' column is read from `somatic_mut_df`.

    Returns
    -------
    numpy.ndarray
        Sorted, unique sample identifiers present in both groups
        (the result of ``np.intersect1d``).
    """
    mutation_col = gene + '_Mutation'

    cnv_list = list(gen_vars_df.loc[gen_vars_df['Mutation'] == cnv_mut].index)
    # Note: a missing value (NaN) compares unequal to 'Wildtype_Tumor', so
    # samples with no entry in this column are counted as mutated.
    mut_list = list(somatic_mut_df.loc[somatic_mut_df[mutation_col] != 'Wildtype_Tumor'].index)

    # Samples that appear in both the CNV group and the mutated group.
    common_elements = np.intersect1d(mut_list, cnv_list)
    dm = len(common_elements)
    print(cancer+' samples with a '+ cnv_mut+ ' and a mutation:', dm)

    total_cnv_samples = len(cnv_list)
    # Guard the ratio: a cohort without any `cnv_mut` sample used to raise
    # ZeroDivisionError here; report NaN instead.
    if total_cnv_samples:
        fraction = dm / total_cnv_samples
    else:
        fraction = float('nan')
    print(dm, '/', total_cnv_samples, '=', fraction, '\n')
    return common_elements

In [6]:
# Display label -> loaded cptac dataset for every cohort handled by the
# deletion loop below.
cancer_obj = {
    'GBM': g,
    'HNSCC': hn,
    'LSCC': ls,
    'BR': b,
    'CO': col,
    'OV': o,
}

In [7]:
# Mutation labels passed as `mutations_filter` for every cohort except colon.
ca_filter = [
    'Nonsense_Mutation',
    'Frame_Shift_Ins',
    'Frame_Shift_Del',
    'Splice_Site',
    'Missense_Mutation',
]

# Mutation labels passed as `mutations_filter` for the colon cohort.
colon_filter = [
    'stopgain',
    'stoploss',
    'frameshift deletion',
    'frameshift insertion',
    'frameshift substitution',
    'nonframeshift deletion',
    'nonframeshift insertion',
    'nonframeshift substitution',
    'nonsynonymous SNV',
]

In [8]:
# For each cohort, count samples that have a PTEN deletion and also a somatic
# PTEN mutation, and keep the overlapping sample IDs keyed by cohort label.
mult_mut_samples = {}
gene = 'PTEN'
for cancer, ca in cancer_obj.items():
    cnv_df = ca.get_genotype_all_vars(gene)

    # The colon cohort is filtered with its own list of mutation labels.
    # Compare with ==: `x in ('colon')` is a substring test against the plain
    # string 'colon' (no trailing comma, so it is not a tuple).
    if ca.get_cancer_type() == 'colon':
        mut_filter = colon_filter
    else:
        mut_filter = ca_filter

    mut = ca.join_omics_to_mutations(
        mutations_genes=[gene], omics_df_name='proteomics', omics_genes=gene,
        tissue_type='tumor', mutations_filter=mut_filter)

    # Flatten multi-level column labels so '<gene>_Mutation' can be selected by name.
    if isinstance(mut.columns, pd.MultiIndex):
        mut = u.reduce_multiindex(mut, levels_to_drop=1)

    mult_samples = get_samples_mult_mutations(cnv_df, mut, 'Deletion', cancer)
    # Store the per-cohort result; previously it was overwritten on each pass
    # and the dict stayed empty.
    mult_mut_samples[cancer] = mult_samples

GBM samples with a Deletion and a mutation: 25
25 / 81 = 0.30864197530864196 

HNSCC samples with a Deletion and a mutation: 1
1 / 18 = 0.05555555555555555 

LSCC samples with a Deletion and a mutation: 4
4 / 39 = 0.10256410256410256 

BR samples with a Deletion and a mutation: 3
3 / 24 = 0.125 

CO samples with a Deletion and a mutation: 2
2 / 25 = 0.08 

OV samples with a Deletion and a mutation: 2
2 / 26 = 0.07692307692307693 



In [9]:
# Endometrial (EC): count truncating PTEN mutations that co-occur with another mutation

In [10]:
# Join PTEN proteomics to PTEN mutation calls for the endometrial tumors.
ca = en
mut = ca.join_omics_to_mutations(
    mutations_genes=[gene],
    omics_df_name='proteomics',
    omics_genes=gene,
    tissue_type='tumor',
    mutations_filter=ca_filter,
)

# Flatten multi-level column labels, if present, so columns can be selected by name.
if isinstance(mut.columns, pd.MultiIndex):
    mut = u.reduce_multiindex(mut, levels_to_drop=1)

In [11]:
# Mutation labels treated as truncating.
compare = ['Nonsense_Mutation','Frame_Shift_Ins','Frame_Shift_Del']
get = mut[gene+'_Mutation'].isin(compare)
trunc_df = mut[get]

# Number of tumors whose reported mutation is truncating.
total_trunc = len(trunc_df)

# Tally the mutation-status labels among the truncated samples. The column
# name is built from `gene` so it stays consistent with the selection above.
vc = trunc_df[gene+'_Mutation_Status'].value_counts()
# Series.get returns a scalar and falls back to 0 when no sample is labelled
# 'Multiple_mutation' (list-indexing raised KeyError and yielded a Series).
mult_mut = int(vc.get('Multiple_mutation', 0))

In [12]:
# Report how many truncated EC samples carry an additional mutation,
# followed by the same count as a fraction of all truncated samples.
print('EC samples with a Truncation type mutation and another mutation:', mult_mut)
trunc_fraction = mult_mut/total_trunc
print(mult_mut, '/', total_trunc, '=', trunc_fraction, '\n')

EC samples with a Truncation type mutation and another mutation: 32
32 / 53 = 0.6037735849056604 

