# Find Multiple Mutations in Cis

For a user specified gene, this notebook finds if there are multiple types of mutations 

In [1]:
import cptac
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

  import pandas.util.testing as tm


In [2]:
#import CPTAC data
en = cptac.Endometrial()
hn = cptac.Hnscc()
l = cptac.Luad()
o = cptac.Ovarian()
c = cptac.Ccrcc()
col = cptac.Colon()
g = cptac.Gbm()
b = cptac.Brca()
lscc = cptac.Lscc()

                                                

To find multiple mutations in one gene we will merge the dataframe from get_somatic_muations (does not include copy number mutations, but all other somatic mutations) with the dataframe from genotype_all_vars (with mutation parameter function, it will only include CNV)

In [3]:
cis_gene = "PTEN"
mutations = g.get_somatic_mutation()
#get just somatic mutations for cis_gene
cis_mut = mutations[mutations.Gene == cis_gene]
cis_mut = cis_mut[["Mutation","Location"]] # subset down
    
mut_copy_num = g.get_genotype_all_vars(cis_gene, mutations_filter = ["Deletion","Amplification"]) 
mut_copy_num = mut_copy_num.rename(columns={'Mutation':'CNV_mutation'})
mut_copy_num = mut_copy_num["CNV_mutation"]
all_mutations = pd.merge(mut_copy_num, cis_mut, on="Patient_ID", how = "outer")
all_mutations.head()

Unnamed: 0_level_0,CNV_mutation,Mutation,Location
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00104,Deletion,Missense_Mutation,p.G251D
C3L-00365,Deletion,,
C3L-00674,Deletion,Missense_Mutation,p.C136Y
C3L-00677,Deletion,Missense_Mutation,p.R173H
C3L-01040,Deletion,,


In [4]:
# If mutation column is NaN replace with value in CNV_mutation
all_mutations['Mutation'] = all_mutations.apply(
        lambda row: row['CNV_mutation'] if pd.isnull(row['Mutation']) else row['Mutation'], axis=1)

#Find patients with multiple mutations 
all_mutations.query('CNV_mutation != Mutation')

Unnamed: 0_level_0,CNV_mutation,Mutation,Location
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00104,Deletion,Missense_Mutation,p.G251D
C3L-00674,Deletion,Missense_Mutation,p.C136Y
C3L-00677,Deletion,Missense_Mutation,p.R173H
C3L-01043,Deletion,Missense_Mutation,p.I33V
C3L-01046,Deletion,Splice_Site,p.X70_splice
C3L-01049,Deletion,Frame_Shift_Del,p.D77Afs*22
C3L-01061,Deletion,In_Frame_Del,p.R15del
C3L-01146,Deletion,Nonsense_Mutation,p.R130*
C3L-01155,Deletion,Frame_Shift_Del,p.Y16Ifs*8
C3L-01156,Deletion,Missense_Mutation,p.K125E


Multiple somatic mutations are indicated using multiple rows with the same patient ids. We will group the rows by patient ids and then relabel the columns that have multiple mutations 

In [5]:

mutations = all_mutations.groupby(level=['Patient_ID'], sort=False).agg( ','.join)
mutations = mutations.replace(regex=[',' + cis_gene], value='')
mutations.Mutation = mutations.Mutation.apply(lambda x: 'Multiple_Missense' if 'Missense_Mutation,Missense_Mutation' in x else x)
mutations.CNV_mutation = mutations.CNV_mutation.apply(lambda x: 'Multiple_Missense' if 'Missense_Mutation,Missense_Mutation' in x else x)
mutations.Mutation = mutations.Mutation.apply(lambda x: 'Multiple_Mutations' if ',' in x else x)
mutations.Mutation.value_counts()

Deletion             56
Wildtype_Tumor       16
Missense_Mutation    14
Frame_Shift_Del       3
Nonsense_Mutation     3
Splice_Site           3
Frame_Shift_Ins       2
In_Frame_Del          2
Name: Mutation, dtype: int64

In [6]:
#Make column All_Mutation which is a list of all mutations 
mutations['All_Mutations'] = mutations.apply(
        lambda row: [row["CNV_mutation"] , row["Mutation"]] if (row["CNV_mutation"] != row["Mutation"]) else [row['Mutation']], axis=1)
mutations.All_Mutations.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1652, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[Deletion]                       56
[Wildtype_Tumor]                 16
[Deletion, Missense_Mutation]    13
[Deletion, Nonsense_Mutation]     3
[Deletion, Frame_Shift_Del]       3
[Deletion, Splice_Site]           2
[Deletion, In_Frame_Del]          2
[Deletion, Frame_Shift_Ins]       2
[Missense_Mutation]               1
[Splice_Site]                     1
Name: All_Mutations, dtype: int64

In [7]:
def find_multi_cis_mut(cancer_obj,cis_gene):
    #Get all somatic mutations 
    mutations = cancer_obj.get_somatic_mutation()
    #get just somatic mutations for cis_gene
    cis_mut = mutations[mutations.Gene == cis_gene]
    cis_mut = cis_mut[["Mutation"]]
    # Multiple somatic mutations are indicated using multiple rows with the same patient ids.
    # We will group the rows by patient ids and then relabel the columns that have multiple mutations 
    
    cis_mut = cis_mut.groupby(level=['Patient_ID'], sort=False).agg( ','.join)
    cis_mut = cis_mut.replace(regex=[',' + cis_gene], value='')
    cis_mut.Mutation = cis_mut.Mutation.apply(lambda x: 'Multiple_Missense' if 'Missense_Mutation,Missense_Mutation' in x else x)
    cis_mut.Mutation = cis_mut.Mutation.apply(lambda x: 'Multiple_Frame_Shift_Del' if 'Frame_Shift_Del,Frame_Shift_Del' in x else x)
    cis_mut.Mutation = cis_mut.Mutation.apply(lambda x: 'Multiple_Splice_Site' if 'Splice_Site,Splice_Site' in x else x)
   
   #get CNV mutations 
    mut_copy_num = cancer_obj.get_genotype_all_vars(cis_gene, mutations_filter = ["Deletion","Amplification"])
    mut_copy_num = mut_copy_num.rename(columns={'Mutation': 'CNV_Mutation'})
    mut_copy_num = mut_copy_num["CNV_Mutation"]
    mutations = pd.merge(mut_copy_num, cis_mut, on="Patient_ID", how = "outer")
   
     
    # If mutation column is NaN replace with value in Mutation_Copy_Num
    mutations['Mutation'] = mutations.apply(
        lambda row: row['CNV_Mutation'] if pd.isnull(row['Mutation']) else row['Mutation'], axis=1)
   
    #Make column All_Mutation which is a list of all mutations 
    
    mutations['All_Mutations'] = mutations.apply(
        lambda row: [row["CNV_Mutation"] , row["Mutation"]] if (row["CNV_Mutation"] != row["Mutation"]) else [row['Mutation']], axis=1)
    mutations = mutations.rename(columns={'All_Mutations': 'All_Mutations_' + cis_gene})
    return mutations


In [8]:
#Call function to create dataframe with all mutations 
cis_gene = "PIK3CA"
pik3ca_mutations = find_multi_cis_mut(b,"PIK3CA")
pik3ca_mutations

Unnamed: 0_level_0,CNV_Mutation,Mutation,All_Mutations_PIK3CA
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CPT000814,Amplification,Amplification,[Amplification]
CPT001846,Amplification,Missense_Mutation,"[Amplification, Missense_Mutation]"
X01BR001,Wildtype_Tumor,Wildtype_Tumor,[Wildtype_Tumor]
X01BR008,Wildtype_Tumor,Wildtype_Tumor,[Wildtype_Tumor]
X01BR009,Amplification,Amplification,[Amplification]
...,...,...,...
X21BR001,Amplification,Amplification,[Amplification]
X21BR002,Amplification,Missense_Mutation,"[Amplification, Missense_Mutation]"
X21BR010,Amplification,Missense_Mutation,"[Amplification, Missense_Mutation]"
X22BR005,Missense_Mutation,Missense_Mutation,[Missense_Mutation]


In [9]:
pik3ca_mutations = pik3ca_mutations["All_Mutations_"+cis_gene]
pik3ca_mutations.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 1652, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[Wildtype_Tumor]                          53
[Amplification]                           23
[Missense_Mutation]                       18
[Amplification, Missense_Mutation]        12
[Deletion]                                 7
[Missense_Mutation, Multiple_Missense]     3
[In_Frame_Del]                             3
[Deletion, Missense_Mutation]              1
[Amplification, In_Frame_Del]              1
[Silent]                                   1
Name: All_Mutations_PIK3CA, dtype: int64

In [10]:
#Call function to create dataframe with all mutations 
cis_gene = "PTEN"
PTEN_mutations = find_multi_cis_mut(b,cis_gene)
pten_mutations = PTEN_mutations["All_Mutations_"+cis_gene]
pten_mutations

Patient_ID
CPT000814    [Wildtype_Tumor]
CPT001846    [Wildtype_Tumor]
X01BR001     [Wildtype_Tumor]
X01BR008      [Amplification]
X01BR009     [Wildtype_Tumor]
                   ...       
X21BR001      [Amplification]
X21BR002     [Wildtype_Tumor]
X21BR010     [Wildtype_Tumor]
X22BR005     [Wildtype_Tumor]
X22BR006     [Wildtype_Tumor]
Name: All_Mutations_PTEN, Length: 122, dtype: object

In [11]:
#Call function to create dataframe with all mutations 
cis_gene = "PIK3CA"
pik3ca_mutations = find_multi_cis_mut(b,cis_gene)
pik3ca_mutations = pik3ca_mutations["All_Mutations_"+cis_gene]
pik3ca_mutations

Patient_ID
CPT000814                       [Amplification]
CPT001846    [Amplification, Missense_Mutation]
X01BR001                       [Wildtype_Tumor]
X01BR008                       [Wildtype_Tumor]
X01BR009                        [Amplification]
                            ...                
X21BR001                        [Amplification]
X21BR002     [Amplification, Missense_Mutation]
X21BR010     [Amplification, Missense_Mutation]
X22BR005                    [Missense_Mutation]
X22BR006     [Amplification, Missense_Mutation]
Name: All_Mutations_PIK3CA, Length: 122, dtype: object

# Find multiple mutations for different genes

In [12]:
pten_pik3ca = pd.merge(pik3ca_mutations, pten_mutations, on="Patient_ID", how = "outer")
pten_pik3ca['All_Mutations_PTEN'] = [','.join(map(str, l)) for l in pten_pik3ca['All_Mutations_PTEN']]

pten_pik3ca['All_Mutations_PIK3CA'] = [','.join(map(str, l)) for l in pten_pik3ca['All_Mutations_PIK3CA']]
pten_pik3ca

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
CPT000814,Amplification,Wildtype_Tumor
CPT001846,"Amplification,Missense_Mutation",Wildtype_Tumor
X01BR001,Wildtype_Tumor,Wildtype_Tumor
X01BR008,Wildtype_Tumor,Amplification
X01BR009,Amplification,Wildtype_Tumor
...,...,...
X21BR001,Amplification,Amplification
X21BR002,"Amplification,Missense_Mutation",Wildtype_Tumor
X21BR010,"Amplification,Missense_Mutation",Wildtype_Tumor
X22BR005,Missense_Mutation,Wildtype_Tumor


In [13]:
#only pten deletion
pten_deletion = pten_pik3ca.loc[pten_pik3ca['All_Mutations_PTEN'].isin(["Deletion"])]
#All pten deletions 
pten_deletion_all = pten_pik3ca.loc[pten_pik3ca['All_Mutations_PTEN'].str.contains("Deletion")]
pten_deletion_all

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
X01BR010,In_Frame_Del,Deletion
X01BR025,Amplification,Deletion
X01BR027,Wildtype_Tumor,Deletion
X01BR043,Silent,Deletion
X03BR002,Wildtype_Tumor,Deletion
X03BR004,Wildtype_Tumor,"Deletion,Frame_Shift_Del"
X05BR016,Wildtype_Tumor,Deletion
X05BR042,Wildtype_Tumor,Deletion
X05BR043,"Amplification,Missense_Mutation",Deletion
X06BR005,Wildtype_Tumor,Deletion


In [14]:
pten_pik_wt = pten_deletion.loc[pten_deletion['All_Mutations_PIK3CA'].isin(["Wildtype_Tumor"])]
pten_pik_wt

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
X01BR027,Wildtype_Tumor,Deletion
X03BR002,Wildtype_Tumor,Deletion
X05BR016,Wildtype_Tumor,Deletion
X05BR042,Wildtype_Tumor,Deletion
X06BR005,Wildtype_Tumor,Deletion
X11BR016,Wildtype_Tumor,Deletion
X11BR020,Wildtype_Tumor,Deletion
X11BR055,Wildtype_Tumor,Deletion
X11BR058,Wildtype_Tumor,Deletion
X11BR075,Wildtype_Tumor,Deletion


# Find wildtype PIK3CA, PTEN deletion 

In [15]:
#Select the cancers that you would like to test and add cptac data into a dictionary
cptac_cancers = {'HNSCC':hn, 'LUAD':l, 'LSCC':lscc, 'BR':b, 'CO':col, 'OV':o, "GBM":g}
cis_gene1 = "PTEN"
gene1_mut ="Deletion"
cis_gene2 = "PIK3CA"

merged_dfs = {}
for cancer in cptac_cancers:

    gene1_mutations = find_multi_cis_mut(cptac_cancers[cancer],cis_gene1)
    gene1_mutations = gene1_mutations["All_Mutations_"+cis_gene1]
    
    gene2_mutations = find_multi_cis_mut(cptac_cancers[cancer],cis_gene)
    gene2_mutations = gene2_mutations["All_Mutations_"+cis_gene]
    
    gene1_gene2 = pd.merge(gene1_mutations, gene2_mutations, on="Patient_ID", how = "outer")
    gene1_gene2['All_Mutations_' + cis_gene1] = [','.join(map(str, l)) for l in gene1_gene2['All_Mutations_' + cis_gene1]]

    gene1_gene2['All_Mutations_'+ cis_gene2] = [','.join(map(str, l)) for l in gene1_gene2['All_Mutations_'+cis_gene2]]
    gene1_gene2["Cancer"] = cancer
    
    #only gene1 with specified mutation
    gene1_mutated = gene1_gene2.loc[gene1_gene2['All_Mutations_'+cis_gene1].isin([gene1_mut])]
    
    #gene1 mutated with only gene2 wildtype
    gene1_mut_gene2_wt = gene1_mutated.loc[gene1_mutated['All_Mutations_' + cis_gene2].isin(["Wildtype_Tumor"])]

    merged_dfs[cancer] = gene1_mut_gene2_wt
    # Create csv
    #gene1_mut_gene2_wt.to_csv('csv/'+cancer+'_pten_mut_pik3ca_wt.csv', index=False)
    print(gene1_mut_gene2_wt.head(), '\n')
    

           All_Mutations_PTEN All_Mutations_PIK3CA Cancer
Patient_ID                                               
C3N-01754            Deletion       Wildtype_Tumor  HNSCC 

           All_Mutations_PTEN All_Mutations_PIK3CA Cancer
Patient_ID                                               
C3L-00001            Deletion       Wildtype_Tumor   LUAD
C3L-00422            Deletion       Wildtype_Tumor   LUAD
C3L-00510            Deletion       Wildtype_Tumor   LUAD
C3L-02365            Deletion       Wildtype_Tumor   LUAD
C3N-00169            Deletion       Wildtype_Tumor   LUAD 

           All_Mutations_PTEN All_Mutations_PIK3CA Cancer
Patient_ID                                               
C3L-00904            Deletion       Wildtype_Tumor   LSCC
C3L-02665            Deletion       Wildtype_Tumor   LSCC
C3N-01411            Deletion       Wildtype_Tumor   LSCC 

           All_Mutations_PTEN All_Mutations_PIK3CA Cancer
Patient_ID                                               
X01BR027

In [18]:
#Call function to create dataframe with all mutations 
cis_gene = "PTEN"
PTEN_mutations = find_multi_cis_mut(hn,cis_gene)
pten_mutations = PTEN_mutations["All_Mutations_"+cis_gene]
#Call function to create dataframe with all mutations 
cis_gene = "PIK3CA"
pik3ca_mutations = find_multi_cis_mut(hn,cis_gene)
pik3ca_mutations = pik3ca_mutations["All_Mutations_"+cis_gene]

pten_pik3ca = pd.merge(pik3ca_mutations, pten_mutations, on="Patient_ID", how = "outer")
pten_pik3ca['All_Mutations_PTEN'] = [','.join(map(str, l)) for l in pten_pik3ca['All_Mutations_PTEN']]

pten_pik3ca['All_Mutations_PIK3CA'] = [','.join(map(str, l)) for l in pten_pik3ca['All_Mutations_PIK3CA']]
pten_pik3ca

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3L-00977,Wildtype_Tumor,Wildtype_Tumor
C3L-00987,Amplification,Deletion
C3L-00994,Wildtype_Tumor,Wildtype_Tumor
C3L-00995,Wildtype_Tumor,Wildtype_Tumor
C3L-00997,Amplification,Wildtype_Tumor
...,...,...
C3N-04277,Wildtype_Tumor,Wildtype_Tumor
C3N-04278,Amplification,Wildtype_Tumor
C3N-04279,Amplification,Wildtype_Tumor
C3N-04280,Missense_Mutation,Wildtype_Tumor


In [23]:
#only pten deletion
pten_deletion = pten_pik3ca.loc[pten_pik3ca['All_Mutations_PTEN'].isin(["Deletion"])]
pten_deletion["Cancer"] = "HNSCC"
pten_deletion

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN,Cancer
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C3L-00987,Amplification,Deletion,HNSCC
C3L-01237,Amplification,Deletion,HNSCC
C3L-03378,Amplification,Deletion,HNSCC
C3N-00204,Nonstop_Mutation,Deletion,HNSCC
C3N-00829,Amplification,Deletion,HNSCC
C3N-01339,Amplification,Deletion,HNSCC
C3N-01620,Amplification,Deletion,HNSCC
C3N-01754,Wildtype_Tumor,Deletion,HNSCC
C3N-01756,Amplification,Deletion,HNSCC
C3N-01859,Amplification,Deletion,HNSCC


In [21]:
pten_pik_wt = pten_deletion.loc[pten_deletion['All_Mutations_PIK3CA'].isin(["Wildtype_Tumor"])]
pten_pik_wt

Unnamed: 0_level_0,All_Mutations_PIK3CA,All_Mutations_PTEN
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1
C3N-01754,Wildtype_Tumor,Deletion
