# Make KRAS hotspot table

In [1]:
import cptac
import cptac.utils as u
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

# Load cptac datasets

In [2]:
en = cptac.Endometrial()
col = cptac.Colon()
l = cptac.Luad()

                                                

In [3]:
#use get_somatic mutation not get genotype due to priority order for gen_genotype_all_vars
'''
This function takes a cptac cancer object and returns a df with the PIK3CA hotspot mutations counts.
@Param cptac_cancer: cptac cancer object
@Param cancer_name: string of cancer type for column label
'''
def makeHotspotTable(cptac_cancer, cancer_name):

    mutation_df = cptac_cancer.get_somatic_mutation() # get mutation data
    gene_df = mutation_df.loc[mutation_df['Gene'] == "KRAS"] # only for KRAS
    gene_df = gene_df.replace(r'p.', "", regex = True)
    #gene_df['Location'] = [','.join(map(str, l)) for l in gene_df['Location']] # combines if multiple Ex: G12C,G13E
    counts = gene_df.groupby('Location').count()
    
    #get only hotspot mutations #'G12', 'G13', 'Q61
    hotspots = ['G12', 'G13', 'Q61', 'No_mutation']
    hotspot_df = pd.DataFrame()
    for site in hotspots:
        df = counts[counts.index.str.contains(site, regex= True, na=False)]
        hotspot_df = hotspot_df.append(df)

    hotspot_df = hotspot_df.rename(columns={"Mutation": "Num_Mutations_" + cancer_name})
    hotspot_df = hotspot_df[["Num_Mutations_" + cancer_name]]
    return hotspot_df

Use fu

In [4]:
endo_df = makeHotspotTable(en, "EC") # endometrial
col_df = makeHotspotTable(col, "CO") # colon
br_df = makeHotspotTable(l, "LUAD")

In [5]:
hotspot_table = endo_df.join(col_df, on = "Location", how = "outer")
hotspot_table = hotspot_table.join(br_df, on = "Location", how = "outer")
hotspot_table = hotspot_table.fillna(0)
hotspot_table

Name,Location,Num_Mutations_EC,Num_Mutations_CO,Num_Mutations_LUAD
G12A,G12A,1.0,0.0,1.0
G12C,G12C,4.0,2.0,16.0
G12D,G12D,6.0,11.0,6.0
G12S,G12S,1.0,0.0,2.0
G12V,G12V,10.0,7.0,5.0
G13D,G13D,7.0,4.0,0.0
Q61H,Q61H,1.0,2.0,1.0
,Q61L,0.0,1.0,0.0
,G13C,0.0,0.0,2.0


In [6]:
hotspot_table.to_csv('Table_1_KRAS_hotspot.csv', index=False)