# Create Heatmap for EGFR, PIK3CA, and PTEN

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p

In [37]:
# Folder holding the Step 3 trans-effect CSV files. Forward slashes are used so
# the path resolves on Windows, macOS and Linux alike (the original backslash
# separators only worked on Windows); pandas expands the leading '~'.
root = '~/Github/WhenMutationsDontMatter/PTEN/Step_3_trans_effect/csv'
df = pd.read_csv(root + '/all_heatmap.csv')

In [38]:
# Restrict the heat map table to the three genes of interest and confirm
# that all of them are present in the data.
genes = ['PTEN', 'EGFR', 'PIK3CA']
bool_df = df['Proteomics'].isin(genes)
plot_df = df.loc[bool_df]
plot_df['Proteomics'].nunique()

3

In [39]:
# Keep only the rows whose p-value is at or below the cutoff a
a = 0.05
plot_df = plot_df[plot_df['P_Value'].le(a)]

In [40]:
# Circle heat map of the filtered rows: genes on the x axis, cancers on the y axis,
# with P_Value passed as the circle variable and Medians as the color variable.
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=300,
)

In [15]:
# Show the significant rows for EGFR across cancers
plot_df[plot_df['Proteomics'].eq('EGFR')]

Unnamed: 0,Proteomics,P_Value,Medians,Cancer,size2,size
3310,EGFR,0.04010871,1.001035,Gbm,3.216162,9.648485
12394,EGFR,0.0177187,0.467227,Hnscc,4.033135,12.099404
22336,EGFR,9.121167e-07,2.3297,Luad,13.907498,41.722494
45031,EGFR,0.01661374,0.58645,Brca,4.097525,12.292576
66061,EGFR,0.006128118,0.526,En,5.094868,15.284603


# Check that Luad has actual proteomics data for EGFR

In [19]:
# Instantiate the cptac Luad dataset object; it is passed to format_df below
l = cptac.Luad()

                                         



In [29]:
def format_df(cancer_object, trans, gene_in = 'PTEN', utils = u):
    """Return a dataframe with proteomics and mutation type for tumor samples.

    Only rows whose 'Mutation' value marks a wildtype tumor or a deletion of
    ``gene_in`` are kept, so the result can be used to compare the two groups.

    Parameters
    ----------
    cancer_object : cptac dataset object
        Must provide ``get_genotype_all_vars``, ``get_cancer_type`` and the
        join/get methods called below.
    trans : str
        Gene whose proteomics is requested, e.g. 'EGFR'; forwarded as
        ``omics_genes`` / ``genes2``. (A list is presumably accepted by cptac
        as well -- confirm against the cptac documentation.)
    gene_in : str, default 'PTEN'
        Gene whose mutation status defines the comparison groups.
    utils : module, default ``cptac.utils``
        Provider of ``reduce_multiindex``.

    Returns
    -------
    pandas.DataFrame
        Rows whose 'Mutation' value is 'Wildtype_Tumor' or 'Deletion'. The
        frame is an independent copy, so callers may modify it freely.
    """
    mut_type = cancer_object.get_genotype_all_vars(gene_in)

    # Compare with != : the original `not in ('luad')` was a substring test
    # against the string 'luad', because ('luad') is not a tuple.
    if cancer_object.get_cancer_type() != 'luad':
        # Keep only tumor samples from proteomics
        prot_and_mutations = cancer_object.join_omics_to_mutations(
            mutations_genes = [gene_in], omics_df_name = 'proteomics', omics_genes = trans)
        prot_and_mutations = prot_and_mutations[prot_and_mutations.Sample_Status == "Tumor"] # drop Normal samples

        # Reduce a multiindex
        if isinstance(prot_and_mutations.columns, pd.MultiIndex):
            prot_and_mutations = utils.reduce_multiindex(prot_and_mutations, levels_to_drop = 1)

        # Merge Mutation column from get_genotype_all_vars (includes cnv) with proteomics
        mut_type = mut_type[['Mutation']]
        # Keep only proteomics. NOTE(review): assumes the last four columns are
        # the mutation / sample-status columns added by the join -- confirm.
        prot_df = prot_and_mutations.iloc[:,:-4]
        merged = prot_df.join(mut_type)

        # Keep only Wildtype and deletion
        compare = ['Wildtype_Tumor','Deletion']
        del_wt = merged[merged['Mutation'].isin(compare)].copy()

    # Luad has no somatic mutations for PTEN which changes some things
    else:
        # get_genotype_all_vars adds cnv data under a column named after the gene
        mut_type = mut_type.drop(columns=gene_in)
        # different code because no somatic mutation data for pten (can't join to somatic mutations)
        omics = cancer_object.join_omics_to_omics(
            df1_name = 'CNV', df2_name = 'proteomics', genes1 = gene_in, genes2 = trans)
        omics = utils.reduce_multiindex(omics, levels_to_drop = 1, flatten = True)
        # Drop the CNV column of the requested gene (was hard-coded to 'PTEN_CNV')
        omics = omics.drop(columns = gene_in + '_CNV')

        # Get only tumor samples
        tumor_prot = cancer_object.get_proteomics(tissue_type='tumor')
        omics = omics[omics.index.isin(tumor_prot.index)]

        # Merge Mutation column from get_genotype_all_vars (includes cnv) with proteomics
        merged = omics.join(mut_type)

        # Keep only No_Mutation (wildtype) and deletion. The explicit copy
        # avoids pandas' SettingWithCopyWarning on the relabel below.
        compare = ['No_Mutation','Deletion']
        del_wt = merged[merged['Mutation'].isin(compare)].copy()
        del_wt['Mutation'] = np.where(
            del_wt['Mutation'] == 'No_Mutation', 'Wildtype_Tumor', 'Deletion')

    return del_wt


In [34]:
# Build the wildtype/deletion table for Luad with EGFR as the trans protein,
# then tally the EGFR proteomics values to confirm that data is present.
test = format_df(cancer_object=l, trans='EGFR')
test.loc[:, 'EGFR_proteomics'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


 1.0362    1
-0.1776    1
-0.5937    1
-0.3358    1
-0.7235    1
          ..
 1.6805    1
 2.3528    1
 0.3395    1
-2.8313    1
-0.3750    1
Name: EGFR_proteomics, Length: 105, dtype: int64