# Create Heatmap for Immune genes Comparable to EGFR Immune Heatmap 

This notebook looks at the significant genes in at least one cancer. Pancancer heatmaps are created with circle size showing significance and color showing differences in median.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import cptac
import cptac.utils as u
import plot_utils as p

In [None]:
print('cptac version:', cptac.version())

# Create HeatMap

In [2]:
all_df = pd.read_csv("../Make_Tables/csv/all_heatmap.csv")
sig_df = pd.read_csv('../Make_Tables/csv/sig_pval_heatmap.csv')
mult_df = pd.read_csv('../Make_Tables/csv/mult_sig_pval_heatmap.csv')

print('sig in at least one cancer:', len(sig_df.Proteomics.unique()))
print('sig in mult cancer:', len(mult_df.Proteomics.unique()))

sig in at least one cancer: 2630
sig in mult cancer: 332


In [26]:

t_only = ['CD3E', 'CD3G', 'CD4', 'CD8A', 'GRAP2', 'FYB1', 'PRKCQ']
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['LCK', 'ZAP70', 'VAV1', 'GRB2', 'PLCG1', 'NFATC2', 'RAC2', 'CD5', 'PTPN6', 'PTPRC', 
        'PIK3CG', 'INPP5D', 'NFKB2', 'PRKCB', 'PLCG2', 'ELMO1', 'DOCK2', 'WAS']
immune = t_only + b_only + both

In [27]:
len(immune)

34

In [28]:
# ordered and grouped proteins 
t_only = ['CD3E', 'CD3G', 'CD4', 'CD8A', 'GRAP2', 'FYB1', 'PRKCQ'] 
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['CD2', 'CD5', 'GRB2', 'LCK', 'ZAP70', 'VAV1', 'PTPN6', 'PTPRC', 'PIK3CD', 'PIK3CG', 'INPP5D',
        'PLCG1', 'PLCG2', 'NFATC2', 'PRKCB', 'CHUK', 'NFKB2', 'DOCK2', 'RAC2', 'ELMO1', 'WAS']
immune = t_only + b_only + both

In [29]:
len(immune)

37

In [5]:
bool_df = all_df.Proteomics.isin(immune)
plot_df = all_df[bool_df]
print('genes found in all_df:', len(plot_df.Proteomics.unique()))
plot_df.Proteomics.unique()

genes found in all_df: 37


array(['ELMO1', 'DOCK2', 'CHUK', 'SYK', 'PLCG1', 'PRKCB', 'BTK', 'PLCG2',
       'PIK3CD', 'GRB2', 'INPP5D', 'PRKCQ', 'FYB1', 'VAV1', 'WAS', 'BLNK',
       'PTPRC', 'NFKB2', 'PPP3CC', 'PIK3CG', 'CD4', 'PTPN6', 'NFATC2',
       'HLA-DMA', 'CD2', 'RAC2', 'LYN', 'HLA-DMB', 'CD8A', 'GRAP2',
       'ZAP70', 'CD3E', 'CD5', 'CD3G', 'LCK', 'CD79A', 'BLK'],
      dtype=object)

In [6]:
# Reorder index to reorder protein order in heatmap
plot_df["Index"] = plot_df["Proteomics"] + "_" + plot_df["Cancer"]
plot_df = plot_df.set_index("Index")

ordered_list = []
cancer_list = ['Brca', 'Colon', 'Endo', 'Gbm', 'Hnscc', 'Lscc', 'Luad', 'Ov']
for gene in immune:
    for cancer in cancer_list:
        ordered_list.append(gene+'_'+cancer)

ordered_df = plot_df.reindex(ordered_list)
ordered_df = ordered_df.loc[ordered_df['P_Value'] < 0.05] # Keep sig 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [7]:
                               
ordered_df = ordered_df.replace({'Hnscc': 'HNSCC', 'Luad': 'LUAD', 'Lscc': 'LSCC', 'Ovarian': 'OV', 'Colon': 'CO',
                    'Kidney': 'ccRCC', 'Breast': 'BR', 'Gbm': 'GBM', 'Endo': 'EC'})
ordered_df['Cancer'].unique()

array(['HNSCC', 'LUAD', 'GBM'], dtype=object)

In [8]:
# Find lowest p-val for legend
ordered_df.P_Value.min()

0.0006218184807157023

In [9]:
p.plotCircleHeatMap(ordered_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=300, plot_width= 800, legend_min = 0.0001, legend_med = .001, legend_max = 0.05, save_png = "Figure_PTEN_immune.png")

In [10]:
# Sig PTEN for reference 
t_only = ['CD2', 'CD4', 'GRAP2', 'FYB1', 'PRKCQ']
b_only = ['PPP3CC', 'CD79A', 'SYK', 'BTK', 'LYN', 'BLNK', 'BLK', 'HLA-DMA', 'HLA-DMB']
both = ['LCK', 'ZAP70', 'VAV1', 'GRB2', 'PLCG1', 'NFATC2', 'RAC2', 'CD5', 'PTPN6', 'PTPRC', 
        'PIK3CG', 'INPP5D', 'NFKB2', 'PRKCB', 'PLCG2', 'ELMO1', 'DOCK2', 'WAS']
immune = t_only + b_only + both

Check all immune genes that are sig with EGFR amplification.

In [21]:
plot_df = plot_df.loc[plot_df['P_Value'] < 0.05]
p.plotCircleHeatMap(plot_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 800, legend_min = 0.01, legend_med = 0.001, legend_max = 0.05)