# Create Heatmap for Immune genes Comparable to EGFR Immune Heatmap 

This notebook looks at genes that are significant in at least one cancer. Pan-cancer heatmaps are created in which circle size shows significance (p-value) and color shows the difference in medians.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p



# Create HeatMap

In [2]:
# Earlier candidate list, superseded by the curated list below (kept for reference).
#immune = ['CD2','CD5',"CD79A","CD79B","CD274","BLK","BTK","LCK","LYN","SYK","BLNK","CD19","PLCG2","PI3K","CD5","CD22","PCK","HS1","CCL3","CCL4","PTPN6","ZAP70","LCK","ITK","CD3E","CD3G","CD4","VAV1","GRB2","ADAP","GADS","LAT","SLP76","NFAT","PLCG1","PTPN22","SOS1","HLA-DP","HLA-DM","HLA-DOA","HLA-DOB","HLA-DQ","HLA-DRA","HLA-DRB1","GZMA","CD8A","PRF1","CIITA","INFGR1","JAK1","JAK2","STAT1","B2M","HLA-A","HLA-B","HLA-C","TAP1","TAP2","PSMB8","PSMB9","PSMB10","IFDR1","TGFB1"]
#len(immune)

# Immune gene symbols used to filter the `Proteomics` column of the heatmap table.
# NFATC2, PIK3CG, SYK, BTK and RAC2 appear more than once; repeats are harmless
# for a membership (`isin`) filter, so the list is left at its original 48 entries.
immune = [
    "CD2", 'CD3E', 'CD3G', 'CD4', 'CD8A', 'LCK', 'ZAP70', 'LCP2', 'GRAP2', 'VAV1',
    'GRB2', 'NFATC2', 'PPP3CC', 'RAC2', 'CD79A',
    'SYK', 'BTK', 'CD5', 'PTPN6', 'LYN', 'PTPRC', 'PIK3CG', 'INPP5D', 'PRKCQ',
    'BLNK', 'PIK3CD', 'BLK', 'HLA-DMA', 'HLA-DMB',
    "NFKB2", "CHUK", "PLCG2", "PPP3CB", "MAP3K8", "NFATC2", "PIK3CG", "SYK", "BTK",
    "RAC2", "PRKCB", "ELMO1", "DOCK2", "RAC2", "WAS",
    # Checkpoint genes. The last symbol was previously misspelled "CTL4"; the
    # gene symbol for cytotoxic T-lymphocyte-associated protein 4 is CTLA4.
    "CD274", "CD28", "PDCD1", "CTLA4",
]
len(immune)

48

In [3]:

# Read the three heatmap tables; the paths are relative to the directory the
# notebook is run from.
all_df = pd.read_csv("Make_Tables/csv/all_heatmap.csv")
sig_df = pd.read_csv('Make_Tables/csv/sig_pval_heatmap.csv')
mult_df = pd.read_csv('Make_Tables/csv/mult_sig_pval_heatmap.csv')

# Report how many distinct `Proteomics` entries each significance table contains.
n_sig_any = len(sig_df["Proteomics"].unique())
print('sig in at least one cancer:', n_sig_any)
n_sig_mult = len(mult_df["Proteomics"].unique())
print('sig in mult cancer:', n_sig_mult)

FileNotFoundError: [Errno 2] File Make_Tables/csv/all_heatmap.csv does not exist: 'Make_Tables/csv/all_heatmap.csv'

In [None]:
# Keep only the rows of the full table whose `Proteomics` entry is in the immune list.
bool_df = all_df.Proteomics.isin(immune)
# .copy() makes plot_df an independent frame: later cells assign columns on it
# (e.g. 'P_Value', 'Index'), which on a bare boolean-mask slice of all_df raises
# pandas' SettingWithCopyWarning.
plot_df = all_df[bool_df].copy()
print('genes found in all_df:', len(plot_df.Proteomics.unique()))
plot_df

In [None]:

# Set every p-value that is not below 0.05 (including missing values) to 1, so
# only significant results stand out as circles on the heatmap.
# The masked values go into a new frame via `assign` instead of overwriting
# plot_df['P_Value']: this avoids assigning into a filtered slice of all_df
# (SettingWithCopyWarning) and keeps the raw p-values in plot_df, so the cell
# can be re-run safely.
masked_plot_df = plot_df.assign(
    P_Value=plot_df['P_Value'].where(plot_df['P_Value'] < 0.05, 1)
)
p.plotCircleHeatMap(masked_plot_df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 800, legend_min = 0.01, legend_max = 0.05)

In [None]:
# Key each row by "<Proteomics>_<Cancer>" so rows can be picked and ordered by label.
plot_df["Index"] = plot_df["Proteomics"] + "_" + plot_df["Cancer"]
plot_df = plot_df.set_index("Index")

# Row labels in the order they should appear in ordered_df. The first group is
# the Hnscc rows plus one Gbm row ("PIK3CD_Gbm") placed between PTPRC and PIK3CG.
# NOTE(review): some labels repeat ("RAC2_Hnscc", "PIK3CD_Gbm", "RAC2_Luad");
# reindex returns a row for every requested label - confirm this is intended.
first_group_labels = [
    "CD2_Hnscc", 'CD4_Hnscc', 'LCK_Hnscc', 'ZAP70_Hnscc', 'VAV1_Hnscc',
    'GRB2_Hnscc', 'NFATC2_Hnscc', 'RAC2_Hnscc', 'SYK_Hnscc', 'BTK_Hnscc',
    'CD5_Hnscc', 'PTPRC_Hnscc', "PIK3CD_Gbm", 'PIK3CG_Hnscc', 'INPP5D_Hnscc',
    'PRKCQ_Hnscc', 'BLNK_Hnscc', "NFKB2_Hnscc", "CHUK_Hnscc", "PRKCB_Hnscc",
    "PLCG2_Hnscc", "ELMO1_Hnscc", "DOCK2_Hnscc", "RAC2_Hnscc", "WAS_Hnscc",
]
gbm_labels = [
    'VAV1_Gbm', 'GRB2_Gbm', 'SYK_Gbm', 'BTK_Gbm', 'PTPRC_Gbm',
    'INPP5D_Gbm', 'PRKCQ_Gbm', 'PIK3CD_Gbm', 'BLNK_Gbm', "CHUK_Gbm",
    "PRKCB_Gbm", "PLCG2_Gbm", "ELMO1_Gbm", "DOCK2_Gbm", 'WAS_Gbm',
]
luad_labels = [
    "CD2_Luad", 'CD4_Luad', 'LCK_Luad', 'ZAP70_Luad', 'RAC2_Luad',
    'CD5_Luad', 'PTPRC_Luad', 'PIK3CD_Luad', "PRKCB_Luad", "CHUK_Luad",
    "ELMO1_Luad", "RAC2_Luad", "WAS_Luad",
]
# "HLA-DMA_Gbm" and "HLA-DMB_Gbm" are currently not included.

ordered_df = plot_df.reindex(first_group_labels + gbm_labels + luad_labels)

In [None]:
# Alternative (not used): drop the non-significant rows instead of masking them.
#ordered_df = ordered_df.loc[plot_df['P_Value'] < 0.05] # 25 if not < .05

# Any p-value that is not below 0.05 is replaced by 1 so it is not seen on the heatmap.
pvals = ordered_df['P_Value']
is_significant = pvals < 0.05
ordered_df['P_Value'] = pvals.where(is_significant, 1)

In [None]:
# Smallest p-value in the ordered table
# (presumably used to pick the legend bounds for the final heatmap - confirm).
ordered_df.loc[:, 'P_Value'].min()

In [None]:
# Draw the circle heatmap for the ordered rows and save it as a PNG.
# Circle size is driven by 'P_Value' and color by 'Medians', with genes on the
# x-axis and cancers on the y-axis.
p.plotCircleHeatMap(
    ordered_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=800,
    legend_min=0.0005,
    legend_med=.005,
    legend_max=0.05,
    save_png="Figure_PTEN_immune_1.png",
)