# Create Heatmap for Immune genes

This notebook examines immune-related genes that are significant in at least one cancer. Pan-cancer heatmaps are drawn with circle size indicating significance (p-value) and color indicating the difference in medians.

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import gseapy as gp
import re
import sys 

import cptac
import cptac.utils as u

import plot_utils as p



# Create HeatMap

In [2]:
# Hand-curated immune gene symbols used to subset the heatmap table.
# NOTE(review): 'CD5' and 'LCK' each appear twice, so the length shown below
# counts list entries, not distinct genes.
# NOTE(review): 'INFGR1' looks like a typo of 'IFNGR1', and 'IFDR1' is not a
# symbol I can place -- confirm both against the Proteomics column.
immune = [
    'CD2', 'CD5', "CD79A", "CD79B", "CD274", "BLK", "BTK", "LCK", "LYN",
    "SYK", "BLNK", "CD19", "PLCG2", "PI3K", "CD5", "CD22", "PCK", "HS1",
    "CCL3", "CCL4", "PTPN6", "ZAP70", "LCK", "ITK", "CD3E", "CD3G", "CD4",
    "VAV1", "GRB2", "ADAP", "GADS", "LAT", "SLP76", "NFAT", "PLCG1",
    "PTPN22", "SOS1", "HLA-DP", "HLA-DM", "HLA-DOA", "HLA-DOB", "HLA-DQ",
    "HLA-DRA", "HLA-DRB1", "GZMA", "CD8A", "PRF1", "CIITA", "INFGR1",
    "JAK1", "JAK2", "STAT1", "B2M", "HLA-A", "HLA-B", "HLA-C", "TAP1",
    "TAP2", "PSMB8", "PSMB9", "PSMB10", "IFDR1", "TGFB1",
]
len(immune)

63

In [7]:
# Three heatmap input tables read from ../Make_Tables/csv:
#   all_df  - every protein
#   sig_df  - proteins significant in at least one cancer
#   mult_df - proteins significant in multiple cancers
all_df = pd.read_csv("../Make_Tables/csv/all_heatmap.csv")
sig_df = pd.read_csv('../Make_Tables/csv/sig_pval_heatmap.csv')
mult_df = pd.read_csv('../Make_Tables/csv/mult_sig_pval_heatmap.csv')

# Report how many distinct Proteomics entries each significance table holds.
n_sig_one = len(sig_df['Proteomics'].unique())
n_sig_mult = len(mult_df['Proteomics'].unique())
print('sig in at least one cancer:', n_sig_one)
print('sig in mult cancer:', n_sig_mult)

sig in at least one cancer: 2630
sig in mult cancer: 332


In [4]:
# Restrict the all-protein table to the hand-curated `immune` genes.
bool_df = all_df['Proteomics'].isin(immune)
plot_df = all_df.loc[bool_df]
n_genes_found = len(plot_df['Proteomics'].unique())
print('genes found in df with all proteins:', n_genes_found)

# Only rows whose P_Value is below 0.05 are passed on to the heatmap.
plot_df = plot_df[plot_df['P_Value'] < 0.05]

genes found in df with all proteins: 45


In [5]:
# Circle heatmap of the filtered rows: circle size is driven by P_Value and
# color by Medians, with proteins on the x axis and cancers on the y axis.
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=800,
)

In [6]:
# Fetch the members of Reactome pathway R-HSA-168256 through cptac.utils.
# The result is bound to `immune_reactome` instead of `immune`: the original
# cell reused the name `immune`, replacing the hand-curated gene list with the
# pathway lookup result, so re-running the earlier `isin(immune)` cell after
# this one would silently filter on a different object.
immune_reactome = u.get_proteins_in_pathways('R-HSA-168256', 'reactome')

# Gene symbols are taken from the result's `member` field.
immune_list = list(immune_reactome.member)

In [7]:
# Query the three Reactome pathways in the same order as before:
# adaptive, innate, then cytokine.
adaptive, innate, cytokine = (
    u.get_proteins_in_pathways(pathway_id, 'reactome')
    for pathway_id in ('R-HSA-1280218', 'R-HSA-168249', 'R-HSA-1280215')
)

# Pull the `member` field of each result into a plain list.
adaptive_list = [*adaptive.member]
innate_list = [*innate.member]
cytokine_list = [*cytokine.member]

# Combined list is adaptive followed by innate; cytokine_list is fetched but
# deliberately left out of the combination.
all_list = [*adaptive_list, *innate_list]

# gene_list is an alias for the same list object.
gene_list = all_list

In [8]:
# Distinct proteins in the multi-cancer significant table.
n_mult_sig = len(mult_df['Proteomics'].unique())
print('total mult sig:', n_mult_sig)

# Subset to adaptive-pathway members and record their distinct symbols.
bool_df = mult_df['Proteomics'].isin(adaptive_list)
adaptive_df = mult_df.loc[bool_df]
adaptive_mult_sig = list(adaptive_df['Proteomics'].unique())
print('adaptive:', len(adaptive_mult_sig))

# Same for innate-pathway members.
bool_df = mult_df['Proteomics'].isin(innate_list)
innate_df = mult_df.loc[bool_df]
innate_mult_sig = list(innate_df['Proteomics'].unique())
print('innate:', len(innate_mult_sig))

total mult sig: 332
adaptive: 32
innate: 37


In [9]:
# Genes present in one multi-sig pathway list but absent from the other.
# Order follows the source list in each case.
unique_adaptive = [
    gene
    for gene in adaptive_mult_sig
    if gene not in innate_mult_sig
]
unique_innate = [
    gene
    for gene in innate_mult_sig
    if gene not in adaptive_mult_sig
]

In [10]:
# Restrict the multi-cancer significant table to the pathway genes in gene_list.
bool_df = mult_df.Proteomics.isin(gene_list)
plot_df = mult_df[bool_df]

# The label previously read "df with all proteins" (copied from the all_df
# cell); the frame being filtered here is mult_df, so the label now says so.
print('genes found in df with proteins sig in mult cancers:', len(plot_df.Proteomics.unique()))

# Only rows whose P_Value is below 0.05 are passed on to the heatmap.
plot_df = plot_df.loc[plot_df['P_Value'] < 0.05]

genes found in df with all proteins: 53


In [11]:
# Circle heatmap of the filtered rows: circle size is driven by P_Value and
# color by Medians; the legend's upper bound is set to 0.05.
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=1000,
    legend_max=0.05,
)

In [12]:
# Members of Reactome pathway R-HSA-983705; the cell displays how many
# entries came back.
b_cell_reactome = u.get_proteins_in_pathways('R-HSA-983705', 'reactome')
b_cell_list = [*b_cell_reactome.member]
len(b_cell_list)

175

In [22]:
# Reactome pathway IDs. Only calcinerin_NFAT is queried in this cell; noncan
# is defined but not used here.
calcinerin_NFAT = 'R-HSA-2025928'
noncan = 'R-HSA-5676590'

# NOTE(review): the original trailing comment on this call read
# "NF-kappa activation in B cells", but the ID passed is the one bound to
# calcinerin_NFAT -- confirm which pathway is intended.
nf_df = u.get_proteins_in_pathways(calcinerin_NFAT, 'reactome')
nf = [*nf_df.member]

In [26]:
# Hand-picked gene groups for the combined heatmap. Symbols that the author
# left commented out next to a group are preserved in the notes below.
b_cell = [
    'MAP4K1', 'PPP3CB', 'PTPRC', 'SYK', 'CHUK',
    'PIK3CA', 'BLNK', 'BTK', 'PLCG2',
]

# Commented out by the author for this group: 'MAP4K1', 'PTPRC', 'CHUK'
t_cell = ['ZAP70', 'CD4', 'PRKCB', 'WAS']

# Disabled group, kept for reference:
# class_1_pi3k = ['PIK3CD', 'CYTH1']  # 'BLNK', 'SYK', 'PIK3CA', 'PLCG2', 'BTK', 'ZAP70'

# Commented out by the author for this group: 'PRKCB'
cyto_mediated = ['GNG2', 'RAC2', 'ELMO1', 'DOCK2', 'GNAI2']

# 'CHUK' is also listed in b_cell, so it appears twice in both_cell.
alt_NF_kappa = ['CHUK', 'NFKB2']

# Author's note: interferon (JAK, STAT)
other = ['CD5', 'CD2', 'PRKDC', 'PIK3CD', 'JAK1', 'JAK2', 'STAT1']

# Concatenation order: b_cell, t_cell, alt_NF_kappa, other, cyto_mediated.
both_cell = [*b_cell, *t_cell, *alt_NF_kappa, *other, *cyto_mediated]

In [27]:
# Restrict the "significant in at least one cancer" table to the curated
# both_cell gene list.
bool_df = sig_df.Proteomics.isin(both_cell)
plot_df = sig_df[bool_df]

# The label previously read "df with all proteins" (copied from the all_df
# cell); the frame being filtered here is sig_df, so the label now says so.
print('genes found in df with proteins sig in at least one cancer:', len(plot_df.Proteomics.unique()))

# Only rows whose P_Value is below 0.05 are passed on to the heatmap.
plot_df = plot_df.loc[plot_df['P_Value'] < 0.05]

genes found in df with all proteins: 26


In [28]:
# Circle heatmap of the filtered rows: circle size is driven by P_Value and
# color by Medians; the legend range is set to 1e-4 .. 0.05.
p.plotCircleHeatMap(
    plot_df,
    circle_var='P_Value',
    color_var='Medians',
    x_axis='Proteomics',
    y_axis='Cancer',
    plot_height=400,
    plot_width=800,
    legend_min=1e-4,
    legend_max=0.05,
)

In [16]:
def Pvalue_sig(row, alpha=0.05):
    """Count how many p-values in ``row`` are significant.

    Used row-wise on the per-cancer p-value columns, so the result is the
    number of cancers in which a given gene has a significant test.

    Parameters
    ----------
    row : iterable
        P-values for one gene, one entry per cancer. Entries that
        ``pd.isnull`` reports as missing are skipped.
    alpha : float, default 0.05
        Significance cutoff. A p-value counts only when it is strictly
        below ``alpha``. The default matches the previously hard-coded
        threshold (the notebook applies it to FDR-BH corrected p-values).

    Returns
    -------
    int
        Number of non-missing entries strictly below ``alpha``.
    """
    # The null check comes first so that None never reaches the comparison.
    return sum(
        1
        for p_value in row
        if not pd.isnull(p_value) and p_value < alpha
    )

In [17]:
# Load the per-cancer p-value table and drop the PTEN row(s).
# NOTE(review): `root` is not defined in any visible cell of this notebook; it
# has to be set to the directory that holds all_proteins.csv before this cell
# runs, otherwise a fresh-kernel run stops here with a NameError.
# The file name is joined with "/" rather than the original "\" so the same
# cell works on Windows, macOS and Linux (a backslash is not a path separator
# outside Windows).
all_proteins_csv = root + "/all_proteins.csv"
df_fdr_pvals = pd.read_csv(all_proteins_csv)
df_fdr_pvals = df_fdr_pvals.loc[df_fdr_pvals['Proteomics'] != 'PTEN'] # Drop PTEN

In [18]:
# Add a Num_sig column: for each gene, the number of cancers whose p-value
# passes Pvalue_sig's cutoff, then order genes from most to fewest.
p_value_columns = [
    "Gbm_P_Value", "Hnscc_P_Value", "Luad_P_Value", "Lscc_P_Value",
    "Brca_P_Value", "Ov_P_Value", "En_P_Value", "Colon_P_Value",
]
num_sig = df_fdr_pvals[p_value_columns].apply(Pvalue_sig, axis=1)

# .assign builds a new frame instead of writing a column into the
# .loc-filtered frame in place, which avoids pandas' SettingWithCopyWarning.
df_fdr_pvals = (
    df_fdr_pvals
    .assign(Num_sig=num_sig)
    .sort_values(by=['Num_sig'], ascending=False)
)

In [19]:
# Keep trans genes that are significant in exactly 3 cancers (Num_sig == 3).
# The previous comment and printed label said "at least 1 cancer", which did
# not match the isin([3]) filter.
# NOTE(review): the `_1` suffix on df_fdr_pvals_1 is historical; the name is
# kept unchanged here.
df_fdr_pvals_1 = df_fdr_pvals[df_fdr_pvals['Num_sig'].isin([3])]
print('Total genes sig in exactly 3 cancers:', len(df_fdr_pvals_1.Proteomics.unique()))
sig_3 = list(df_fdr_pvals_1.Proteomics)

Total genes sig in at least 1 cancer: 38


In [20]:
# Entries of sig_3 that also occur in all_list (the combined adaptive +
# innate pathway members), in sig_3 order.
t = [
    gene
    for gene in sig_3
    if gene in all_list
]

In [21]:
# Print a label, then display `t` (the sig_3 entries that are also in
# all_list) as the cell's output.
print('immune genes sig in 3 cancers:')
t

immune genes sig in 3 cancers:


['ELMO1', 'ACTR10', 'CHUK', 'PTPRC', 'WAS', 'DCTN1', 'PRKCB']