# Immune Circle Heatmap

This notebooks takes the genes that are a hit from the Brca reactome gsea (metabolism pathway) and maps them on a large circle heat map. 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats
import re
import sys 
import statsmodels.stats.multitest


import gseapy as gp
from gseapy.plot import barplot, dotplot

import cptac
import cptac.utils as u
import plot_utils as p 

  import pandas.util.testing as tm


In [3]:
import statsmodels.stats.multitest
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, export_png, export_svgs
from bokeh.layouts import row
import math as math

def plotCircleHeatMap ( df, circle_var, color_var, x_axis, y_axis,plot_width= 1000, plot_height = 650, x_axis_lab = "no_label", y_axis_lab = "", show_plot = True, save_png = "plot.png"):
  
    # circle_var designed for pvalues. Normalized by taking log 10 of values and multiplying by 5 
    #added a new column to make the plot size
    
    df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
    df['size'] = (df["size2"])*3
    #find values to set color bar min/ max as 
    maxval = df[color_var].max()
    minval = df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, 
               toolbar_location=None, tools="hover")

    p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})

    p.x_range.factors = sorted(df[x_axis].unique().tolist())
    p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    p.xaxis.major_label_orientation = math.pi/2
    
    if (x_axis_lab != "no_label" ):
        p.xaxis.axis_label = x_axis_lab
    if (x_axis_lab != "no_label" ):   
        p.yaxis.axis_label = y_axis_lab

    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    
    # Create Circle Legend
    circle_legend = create_circle_legend(df, circle_var, color_var)
    
    if show_plot:
        output_notebook()
        show(row(p, circle_legend))
      
    if save_png != "plot.png":
        export_png(p, filename= save_png)
             
        
'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param lowest_pval: Float. Lowest p-value to include in the legend.
@Param highest_pval: Float. Highest p-value to include in the legend.

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend_df(lowest_pval = 1e-6, highest_pval = .05):
    lowest_pval_str = "{:.1e}".format(lowest_pval, '.2f')
    med_pval_str = "{:.1e}".format(lowest_pval * float(100), '.2f')
    highest_pval_str = "{:.1e}".format(highest_pval, '.2f')
    
    
    data = {'P_Value':  [lowest_pval, (lowest_pval * float(100)), highest_pval],
            'y_axis': [lowest_pval_str, med_pval_str, highest_pval_str],
            'x_axis': ['', '', ''],
            'Correlation': [-.5, -.5, -.5]}

    fake_df = pd.DataFrame (data, columns = ['x_axis', 'y_axis', 'P_Value', "Correlation"])
    
    fake_df["size2"] = fake_df['P_Value'].apply(lambda x: -1*(np.log(x)))
    fake_df['size'] = (fake_df["size2"])*3
    
    return fake_df

'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param circle_var: Column Label. Same as passed to plotCircleHeatMap.
@Param color_var: Column Label. Same as passed to plotCircleHeatMap.
@Param x_axis: Column Label. Used on the x-axis.
@Param y_axis: Column Label. Used on the y-axis.
@Param lowest_pval: Float. Lowest p-value to include in the legend.
@Param highest_pval: Float. Highest p-value to include in the legend.

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend(df, circle_var, color_var, x_axis = 'x_axis', y_axis = 'y_axis', 
                         lowest_pval = 1e-6, highest_pval = .05, plot_height = 200, plot_width = 120):
    # Use the smallest pval
    if df[circle_var].min() < lowest_pval:
        lowest_pval = df[circle_var].min()
    circle_df = create_circle_legend_df(lowest_pval, highest_pval)
    
    
    circle = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, toolbar_location=None, tools="hover")

    circle.scatter(x_axis, y_axis, source = circle_df, fill_alpha=1,  line_width=0, size="size")
    
    circle.x_range.factors = sorted(circle_df[x_axis].unique().tolist())
    circle.y_range.factors = sorted(circle_df[y_axis].unique().tolist(), reverse = False)
    circle.xaxis.major_label_orientation = math.pi/2
    
    circle.xaxis.axis_label = 'FDR P-Values'
    
    return circle
      


In [5]:
prot_FDR = pd.read_csv("../Step3.1_Pearson_dfs_by_cancer/csv_files/EGFR_all_pearson_FDR.csv")
df_FDR= prot_FDR.drop(['Unnamed: 0'], axis=1)
df_FDR = df_FDR.set_index("Comparison")
df1_transposed = df_FDR.T 
df1_transposed


FileNotFoundError: [Errno 2] File ../Step3.1_Pearson_dfs_by_cancer/csv_files/EGFR_all_pearson_FDR.csv does not exist: '../Step3.1_Pearson_dfs_by_cancer/csv_files/EGFR_all_pearson_FDR.csv'

In [100]:
#filter down df with just metabolism genes 
immune_column_names = []
for gene in t_cell:
    gene += "_proteomics"
    immune_column_names.append(gene)

df_FDR_immune = df_FDR_append[df_FDR_append.Comparison.isin(immune_column_names)]
df_FDR_immune = df_FDR_immune.replace(to_replace ='_proteomics', value = '', regex = True)

In [101]:
df_FDR_immune

Unnamed: 0,Comparison,Correlation,P_value,fdr_bh_p_val,Cancer Type
2,GRB2,-0.610889,1.886384e-11,6.72999e-08,GBM
58,CSK,-0.481565,4.507321e-07,8.176587e-05,GBM
83,CD4,-0.467479,1.069659e-06,0.0001362923,GBM
100,LCP2,-0.460881,1.582634e-06,0.0001665085,GBM
127,VAV1,-0.447143,3.486858e-06,0.0002894593,GBM
2264,GRAP2,-0.30529,0.001461547,0.0209574,Kidney
2394,VAV1,-0.285693,0.002482501,0.02991953,Kidney
2486,ZAP70,-0.276087,0.003505757,0.03791846,Kidney
5838,ZAP70,-0.384613,3.346413e-05,0.002105158,Luad
5884,LCK,-0.370894,6.64184e-05,0.003302385,Luad


In [83]:
#Make plot using plot utils
plotCircleHeatMap(df_FDR_immune, circle_var = "fdr_bh_p_val", color_var = "Correlation", x_axis = "Comparison", y_axis = "Cancer Type",plot_width= 600, plot_height = 450,save_png = "immune.png", x_axis_lab= "Proteomics")

In [84]:
b_cell =  ["BLNK","BTK","SYK","SH3KBP1","LYN","BLK","CD5","PLCG2","CD79A"]

In [85]:
immune_column_names = []
for gene in b_cell:
    gene += "_proteomics"
    immune_column_names.append(gene)


In [91]:

df_FDR_immune = df_FDR_append[df_FDR_append.Comparison.isin(immune_column_names)]

df_FDR_immune = df_FDR_immune.replace(to_replace ='_proteomics', value = '', regex = True)

In [93]:
plotCircleHeatMap(df_FDR_immune, "P_value","Correlation","Comparison","Cancer Type",plot_width= 550,x_axis_lab = "Proteomics", plot_height = 450,)

In [94]:
app_components_and_reg = ["HLA-A","HLA-B","HLA-C","B2M","TAP2","TAP1","TAPBP","PSMB8","PSMB9","PSMB10", "IFNGR1","IFNGR2","JAK1","JAK2","STAT1","IGR1"]


In [95]:
immune_column_names = []
for gene in app_components_and_reg:
    gene += "_proteomics"
    immune_column_names.append(gene)

df_FDR_immune = df_FDR_append[df_FDR_append.Comparison.isin(immune_column_names)]

df_FDR_immune = df_FDR_immune.replace(to_replace ='_proteomics', value = '', regex = True)

In [97]:
plotCircleHeatMap(df_FDR_immune, "P_value","Correlation","Comparison","Cancer Type",plot_width= 500, x_axis_lab = "Proteomics", plot_height = 450,)

In [48]:
df_FDR_immune

Unnamed: 0,Comparison,Correlation,P_value,fdr_bh_p_val,Cancer Type,size2,size
7455,PSMB8_proteomics,-0.318132,0.000749,0.013763,Hnscc,7.197313,21.59194
7501,PSMB10_proteomics,-0.31117,0.00099,0.01682,Hnscc,6.918044,20.754131
7672,PSMB9_proteomics,-0.291602,0.002095,0.027829,Hnscc,6.168337,18.50501


In [57]:
app_regulators = ["IFNGR1","IFNGR2","JAK1","JAK2","STAT1","IGR1"]

In [58]:
immune_column_names = []
for gene in app_regulators:
    gene += "_proteomics"
    immune_column_names.append(gene)

df_FDR_immune = df_FDR_append[df_FDR_append.Comparison.isin(immune_column_names)]

In [59]:
plotCircleHeatMap(df_FDR_immune, "P_value","Correlation","Comparison","Cancer Type",plot_width= 500, plot_height = 450,)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [63]:
df_FDR_append_all = pd.read_csv("../Step3.2_combining_pearson_dfs/csv_files/pancan_EGFR_pearson_all_prot_append_return_all.csv")

In [66]:

df_FDR_immune_all = df_FDR_append_all[df_FDR_append_all.Comparison.isin(immune_column_names)]

In [67]:
plotCircleHeatMap(df_FDR_immune_all, "P_value","Correlation","Comparison","Cancer Type",plot_width= 500, plot_height = 450,)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [6]:
# get a list of genes in a complete Pathway 
#This one is from Reactomes Innate immune system pathway
df2 = u.search_reactome_proteins_in_pathways("R-HSA-168256.7")
immune_genes = df2.member.unique()

immune_all_genes = []
for gene in immune_genes:
    gene += "_proteomics"
    immune_all_genes.append(gene)

In [2]:
df_all_wide = pd.read_csv("../Step3.2_combining_pearson_dfs/csv_files/pancan_EGFR_all_return_all_wide.csv")

In [3]:
df_all_wide= df_all_wide.drop(['Unnamed: 0'], axis=1)
df_all_wide


Unnamed: 0,Comparison,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
0,GRB2_proteomics,-0.610889,1.886384e-11,-0.217427,0.022506,-0.190090,0.085202,-0.177379,0.057899,-0.302439,0.001321,-0.532341,2.559824e-09,0.150960,0.139949,-0.198042,0.039921
1,SOCS2_proteomics,0.562720,1.343464e-09,,,,,,,,,0.020297,8.984786e-01,,,0.472624,0.000308
2,CDH4_proteomics,0.559180,1.790048e-09,0.148407,0.257781,,,,,,,,,,,,
3,PLA2G15_proteomics,-0.556624,2.197562e-09,-0.298029,0.001566,-0.035395,0.759903,0.274185,0.003025,-0.182930,0.055768,-0.089313,3.557176e-01,,,-0.147438,0.127827
4,DAB2_proteomics,-0.556402,2.237010e-09,-0.076173,0.428981,0.076981,0.489114,0.326055,0.000376,-0.086403,0.369438,-0.208437,2.962992e-02,-0.147519,0.149313,-0.072496,0.455900
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24326,TAGAP_proteomics,,,,,,,,,,,,,,,-0.002387,0.983341
24327,ZNF283_proteomics,,,,,,,,,,,,,,,0.002610,0.983540
24328,TRIM39_proteomics,,,,,,,,,,,,,,,-0.002379,0.985114
24329,ADGRA3_proteomics,,,,,,,,,,,,,,,0.002228,0.987751


In [34]:

df_all_immune = df_all_wide[df_all_wide.Comparison.isin(immune_all_genes)]

In [35]:

df_all_immune = df_all_immune.rename(columns={"Comparison": "Trans_Gene"})


df_all_immune = df_all_immune.replace(to_replace ='_proteomics', value = '', regex = True)
df_all_immune = df_all_immune.set_index("Trans_Gene")

In [37]:
df_all_immune.to_csv("Immune_trans_effects.csv")

In [38]:
df_all_immune

Unnamed: 0_level_0,Correlation_Gbm,P_value_Gbm,Correlation_kidney,P_value_kidney,Correlation_Ovar,P_value_Ovar,Correlation_Brca,P_value_Brca,Correlation_Luad,P_value_Luad,Correlation_hnscc,P_value_hnscc,Correlation_colon,P_value_colon,Correlation_Lscc,P_value_Lscc
Trans_Gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
GRB2,-0.610889,1.886384e-11,-0.217427,0.022506,-0.190090,0.085202,-0.177379,0.057899,-0.302439,0.001321,-0.532341,2.559824e-09,0.150960,0.139949,-0.198042,0.039921
SOCS2,0.562720,1.343464e-09,,,,,,,,,0.020297,8.984786e-01,,,0.472624,0.000308
GLA,-0.550491,3.570229e-09,-0.298348,0.001547,0.022300,0.841394,0.069504,0.460453,0.163722,0.087439,-0.019486,8.406062e-01,0.057118,0.578410,-0.033880,0.727770
CTSC,-0.546285,4.951742e-09,-0.302316,0.001328,-0.072453,0.515094,0.266940,0.003928,0.302760,0.001305,-0.052297,5.891481e-01,0.193621,0.057401,-0.037491,0.700078
SHC1,-0.540524,7.695118e-09,,,0.175098,0.113358,0.090908,0.333927,-0.135844,0.262164,-0.085910,3.744159e-01,0.203250,0.045855,0.134121,0.166399
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ASB8,,,,,,,,,,,,,,,0.129254,0.351591
TGFA,,,,,,,,,,,,,,,0.093917,0.442737
DUSP1,,,,,,,,,,,,,,,-0.105239,0.518098
FPR2,,,,,,,,,,,,,,,0.085891,0.536878
