# Create Legend for Circle Size (Heatmaps)

In [231]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math as math
import scipy.stats
import re
import sys
import statsmodels.stats.multitest
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar, LabelSet
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, export_png, export_svgs
from bokeh.layouts import row

import cptac
import cptac.utils as u
import plot_utils as plu

Create df with fake data for the legend.

In [96]:
root = R'~\Github\WhenMutationsDontMatter\PTEN\Step_3_trans_effect\csv'
all_df = pd.read_csv(root+R'\all_heatmap.csv')

gene_df1 = all_df.loc[all_df['Proteomics'] == 'MCM3']

In [248]:
def plotCircleHeatMap ( df, circle_var, color_var, x_axis, y_axis,plot_width= 1000, plot_height = 650, x_axis_lab = "no_label", y_axis_lab = "no_label", show_plot = True, save_png = "plot.png"):
  
    # circle_var designed for pvalues. Normalized by taking log 10 of values and multiplying by 5 
    #added a new column to make the plot size
    
    
    df = add_legend_df(df, df[circle_var].min())
    
    ###
    
    df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
    df['size'] = (df["size2"])*3
    #find values to set color bar min/ max as 
    maxval = df[color_var].max()
    minval = df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, 
               toolbar_location=None, tools="hover")

    p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})

    p.x_range.factors = sorted(df[x_axis].unique().tolist())
    p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    p.xaxis.major_label_orientation = math.pi/2
    
    if (x_axis_lab != "no_label" ):
        p.xaxis.axis_label = x_axis_lab
    if (x_axis_lab != "no_label" ):   
        p.yaxis.axis_label = y_axis_lab

    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    
    
    
    #######
    # Create Circle Legend
    
    source = ColumnDataSource(data= {'P_Value':  [1e-6, 1e-3, 0.05],
                                    'Cancer': ['Colon','Gbm','Lscc'],
                                    'Proteomics': ['z', 'z', 'z'],
                                    'Medians': [1.5, 1.5, 1.5]})
    
    circle = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= 100, 
               plot_height=plot_height, toolbar_location=None, tools="hover")

    circle.scatter(x_axis,y_axis,source=source, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})
    circle.x_range.factors = sorted(df[x_axis].unique().tolist())
    circle.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    circle.xaxis.major_label_orientation = math.pi/2
    
    
    
    # Create Pval annotations
    labels = LabelSet(x='Proteomics', y='Cancer', text='P_Value', level='glyph',
              x_offset=-20, y_offset=-40, source=source, render_mode='canvas')
    df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
    df['size'] = (df["size2"])*3
    
    circle.add_layout(labels)
    
    if show_plot:
        output_notebook()
        show(row(p, circle)) # Show main heatmap, then circle_key
      
    if save_png != "plot.png":
        export_png(p, filename= save_png)

In [249]:
plotCircleHeatMap(gene_df1, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=500, plot_width= 500)

ERROR:bokeh.core.validation.check:E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name. This could either be due to a misspelling or typo, or due to an expected column being missing. : key "size" value "size" [renderer: GlyphRenderer(id=50876, glyph=Scatter(id='50874', ...), ...)]


In [168]:
# smallest pval = 5.396032e-11   largest pval = .01

def add_legend_df(df, lowest_pval = 1e-6, highest_pval = .05):
    data = {'P_Value':  [0, lowest_pval, 0, (lowest_pval * float(100)), 0, highest_pval, 0, 0,],
            'Cancer': ['Brca','Colon','En','Gbm','Hnscc','Lscc','Luad','Ov'],
            'Proteomics': ['z', 'z', 'z', 'z', 'z', 'z', 'z', 'z'],
            'Medians': [1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5]}

    fake_df = pd.DataFrame (data, columns = ['Cancer', 'Proteomics', 'P_Value', 'Medians'])

    new_df = df.append(fake_df)
    return new_df
        

In [159]:
plot_df = add_legend_df(gene_df1)
plot_df

Unnamed: 0,Proteomics,P_Value,Medians,Cancer,size2,size
266,MCM3,8.3e-05,0.926526,Gbm,9.392388,28.177164
11437,MCM3,0.000765,0.486629,Hnscc,7.176281,21.528843
25781,MCM3,0.154923,0.9383,Luad,1.864824,5.594472
33225,MCM3,5.1e-05,1.3377,Lscc,9.882141,29.646422
45384,MCM3,0.070444,0.4629,Brca,2.652931,7.958793
58072,MCM3,0.417795,-0.144983,Ov,0.872765,2.618296
65065,MCM3,0.015138,-0.504,En,4.190549,12.571648
81108,MCM3,0.785105,-0.014,Colon,0.241938,0.725815
0,z,0.0,1.5,Brca,,
1,z,1e-06,1.5,Colon,,


In [162]:
plu.plotCircleHeatMap(gene_df1, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=500, plot_width= 500)

In [119]:
new

Unnamed: 0,Proteomics,P_Value,Medians,Cancer,size2,size
266,MCM3,8.3e-05,0.926526,Gbm,9.392388,28.177164
11437,MCM3,0.000765,0.486629,Hnscc,7.176281,21.528843
25781,MCM3,0.154923,0.9383,Luad,1.864824,5.594472
33225,MCM3,5.1e-05,1.3377,Lscc,9.882141,29.646422
45384,MCM3,0.070444,0.4629,Brca,2.652931,7.958793
58072,MCM3,0.417795,-0.144983,Ov,0.872765,2.618296
65065,MCM3,0.015138,-0.504,En,4.190549,12.571648
81108,MCM3,0.785105,-0.014,Colon,0.241938,0.725815
0,z,0.0,1.5,Brca,inf,inf
1,z,1e-06,1.5,Colon,13.815511,41.446532


In [75]:
root = R'~\Github\WhenMutationsDontMatter\PTEN\Step_3_trans_effect\csv'
all_df = pd.read_csv(root+R'\all_heatmap.csv')


In [78]:
gene_df1 = all_df.loc[all_df['Proteomics'] == 'MCM3']
gene_df2 = gene_df1.assign(fake_median = .9)
gene_df3 = gene_df2.set_index('Cancer')
gene_df4 = gene_df3.join(fake_df)
gene_df = gene_df4.reset_index()
gene_df = gene_df.assign(Key = '')
gene_df


Unnamed: 0,Cancer,Proteomics,P_Value,Medians,fake_median,fake_pval,Key
0,Gbm,MCM3,8.3e-05,0.926526,0.9,0.0,
1,Hnscc,MCM3,0.000765,0.486629,0.9,0.0,
2,Luad,MCM3,0.154923,0.9383,0.9,0.0,
3,Lscc,MCM3,5.1e-05,1.3377,0.9,0.0,
4,Brca,MCM3,0.070444,0.4629,0.9,1e-10,
5,Ov,MCM3,0.417795,-0.144983,0.9,0.0,
6,En,MCM3,0.015138,-0.504,0.9,0.01,
7,Colon,MCM3,0.785105,-0.014,0.9,1e-05,


Create Heatmap with fake data to make the legend.

In [77]:
df = gene_df
circle_var = 'fake_pval'
color_var='fake_median'
x_axis= 'Key'
y_axis = 'Cancer'
plot_width = 400
plot_height = 600
x_axis_lab = ''
y_axis_lab = ''

df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
df['size'] = (df["size2"])*3
#find values to set color bar min/ max as 
maxval = df[color_var].max()
minval = df[color_var].min()
if maxval > abs(minval):
    minval = maxval * -1 
if maxval < abs(minval):
    maxval = minval * -1
colors = list((RdBu[9]))
exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
           plot_height=plot_height, 
           toolbar_location=None, tools="hover")

p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
          fill_color={"field":color_var, "transform":exp_cmap})

p.x_range.factors = sorted(df[x_axis].unique().tolist())
p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
p.xaxis.major_label_orientation = math.pi/2

bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
#p.add_layout(bar, "right")

output_notebook()
show(p)