# Create Legend for Circle Size (Heatmaps)

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math as math
import scipy.stats
import re
import sys
import statsmodels.stats.multitest
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar, LabelSet
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, export_png, export_svgs
from bokeh.layouts import row

import cptac
import cptac.utils as u
import plot_utils as plu

New functions: create_circle_legend_df,  create_circle_legend

In [2]:
'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param lowest_pval: Float. Lowest p-value to include in the legend.
@Param highest_pval: Float. Highest p-value to include in the legend.

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend_df(lowest_pval = 1e-6, highest_pval = .01):
    lowest_pval_str = "{:.1e}".format(lowest_pval, '.2f')
    med_pval_str = "{:.1e}".format(lowest_pval * float(100), '.2f')
    highest_pval_str = "{:.1e}".format(highest_pval, '.2f')
    
    data = {'P_Value':  [lowest_pval, (lowest_pval * float(100)), highest_pval],
            'y_axis': [lowest_pval_str, med_pval_str, highest_pval_str],
            'x_axis': ['', '', ''],
            'Medians': [1.5, 1.5, 1.5]}

    fake_df = pd.DataFrame (data, columns = ['x_axis', 'y_axis', 'P_Value', 'Medians'])
    
    fake_df["size2"] = fake_df['P_Value'].apply(lambda x: -1*(np.log(x)))
    fake_df['size'] = (fake_df["size2"])*3
    
    return fake_df

In [3]:
'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param circle_var: Column Label. Same as passed to plotCircleHeatMap.
@Param color_var: Column Label. Same as passed to plotCircleHeatMap.
@Param x_axis: Column Label. Used on the x-axis.
@Param y_axis: Column Label. Used on the y-axis.
@Param lowest_pval: Float. Lowest p-value to include in the legend.
@Param highest_pval: Float. Highest p-value to include in the legend.

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend(df, circle_var, color_var, x_axis = 'x_axis', y_axis = 'y_axis', 
                         lowest_pval = 1e-6, highest_pval = .01, plot_height = 200, plot_width = 120):
    # Use the smallest pval
    if df[circle_var].min() < lowest_pval:
        lowest_pval = df[circle_var].min()
    circle_df = create_circle_legend_df(lowest_pval, highest_pval)
    
    maxval = circle_df[color_var].max()
    minval = circle_df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    
    circle = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, toolbar_location=None, tools="hover")

    circle.scatter(x_axis, y_axis, source = circle_df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})
    
    circle.x_range.factors = sorted(circle_df[x_axis].unique().tolist())
    circle.y_range.factors = sorted(circle_df[y_axis].unique().tolist(), reverse = False)
    circle.xaxis.major_label_orientation = math.pi/2
    
    circle.xaxis.axis_label = 'Legend'
    
    return circle
      

In [4]:
def plotCircleHeatMap ( df, circle_var, color_var, x_axis, y_axis,plot_width= 1000, 
                       plot_height = 650, x_axis_lab = "no_label", y_axis_lab = "no_label",
                       show_plot = True, save_png = "plot.png"):
  
    # circle_var designed for pvalues. Normalized by taking log 10 of values and multiplying by 5 
    #added a new column to make the plot size
    
    df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
    df['size'] = (df["size2"])*3
    #find values to set color bar min/ max as 
    maxval = df[color_var].max()
    minval = df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, 
               toolbar_location=None, tools="hover")

    p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})

    p.x_range.factors = sorted(df[x_axis].unique().tolist())
    p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    p.xaxis.major_label_orientation = math.pi/2
    
    if (x_axis_lab != "no_label" ):
        p.xaxis.axis_label = x_axis_lab
    if (x_axis_lab != "no_label" ):   
        p.yaxis.axis_label = y_axis_lab

    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    

    # Create Circle Legend
    circle_legend = create_circle_legend(df, circle_var, color_var)

    if show_plot:
        output_notebook()
        show(row(p, circle_legend)) # Show main heatmap, then circle_key
      
    if save_png != "plot.png":
        export_png(p, filename= save_png)

Tests 

Test that create_circle_legend_df returns a df with pvals to be used in the circle legend.

In [5]:
create_circle_legend_df()


Unnamed: 0,x_axis,y_axis,P_Value,Medians,size2,size
0,,1e-06,1e-06,1.5,13.815511,41.446532
1,,0.0001,0.0001,1.5,9.21034,27.631021
2,,0.01,0.01,1.5,4.60517,13.815511


Test that create_circle_legend returns a bokeh object that can be viewed using show()

In [6]:
root = R'~\Github\WhenMutationsDontMatter\PTEN\Step_3_trans_effect\csv'
all_df = pd.read_csv(root+R'\all_heatmap.csv')

gene_df1 = all_df.loc[all_df['Proteomics'] == 'MCM3']
gene_df1['P_Value'].min()

5.107882284089402e-05

In [7]:
fig = create_circle_legend(gene_df1, 'P_Value','Medians')
show(fig)

Test implementing plotCircleHeatMap with addition of create_circle_legend

In [8]:
plotCircleHeatMap(gene_df1, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=500, plot_width= 500)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':
