# Circle legend updates

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import math as math
import scipy.stats
import re
import sys
import statsmodels.stats.multitest
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook, export_png, export_svgs
from bokeh.layouts import row

In [15]:
def plotCircleHeatMap ( df, circle_var, color_var, x_axis, y_axis, plot_width= 1000, plot_height = 650, font_size = 12, x_axis_lab = "no_label", y_axis_lab = "no_label", 
                       show_plot = True, save_png = "plot.png", legend_min = 1e-6, legend_med = .0001, legend_max = 0.01, show_legend = True):
  
    # circle_var designed for pvalues. Normalized by taking log 10 of values and multiplying by 5 
    #added a new column to make the plot size
    
    df["size2"] = df[circle_var].apply(lambda x: -1*(np.log(x)))
    df['size'] = (df["size2"])*3
    #find values to set color bar min/ max as 
    maxval = df[color_var].max()
    minval = df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, 
               toolbar_location=None, tools="hover")

    p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})

    #p.x_range.factors = sorted(df[x_axis].unique().tolist())
    p.x_range.factors = df[x_axis].unique().tolist()
    p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    p.xaxis.major_label_orientation = math.pi/2
    
    # font size
    p.axis.major_label_text_font_size = str(font_size)+"pt"
    
    if (x_axis_lab != "no_label" ):
        p.xaxis.axis_label = x_axis_lab
    if (y_axis_lab != "no_label" ):   
        p.yaxis.axis_label = y_axis_lab

    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    
    if show_plot:  
        if show_legend:
            # Create Circle Legend
            circle_legend = create_circle_legend(circle_var, color_var, legend_min, legend_med, legend_max)
            circle_legend.axis.major_label_text_font_size = str(font_size - 1)+"pt" # font size
            output_notebook()
            show(row(p, circle_legend))
        else:
            output_notebook()
            show(p)
      
    if save_png != "plot.png":
        export_png(row(p, circle_legend), filename= save_png)
         

'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param legend_min: Float. Lowest p-value to include in the legend.
@Param legend_max: Float. Highest p-value to include in the legend.
@Param color_var: 

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend_df(color_var, legend_min, legend_med, legend_max):
    # Find middle pval
    
    #exp_min = abs(np.log10(legend_min))
    #exp_max = abs(np.log10(legend_max))
    #delta = (exp_min - exp_max) / 2 
    #exp_mid = -1 * (exp_max + delta)
    #pval2 = 1 * 10**exp_mid
    
    # Foramat scientific notation pvals as strings for y_axis labels  
    max_str = "{:.1e}".format(legend_max, '.2f')
    med_str = "{:.1e}".format(legend_med, '.2f')
    min_str = "{:.1e}".format(legend_min, '.2f')
    
    # max to min
    data = {'P_Value':  [legend_max, legend_med, legend_min],
            'y_axis': [max_str, med_str, min_str],
            'x_axis': ['', '', ''],
            color_var: [1.5, 1.5, 1.5]}

    fake_df = pd.DataFrame (data, columns = ['x_axis', 'y_axis', 'P_Value', color_var])
    
    fake_df["size2"] = fake_df['P_Value'].apply(lambda x: -1*(np.log(x)))
    fake_df['size'] = (fake_df["size2"])*3
    
    return fake_df

'''
@Param df: Dataframe. Same as df passed to plotCircleHeatMap.
@Param circle_var: Column Label. Same as passed to plotCircleHeatMap.
@Param color_var: Column Label. Same as passed to plotCircleHeatMap.
@Param x_axis: Column Label. Used on the x-axis.
@Param y_axis: Column Label. Used on the y-axis.
@Param legend_min: Float. Lowest p-value to include in the legend.
@Param legend_max: Float. Highest p-value to include in the legend.

Returns: df to be used in creating the circle legend. 
'''

def create_circle_legend(circle_var, color_var, legend_min, legend_med, legend_max,
                         x_axis = 'x_axis', y_axis = 'y_axis', 
                         plot_height = 200, plot_width = 130):
    # Get customized df
    circle_df = create_circle_legend_df(color_var, legend_min, legend_med, legend_max)
    
    maxval = circle_df[color_var].max()
    minval = circle_df[color_var].min()
    if maxval > abs(minval):
        minval = maxval * -1 
    if maxval < abs(minval):
        maxval = minval * -1
    colors = list((RdBu[9]))
    exp_cmap = LinearColorMapper(palette=colors, low = minval, high = maxval)
    
    circle = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width= plot_width, 
               plot_height=plot_height, toolbar_location=None, tools="hover")

    circle.scatter(x_axis, y_axis, source = circle_df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})
    
    circle.x_range.factors = sorted(circle_df[x_axis].unique().tolist())
    circle.y_range.factors = circle_df[y_axis].unique().tolist() # plots in reverse order of circle_df (max to min)
    circle.xaxis.major_label_orientation = math.pi/2
    
    circle.xaxis.axis_label = 'FDR p-value'
    
    return circle

In [9]:
sig_df = pd.read_csv('../../Make_Tables/csv/sig_pval_heatmap.csv')

In [16]:
genes = ['MCM2','MCM3','MCM4']
b = sig_df.Proteomics.isin(genes)
df = sig_df[b]

In [17]:
plotCircleHeatMap(df, circle_var = 'P_Value', color_var='Medians', x_axis= 'Proteomics', y_axis = 'Cancer',
                    plot_height=400, plot_width= 500, legend_min = 2.5e-8, legend_med = .0001, legend_max = 0.05)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
