In [1]:
import pandas as pd
import numpy as np

In [2]:
import pandas as pd
import numpy as np
from bokeh.palettes import RdBu
from bokeh.models import LinearColorMapper, ColumnDataSource, ColorBar
from bokeh.models.ranges import FactorRange
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.io import export_png
from bokeh.io import export_svgs
import math as math


'''
@Param df: Dataframe. Contains column with x-axis categorical variables, y-axis categorical variables,
and columns for circle size and color gradient. 
@Param circle_var. String. Name of column for numeric data to base circle size off of 
@Param color_var. String. Name of column of numeric data to base color gradient off of. Can be the same or different as circle_var
@Param x_axis String. Name of column for x-axis categorical labels
@Param y_axis String. Name of column for y-axis categorical labels
@Param x_axis_lab. String. Default is no label. 
@Param y_axis_lab. String. Default is no label. 

This function creates a bokeh map that is heat map with extra variable of size of the circles. 

'''
def plotCircleHeatMap ( df, circle_var, color_var, x_axis, y_axis,x_axis_lab = "no_label", y_axis_lab = "no_label"):
  


    #added a new column to make the plot size
#     df['size'] = df['P_Value'].apply(lambda x: -1*(np.log(x)/10))


    df['size'] = np.where(df[circle_var]<0, np.abs(df[circle_var]), df[circle_var])*50
 

    colors = list(RdBu[9])
    exp_cmap = LinearColorMapper(palette=colors, low = -1, high = 1)
    p = figure(x_range = FactorRange(), y_range = FactorRange(), plot_width=700, 
               plot_height=700, 
               toolbar_location=None, tools="hover")

    p.scatter(x_axis,y_axis,source=df, fill_alpha=1,  line_width=0, size="size", 
              fill_color={"field":color_var, "transform":exp_cmap})

    p.x_range.factors = sorted(df[x_axis].unique().tolist())
    p.y_range.factors = sorted(df[y_axis].unique().tolist(), reverse = True)
    p.xaxis.major_label_orientation = math.pi/2
    
    if (x_axis_lab != "no_label" ):
        p.xaxis.axis_label = x_axis_lab
    if (x_axis_lab != "no_label" ):   
        p.yaxis.axis_label = y_axis_lab

    bar = ColorBar(color_mapper=exp_cmap, location=(0,0))
    p.add_layout(bar, "right")
    output_notebook()
  
    show(p)

In [3]:
trans = pd.read_csv('~/WhenMutationsDontMatter/PIK3CA/csv_files/proteomics.csv', index_col=0)

In [4]:
def significant(row):
    if row['P_Value_Brca'] > .05:
        row['P_Value_Brca'] = np.nan
        row['Difference_In_Median_Brca'] = np.nan

    if row['P_Value_Endo'] > .05:
        row['P_Value_Endo'] = np.nan
        row['Difference_In_Median_Endo'] = np.nan
        

    if row['P_Value'] > .05:
        row['P_Value'] = np.nan
        row['Difference_In_Median'] = np.nan
    return row

sig = trans.apply(significant, axis=1)



In [5]:
brca = sig.drop(columns=['Difference_In_Median_Endo', 'P_Value_Endo','Difference_In_Median','P_Value'])
brca['cancer_type'] = "brca"
brca.columns = ['Difference_In_Median', 'P_Value','cancer_type']
brca.head()

endo = sig.drop(columns=['Difference_In_Median_Brca', 'P_Value_Brca','Difference_In_Median','P_Value'])
endo['cancer_type'] = "endo"
endo.columns = ['Difference_In_Median', 'P_Value','cancer_type']
endo.head()

colon = sig.drop(columns=['Difference_In_Median_Brca', 'P_Value_Brca','Difference_In_Median_Endo','P_Value_Endo'])
colon['cancer_type'] = "colon"
colon.head()

c_and_e = colon.append(endo)
df = c_and_e.append(brca)
df["comparison"] = df.index
df

# log p-vals for right scale in plot (bigger circle, smaller pval)
df['size'] = df['P_Value'].apply(lambda x: -1*(np.log(x)/10))

df

Unnamed: 0,Difference_In_Median,P_Value,cancer_type,comparison,size
A2ML1,,,colon,A2ML1,
AADAT,,,colon,AADAT,
AAGAB,-0.1390,0.039189,colon,AAGAB,0.323937
AASDHPPT,,,colon,AASDHPPT,
AATF,,,colon,AATF,
...,...,...,...,...,...
ZNF654,-0.4929,0.048872,brca,ZNF654,0.301856
ZNF768,,,brca,ZNF768,
ZNHIT3,,,brca,ZNHIT3,
ZNRD1,,,brca,ZNRD1,


## Correlation set to .3

In [17]:
def HasPosNeg(row):
    hasPos = False
    hasNeg= False

    for item in row:
#         import pdb; pdb.set_trace()
        if pd.isnull(item):
            continue
        if item < -0.3:
            hasNeg = True
        if item > 0.3:
            hasPos = True
           
    if hasPos & hasNeg:
        return True
    return False

p = sig.drop(columns= ['P_Value_Brca','P_Value_Endo','P_Value'])
p["Pos_Neg"] = p.apply(HasPosNeg, axis = 1)
pn = p.loc[p['Pos_Neg'] == True]
pn_genes = list(pn.index)
get = df['comparison'].isin(pn_genes)
plot_df2 = df[get]
plot_df2.head()
plot_df2['size'] = plot_df2['P_Value'].apply(lambda x: -1*(np.log(x)/10))
# plot_df2

plotCircleHeatMap(plot_df2, circle_var = 'size', color_var='Difference_In_Median', x_axis= 'cancer_type', y_axis = 'comparison')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [13]:
def HasPosNeg(row):
    hasPos = False
    hasNeg= False

    for item in row:
#         import pdb; pdb.set_trace()
        if pd.isnull(item):
            continue
        if item < -0.6:
            hasNeg = True
        if item > 0.6:
            hasPos = True
           
    if hasPos & hasNeg:
        return True
    return False

p = sig.drop(columns= ['P_Value_Brca','P_Value_Endo','P_Value'])
p["Pos_Neg"] = p.apply(HasPosNeg, axis = 1)
pn = p.loc[p['Pos_Neg'] == True]
pn_genes = list(pn.index)
get = df['comparison'].isin(pn_genes)
plot_df3 = df[get]
plot_df3.head()
plot_df3['size'] = plot_df3['P_Value'].apply(lambda x: -1*(np.log(x)/10))
# plot_df2

plotCircleHeatMap(plot_df3, circle_var = 'size', color_var='Difference_In_Median', x_axis= 'cancer_type', y_axis = 'comparison')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
