In [212]:
import numpy as np
import pandas as pd
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, Slider, Select
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.io import push_notebook, output_notebook, curdoc
from bokeh.client import push_session
output_notebook()
from bokeh.models import LabelSet
import os
import umap
from sklearn.datasets import load_digits
import matplotlib.pyplot as plt
%matplotlib inline

In [213]:
# ---- Input locations --------------------------------------------------------
dir_path = 'C:\\Users\\bazoo\\Dropbox (Personal)\\gilad_final_analysis\\publication\\Nature Cancer Yossi Figures\\Python Generated'
ICI_normalized = 'ICI_normalized_without_miRNA_and_Dummy.csv'
SI2_normalized = 'SI12_normalized_without_miRNA_and_Dummy.csv'
ribosomal_genes_path = os.path.join(
    'C:\\Users\\bazoo\\data_processing_gilad\\normalized_all',
    'ribosomal_genes.csv',
)
normalized_files = [
    os.path.join(dir_path, ICI_normalized),
    os.path.join(dir_path, SI2_normalized),
]

# ---- Load the normalized tables, keyed by their file name --------------------
dic = dict()
for norm_file in normalized_files:
    table_key = os.path.basename(norm_file)
    dic[table_key] = pd.read_csv(norm_file)
ribosomal_genes = pd.read_csv(ribosomal_genes_path)

df_ici, df_si12 = dic[ICI_normalized], dic[SI2_normalized]

# ---- Flag rows whose gene_id appears in the first column of the ribosomal list
ribosomal_genes_array = ribosomal_genes.iloc[:, 0].values
df_ici['isRibosomal'] = df_ici['gene_id'].isin(ribosomal_genes_array)
df_si12['isRibosomal'] = df_si12['gene_id'].isin(ribosomal_genes_array)

# ---- Join the two tables on their shared identifier columns ------------------
# Remaining columns that exist in both tables get the _ici / _si12 suffixes.
merge_keys = ['target_id', 'gene_id', 'UID', 'seq', 'isRibosomal']
df_all = pd.merge(
    df_ici,
    df_si12,
    left_on=merge_keys,
    right_on=merge_keys,
    suffixes=('_ici', '_si12'),
)

# ---- Column groups ------------------------------------------------------------
# Columns whose name contains 'ICI' / 'SI12', in sorted order.
ici_times = sorted(col for col in df_ici.columns if 'ICI' in col)
si12_times = sorted(col for col in df_si12.columns if 'SI12' in col)
bothtimes = ici_times + si12_times

# Hand-maintained lists of the remaining ("not labeled") sample columns.
not_ici_label_times = sorted([
    'T0_A_2', 'T0_B',
    'T1_1_A', 'T1_3_C',
    'T2_1_A', 'T2_3_A',
    'T3_1_A', 'T3_3_B',
    'T4_3_A', 'T5(4)_1_A',
    'T5_3_A', 'T6(5)_1_A',
    'T6_3_C', 'T7(6)_A_1',
])
not_si12_label_times = sorted([
    'T0_A_2', 'T0_B',
    'T1_1_A', 'T1_2_B',
    'T2_1_A', 'T2_2_A',
    'T3_1_A', 'T3_2_A',
    'T4_1_A', 'T4_2_A',
    'T5_1_A', 'T5_2_A',
    'T6_1_A', 'T6_2_A',
])
# Same samples, using the column names they have in the merged df_all.
not_labeled_bothtimes = sorted([
    'T0_A_2_ici', 'T0_B_ici',
    'T1_1_A_ici', 'T1_3_C',
    'T2_1_A_ici', 'T2_3_A',
    'T3_1_A_ici', 'T3_3_B',
    'T5(4)_1_A', 'T4_3_A',
    'T6(5)_1_A', 'T5_3_A',
    'T7(6)_A_1', 'T6_3_C',
    'T0_A_2_si12', 'T0_B_si12',
    'T1_1_A_si12', 'T1_2_B',
    'T2_1_A_si12', 'T2_2_A',
    'T3_1_A_si12', 'T3_2_A',
    'T4_1_A', 'T4_2_A',
    'T5_1_A', 'T5_2_A',
    'T6_1_A', 'T6_2_A',
])



In [214]:
# Sanity check of the merge: non-key columns present in both inputs carry the _ici / _si12 suffixes.
df_all.columns

Index(['target_id', 'gene_id', 'UID', 'seq', 'T0_A_2_ici', 'T0_B_ici',
       'T1_1_A_ici', 'T1_3_C', 'ICI-T1', 'T2_1_A_ici', 'T2_3_A', 'ICI-T2',
       'T3_1_A_ici', 'T3_3_B', 'ICI-T3', 'T5(4)_1_A', 'T4_3_A', 'ICI-T4',
       'T6(5)_1_A', 'T5_3_A', 'ICI-T5', 'T7(6)_A_1', 'T6_3_C', 'ICI-T6',
       'isRibosomal', 'T0_A_2_si12', 'T0_B_si12', 'T1_1_A_si12', 'T1_2_B',
       'SI12-T1', 'T2_1_A_si12', 'T2_2_A', 'SI12-T2', 'T3_1_A_si12', 'T3_2_A',
       'SI12-T3', 'T4_1_A', 'T4_2_A', 'SI12-T4', 'T5_1_A', 'T5_2_A', 'SI12-T5',
       'T6_1_A', 'T6_2_A', 'SI12-T6'],
      dtype='object')

In [226]:
# Choose which expression columns are aggregated per gene_id.
#   DO_MODE == 1: every column whose name contains 'ICI' / 'SI12' (the "labeled" columns), per-gene mean
#   DO_MODE == 2: the remaining ("not labeled") sample columns, per-gene mean
#   DO_MODE == 3: a hand-picked subset of T0 and labeled columns, per-gene min / max / mean

DO_MODE=3

# Column subsets used by mode 3 (the *_ici suffixed names are the ones found in the merged df_all).
ici_label_times    = sorted(['T0_A_2', 'T0_B', 'ICI-T1', 'ICI-T4','ICI-T5'])
si12_label_times   = sorted(['T0_A_2', 'T0_B', 'SI12-T1','SI12-T5','SI12-T6'])
labeled_bothtimes  = sorted(['T0_A_2_ici', 'T0_B_ici', 'ICI-T1','SI12-T1','ICI-T4','SI12-T5','ICI-T5','SI12-T6'])

# (columns for df_all, columns for df_ici, columns for df_si12) per mode.
mode_columns = {
    1: (bothtimes, ici_times, si12_times),
    2: (not_labeled_bothtimes, not_ici_label_times, not_si12_label_times),
    3: (labeled_bothtimes, ici_label_times, si12_label_times),
}

# Fail loudly on an unknown mode instead of leaving all_genes / ici_genes / si12_genes
# undefined (or stale from a previous run of this cell).
if DO_MODE not in mode_columns:
    raise ValueError("DO_MODE must be 1, 2 or 3, got {0!r}".format(DO_MODE))

all_cols, ici_cols, si12_cols = mode_columns[DO_MODE]

if DO_MODE == 3:
    print(labeled_bothtimes)
    # An ordered list, not a set: a set of strings has no stable iteration order, so the
    # min/max/mean sub-columns could come out in a different order in every kernel session,
    # changing the feature order handed to the downstream embedding.
    agg_funcs = ['min', 'max', 'mean']
else:
    agg_funcs = 'mean'

# One row per gene_id; with the list of functions the result has (column, function) column pairs.
# The former reset_index() calls were dropped: groupby does not use the row index.
all_genes  = df_all[['gene_id', *all_cols]].groupby('gene_id').agg(agg_funcs)
ici_genes  = df_ici[['gene_id', *ici_cols]].groupby('gene_id').agg(agg_funcs)
si12_genes = df_si12[['gene_id', *si12_cols]].groupby('gene_id').agg(agg_funcs)

['ICI-T1', 'ICI-T4', 'ICI-T5', 'SI12-T1', 'SI12-T5', 'SI12-T6', 'T0_A_2_ici', 'T0_B_ici']


In [228]:
# Build one embedding frame per aggregated gene table.
# NOTE(review): get_df_embedding is not defined in the visible part of this notebook —
# make sure the cell defining it runs before this one on a fresh kernel (Restart & Run All).
# The plotting helpers further down read the 'x', 'y' and 'target' columns of each result.
ici_umap_df  = get_df_embedding(ici_genes)
all_umap_df  = get_df_embedding(all_genes)
si12_umap_df = get_df_embedding(si12_genes)

  n_components
  n_components
  n_components


In [230]:
from bokeh.models import ColumnDataSource, HoverTool, BoxZoomTool, ResetTool
from bokeh.plotting import figure, output_file, save

def bokeh_plot_hoover_umap_df(dataset, filename,
                              output_dir="C:\\Users\\bazoo\\Dropbox (Personal)\\gilad_final_analysis"):
    """Show an interactive scatter plot of an embedding and save the plot as HTML.

    The plot is displayed in the notebook next to two drop-downs that choose which
    column of ``dataset`` is drawn on each axis. Hovering a point shows its
    ``target`` value under the label "gene". The plot (without the drop-downs) is
    also written to ``<output_dir>/<filename>.html``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide the columns ``'x'``, ``'y'`` and ``'target'``.
    filename : str
        Base name (without extension) of the HTML file to write.
    output_dir : str, optional
        Directory the HTML file is written to. Defaults to the location that was
        previously hard-coded in this function.

    Returns
    -------
    None
    """
    # Local import: keeps the function independent of the notebook's import cells.
    from bokeh.resources import CDN

    # Columns offered by both axis selectors.
    # NOTE(review): 'target' feeds the "gene" tooltip and is presumably non-numeric;
    # consider dropping it from the axis choices — confirm against get_df_embedding's output.
    axis_options = sorted(["x", "y", "target"])

    # Runs in the browser whenever one of the selectors changes.
    # - locals are declared with `var` (the original assigned undeclared globals,
    #   which throws when the callback is evaluated in strict mode);
    # - `source.change.emit()` replaces the removed `source.trigger('change')`.
    code = '''
        var value1 = val1.value;
        var value2 = val2.value;
        var data = source.data;
        var original_data = original_source.data;
        // get data corresponding to selection
        data['x'] = original_data[value1];
        data['y'] = original_data[value2];
        source.change.emit();
        // set axis labels
        x_axis.axis_label = value1;
        y_axis.axis_label = value2;
    '''

    # `source` is what the glyphs draw; `original_source` keeps every column so the
    # callback can copy the selected ones into `source`.
    source = ColumnDataSource(data=dict(x=dataset['x'], y=dataset['y'], target=dataset['target']))
    original_source = ColumnDataSource(data=dataset.to_dict(orient='list'))

    hover = HoverTool(tooltips=[('gene', '@target')])
    plot = figure(plot_width=500, plot_height=500, tools=[BoxZoomTool(), ResetTool(), hover])
    plot.circle(x="x", y="y", source=source, line_width=3, line_alpha=0.6)

    # Initial labels and selector values match the columns actually plotted
    # (they used to be the placeholders 'A' / 'B', which are not valid options).
    plot.xaxis[0].axis_label = 'x'
    plot.yaxis[0].axis_label = 'y'
    x_select = Select(title="X Axis", options=axis_options, value="x")
    y_select = Select(title="Y Axis", options=axis_options, value="y")

    callback = CustomJS(
        args=dict(
            source=source,
            original_source=original_source,
            x_axis=plot.xaxis[0],
            y_axis=plot.yaxis[0],
            val1=x_select,
            val2=y_select,
        ),
        code=code,
    )
    x_select.js_on_change('value', callback)
    y_select.js_on_change('value', callback)

    # Display the graph in the Jupyter notebook.
    layout = row(plot, x_select, y_select)
    show(layout, notebook_handle=True)

    # Save with an explicit filename instead of calling output_file(): output_file()
    # changes Bokeh's global output state, so the show() of the *next* call would also
    # write its layout to this file and overwrite what was saved here.
    html_path = os.path.join(output_dir, "{0}.html".format(filename))
    save(plot, filename=html_path, resources=CDN, title=filename)

In [231]:
# Plot the embedding of the ICI gene table; the HTML copy is named ici_umap.html.
bokeh_plot_hoover_umap_df(ici_umap_df, filename='ici_umap')

In [232]:
# Plot the embedding of the SI12 gene table; the HTML copy is named si12_mean_umap.html.
bokeh_plot_hoover_umap_df(si12_umap_df, filename='si12_mean_umap')

In [233]:
# Plot the embedding of the merged (ICI + SI12) gene table; the HTML copy is named all_mean_umap.html.
bokeh_plot_hoover_umap_df(all_umap_df, filename='all_mean_umap')

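# OBSOLETE: earlier `LabelSet`-based plotting function, no longer called in this notebook (`bokeh_plot_hoover_umap_df` above is used instead)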

In [229]:
def bokeh_plot_umap_df(dataset):
    """Show an interactive scatter plot of an embedding with a text label on every point.

    The plot is displayed in the notebook next to two drop-downs that choose which
    column of ``dataset`` is drawn on each axis. Each point is annotated with its
    ``target`` value through a ``LabelSet``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must provide the columns ``'x'``, ``'y'`` and ``'target'``.

    Returns
    -------
    None
    """
    # Columns offered by both axis selectors.
    axis_options = sorted(["x", "y", "target"])

    # Runs in the browser whenever one of the selectors changes.
    # - locals are declared with `var` (the original assigned undeclared globals,
    #   which throws when the callback is evaluated in strict mode);
    # - `source.change.emit()` replaces the removed `source.trigger('change')`.
    code = '''
        var value1 = val1.value;
        var value2 = val2.value;
        var data = source.data;
        var original_data = original_source.data;
        // get data corresponding to selection
        data['x'] = original_data[value1];
        data['y'] = original_data[value2];
        source.change.emit();
        // set axis labels
        x_axis.axis_label = value1;
        y_axis.axis_label = value2;
    '''

    # `source` is what the circles draw; `original_source` keeps every column so the
    # callback can copy the selected ones into `source`.
    source = ColumnDataSource(data=dict(x=dataset['x'], y=dataset['y']))
    original_source = ColumnDataSource(data=dataset.to_dict(orient='list'))

    plot = figure(plot_width=400, plot_height=400)
    plot.circle(x="x", y="y", source=source, line_width=3, line_alpha=0.6)

    # The labels read from `original_source`, so they stay at the original x/y
    # positions even after the selectors change what the circles show.
    labels = LabelSet(
        x='x',
        y='y',
        text='target',
        level='glyph',
        x_offset=5,
        y_offset=5,
        source=original_source,
        render_mode='canvas',
    )
    plot.add_layout(labels)

    # Initial labels and selector values match the columns actually plotted
    # (they used to be the placeholders 'A' / 'B', which are not valid options).
    plot.xaxis[0].axis_label = 'x'
    plot.yaxis[0].axis_label = 'y'
    x_select = Select(title="X Axis", options=axis_options, value="x")
    y_select = Select(title="Y Axis", options=axis_options, value="y")

    callback = CustomJS(
        args=dict(
            source=source,
            original_source=original_source,
            x_axis=plot.xaxis[0],
            y_axis=plot.yaxis[0],
            val1=x_select,
            val2=y_select,
        ),
        code=code,
    )
    x_select.js_on_change('value', callback)
    y_select.js_on_change('value', callback)

    # Display the graph in the Jupyter notebook.
    layout = row(plot, x_select, y_select)
    show(layout, notebook_handle=True)