In [1]:
#Bokeh & standard imports
import numpy as np
import pandas as pd

from bokeh.plotting import gridplot, figure, show, save
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, CustomJS
import subprocess
import aligater as ag

AliGater started in Jupyter mode


In [2]:
# Create the AliGater QC helper object; the cells below use it to load a QC file and run the analysis.
AGQC=ag.AGClasses.AGQC()

In [3]:
# Load the QC file (HDF5) produced by an AliGater run; the call prints the gates it contains.
# NOTE(review): machine-specific absolute path - consider moving it to a config variable.
AGQC.load_QC_file("/home/hematogenomics/Ludvig/AliGater_output/phase_II_TCell_14-05-2019/phase_II_TCell_14-05-2019.QC.HDF5")

Loaded metadata for QC file with 11 gates:
CD194posTregsTregs
CD4negPBMC
CD4posPBMC
PBMCssinglets
TregsCD4pos
activatedTregTregs
naiveCD4negCD4neg
naiveCD4posCD4pos
restingTregTregs
secretingTregTregs
singletstotal
Load a specific population for QC with select_population


In [9]:
# Number of components requested from run_QC; the plotting cell derives the
# column labels PC1..PC<n_components> from this same value.
n_components=8
# Run the QC analysis for one gated population. The call prints the explained
# variance per component; the result is used below as a DataFrame with PC1, PC2, ... columns
# (presumably one row per sample image - TODO confirm against AGQC.run_QC).
PC_DF=AGQC.run_QC("naiveCD4posCD4pos", n_components=n_components)

Loaded data for naiveCD4posCD4pos containing 4340 (32,32) images
PCs explained variance: 
[0.43659425 0.14535612 0.12362848 0.07187917 0.05130732 0.03383621
 0.01907726 0.01752723]
Total visual variation explained: 0.8992060497403145


In [10]:
# Render Bokeh output inline in the notebook.
output_notebook()

# Column labels of the component scores: PC1 .. PC<n_components>.
labels = ['PC' + str(k) for k in range(1, n_components + 1)]

# Long format for the per-component plot: one row per (index value, component) pair,
# with the component name in 'component' and its score in 'value'.
PC_DF_melt = PC_DF.reset_index().melt(
    id_vars='index',
    value_vars=labels,
    var_name='component',
)
source = ColumnDataSource(PC_DF_melt)

# Bokeh plotting code modified from user arkottke's answer:
# https://stackoverflow.com/questions/41769655/using-bokeh-to-select-a-data-region-within-a-jupyter-notebook
#
# The JavaScript below runs in the browser whenever the callback fires. It reads the
# indices of the selected glyphs, looks up the matching values of the data source's
# 'index' column, serialises them with JSON.stringify and asks the notebook kernel to
# execute `saved_selected({...})`, so saved_selected() must exist in the kernel before a
# selection is made.
# NOTE(review): `cb_obj.selected['1d']`, `source.callback` and `IPython.notebook.kernel`
# look like legacy Bokeh / classic-Notebook APIs - confirm they exist in the installed versions.
# NOTE(review): `callbacks`, `i` and `data` are assigned without `var` inside the JS code.
callback_obj = CustomJS(code="""
        // Define a callback to capture errors on the Python side
        function callback(msg){
            console.log("Python callback returned unexpected message:", msg)
        }
        callbacks = {iopub: {output: callback}};

        // Select the data
        var inds = cb_obj.selected['1d'].indices;
        var d1 = cb_obj.data;
        var idx = []
        for (i = 0; i < inds.length; i++) {
            idx.push(d1['index'][inds[i]])
        }

        // Generate a command to execute in Python              
        data = {
            'idx': idx,
        }        
        var data_str = JSON.stringify(data)
        var cmd = "saved_selected(" + data_str + ")"

        // Execute the command on the Python kernel
        var kernel = IPython.notebook.kernel;
        kernel.execute(cmd, callbacks, {silent : false});
""")

# Attach the callback to the melted (per-component) data source so that it
# reports back to the Jupyter kernel through kernel.execute.
source.callback = callback_obj

# Second data source built from the un-melted PC_DF (used for the PC1-vs-PC2 scatter);
# it shares the same callback object.
source2=ColumnDataSource(PC_DF)
source2.callback = callback_obj

# Holds the most recent selection reported by the browser-side callback.
selected = {}


def saved_selected(values):
    """Store the index values sent by the Bokeh selection callback.

    `values` is a mapping whose 'idx' entry lists the selected index values.
    They are converted to a NumPy array and stored under selected['idx'],
    replacing whatever selection was stored before. Returns None.
    """
    selected['idx'] = np.array(values['idx'])

# Keyword arguments shared by both figures.
figkwds = {
    'plot_width': 500,
    'plot_height': 300,
    'x_axis_label': 'Component',
    'y_axis_label': 'Value',
    'tools': "pan,lasso_select,box_select,reset,help",
}

# Upper plot: every component's values on a categorical x axis (one column per PC label).
p1 = figure(active_drag="lasso_select", x_range=labels, **figkwds)
p1.circle(
    source=source,
    x="component",
    y="value",
    fill_alpha=0.2,
    size=5,
)

# Lower plot: PC1 against PC2; the shared axis labels are overridden after construction.
p2 = figure(active_drag="lasso_select", **figkwds)
p2.scatter(
    x='PC1',
    y='PC2',
    source=source2,
    alpha=0.8,
)
p2.xaxis.axis_label = 'PC1'
p2.yaxis.axis_label = 'PC2'

# Stack the two plots vertically and display them, keeping a notebook handle.
layout = gridplot([[p1], [p2]])
handle = show(layout, notebook_handle=True)

In [11]:
def print_selected():
    """Print and return the unique IDs of the current plot selection.

    Reads the module-level `selected` dict that saved_selected() fills in.
    Each unique ID is printed with '/' replaced by '-'; the returned list
    holds the IDs unmodified (the replacement is applied to the printed
    text only).

    Returns:
        list: the unique selected IDs in sorted order, or an empty list when
        no selection has been reported yet.
    """
    idx = selected.get('idx')
    if idx is None:
        # Nothing has been selected in the plots yet; avoid a KeyError.
        return []
    # Sort so the printed and returned order is reproducible between runs
    # (plain set iteration order is arbitrary).
    unique_IDs = sorted(set(idx.tolist()))
    for sample_id in unique_IDs:
        print(sample_id.replace('/', '-'))
    return unique_IDs

In [16]:
# Run this after making a lasso/box selection in the plots above: the result
# reflects whatever selection the browser has reported at that moment.
IDs = print_selected()

2017-05-23-Plate 2-T_C12_C12_074
2017-10-31-Plate 1-T_001_C9_C09_064
2017-01-31-Plate 3-T_G3_G03_023
2017-01-31-Plate 3-T_D3_D03_020
2017-05-23-Plate 1-T_H9_H09_093
2017-01-31-Plate 3-T_C3_C03_019
2017-04-06-Plate 1-T_B12_B12_066
2017-01-31-Plate 3-T_A3_A03_017
2017-01-31-Plate 3-T_E3_E03_021
2017-01-31-Plate 3-T_H3_H03_024
2017-01-31-Plate 3-T_F3_F03_022
2017-10-31-Plate 1-T_002_D9_D09_065
2017-01-31-Plate 3-T_B3_B03_018


In [13]:
#Subprocess call to collect images
def collect_images(src, tar, list_of_img_paths, prefix=None, suffix=None, samplistname=None):
    """Write a list of image paths to a text file and pass it to the collection script.

    Each entry of `list_of_img_paths` is expanded to
    `src + prefix + entry + suffix` (prefix/suffix are skipped when None) and
    written, one path per line with no trailing newline, to a sample-list file
    inside `tar`. The AliGater shell script `aligater/bokeh_collect_samples.sh`
    (located under `ag.agconf.ag_home`) is then run with `src`, `tar` and the
    sample-list path as arguments; presumably it gathers the listed images into
    `tar` - see the script for the exact behaviour.

    Args:
        src: directory the image paths are relative to; a trailing '/' is added if missing.
        tar: target directory; a trailing '/' is added if missing. It must already
            exist, since the sample list is written into it.
        list_of_img_paths: iterable of path fragments (strings).
        prefix: optional string inserted between `src` and each fragment.
        suffix: optional string appended to each fragment.
        samplistname: file name of the sample list inside `tar`;
            defaults to 'samples_from_bokeh.txt'.

    Raises:
        ValueError: if `src` or `tar` is empty.
        RuntimeError: if the shell script exits with a non-zero status.
    """
    if not src or not tar:
        raise ValueError("src and tar must be non-empty directory paths")
    if not tar.endswith("/"):
        tar = tar + "/"
    if not src.endswith("/"):
        src = src + "/"

    if samplistname is None:
        samplistname = 'samples_from_bokeh.txt'
    sample_list = tar + samplistname

    lead = src if prefix is None else src + prefix
    tail = "" if suffix is None else suffix
    # The with-block closes the file; join() keeps the original layout of one
    # path per line without a newline after the last entry.
    with open(sample_list, 'w') as f:
        f.write("\n".join(lead + path + tail for path in list_of_img_paths))

    shellscript = ag.agconf.ag_home+'aligater/bokeh_collect_samples.sh'
    status = subprocess.run([shellscript, src, tar, sample_list])
    if status.returncode != 0:
        # A bare `raise` has no active exception here; raise an explicit,
        # informative error of the same type (RuntimeError) instead.
        raise RuntimeError(
            "{} exited with status {}".format(shellscript, status.returncode)
        )

In [17]:
# print_selected() prints the IDs with '/' replaced by '-' but returns them unmodified,
# so apply the same replacement to the returned list before using it to build file names.
IDs = [i.replace('/','-') for i in IDs]

In [19]:
# Build the image list for the selected IDs and hand it to the collection script.
# NOTE(review): prefix/suffix refer to 'memoryCD4pos' while the QC above was run on
# 'naiveCD4posCD4pos' - confirm this is the intended image set.
# NOTE(review): machine-specific absolute paths - consider moving them to config variables.
collect_images("/home/hematogenomics/Programs/aligater/plots/phase_II/TCell",'/home/hematogenomics/Ludvig/tmp',IDs,prefix='memoryCD4pos/',suffix='-memoryCD4pos.png')