In [1]:
import os
import time
import pandas as pd
import numpy as np
import networkx as nx
import collections
from scipy import sparse as sp
from scipy.stats import rankdata

import itertools
from itertools import combinations, combinations_with_replacement, cycle
from functools import reduce

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from util import *

import colorcet as cc

import bokeh
from bokeh.io import output_notebook, output_file, show, save
from bokeh.plotting import figure
from bokeh.models import (Rect, MultiLine, Circle, Span, Label,
                          GraphRenderer, StaticLayoutProvider,
                          NodesAndLinkedEdges,
                          HoverTool, TapTool, ColumnDataSource,
                          LinearColorMapper, LogColorMapper, CategoricalColorMapper,
                          CategoricalMarkerMapper,
                          BoxSelectTool,
                          ColorBar, BasicTicker, BoxZoomTool, FactorRange,
                          Range1d)
from bokeh.models import CategoricalTicker, FixedTicker, BoxAnnotation
from bokeh.models import Arrow, NormalHead, OpenHead, VeeHead, LabelSet

from bokeh.transform import transform, factor_cmap, linear_cmap, log_cmap
from bokeh.layouts import row, column, gridplot
output_notebook()



In [2]:
def joint_marginal(df, c1, c2, include_fraction=False):
    """Given a dataframe and two columns, return a dataframe with the joint and marginal counts."""
    j = df.value_counts([c1, c2])
    j.name = "joint_count"
    j = j.reset_index()

    m1 = df.value_counts(c1)
    m1.name = f"{c1}_count"
    j = j.merge(m1, left_on=c1, right_index=True)

    m2 = df.value_counts(c2)
    m2.name = f"{c2}_count"
    j = j.merge(m2, left_on=c2, right_index=True)

    if include_fraction:
        j["joint_fraction"] = j["joint_count"] / j["joint_count"].sum()
        j[f"{c1}_fraction"] = j["joint_count"] / j[f"{c1}_count"]
        j[f"{c2}_fraction"] = j["joint_count"] / j[f"{c2}_count"]
    return j


In [3]:
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

preproc_dir = "oviIN/preprocessed-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"
preproc_edges = "preprocessed_undirected_edges.csv"

hemibrain_dir = "oviIN/clustering_" + hemibrain_version
hemibrain_nodes = "inputsoutputs_key.txt"
hemibrain_edges = "inputsoutputs.txt"
#hemibrain_nodes = "only_inputs_key.txt"
#hemibrain_edges = "only_inputs.txt"
#hemibrain_nodes = "only_outputs_key.txt"
#hemibrain_edges = "only_outputs.txt"

figure_dir = os.path.join("figures","paper")
movie_dir = os.path.join("movies")
analysis_dir = os.path.join("analysis",hemibrain_version)
obj_dir = os.path.join("obj",hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format


for d in [figure_dir, analysis_dir, obj_dir, movie_dir]:
    if not os.path.isdir(d):
        log_msg("Creating directory", d)
        os.makedirs(d)

reneel_params = list(sorted(['0.05','0.1','0.5','0.75'], key=float))
type_params = ['celltype','instance']
list_of_params = reneel_params + type_params

log_msg("Set up directory info and useful lists")

2024 01 24 17:04:04  Hemibrain data set being used: v1.2.1
2024 01 24 17:04:04  Set up directory info and useful lists


In [5]:
from neuprint import Client
from neuprint import fetch_roi_hierarchy, fetch_neurons, NeuronCriteria as NC


auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_Client`, version", np_client.fetch_version())
except:
    np_client = None
    log_msg("neuprint Client set up failed!")

log_msg("Loading node dataframe")
if os.path.isfile(os.path.join(preproc_dir, preproc_centroids)):
    log_msg("  (with centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_centroids), index_col=0)
else:
    log_msg("  (without centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"]
log_msg("Done!")

log_msg("Loading directed edges from csv")
HB_edge_df = pd.read_csv(os.path.join(hemibrain_dir, hemibrain_edges), delimiter=' ', header=None).rename(columns={0: "pre", 1:"post"})
log_msg("Done!")

log_msg("Merging in cell info to edge df")
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='pre', right_index=True)
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='post', right_index=True, suffixes=['pre', 'post'])
log_msg("Done!")
pd.set_option('display.max_rows', 200)

2024 01 24 17:04:29  neuprint Client set up as `np_Client`, version 0.1.0
2024 01 24 17:04:29  Loading node dataframe
2024 01 24 17:04:29    (without centroids)
2024 01 24 17:04:29  Adding 'type group'
2024 01 24 17:04:29  Done!
2024 01 24 17:04:29  Loading directed edges from csv
2024 01 24 17:04:29  Done!
2024 01 24 17:04:29  Merging in cell info to edge df
2024 01 24 17:04:29  Done!


In [6]:
#HB_node_df

In [7]:
from neuprint import fetch_simple_connections
nc = NC(type='SLP316')
neuron_df = fetch_simple_connections("oviIN", nc)
#neuron_df

In [8]:
HB_node_df[HB_node_df['celltype']=='aMe24']

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
574965048,1838,4,7,6,330,573,631,aMe24(PDM24)_L,aMe24,446,...,True,Leaves,,,,"{'SNP(L)': {'pre': 439, 'post': 873, 'downstre...",,"['ATL(L)', 'ICL(L)', 'INP', 'SCL(L)', 'SMP(L)'...","['ATL(L)', 'INP', 'SCL(L)', 'SMP(L)', 'SNP(L)']",aMe24
5813011660,2059,1,1,2,419,549,735,aMe24_R,aMe24,1346,...,False,Roughly traced,PDM24,355.0,"[17120, 17799, 6112]","{'SNP(R)': {'pre': 706, 'post': 1823, 'downstr...",,"['AME(R)', 'ATL(R)', 'AVLP(R)', 'ICL(R)', 'INP...","['AME(R)', 'AVLP(R)', 'ICL(R)', 'INP', 'LH(R)'...",aMe24


In [9]:
s=HB_node_df[HB_node_df['0.0']==4]
x= s[s['0.05']==7]
f=x[x['0.1']==6]
#f[f['0.5']==330]
#z=h[h['0.75']==573]
#z[z['1.0']==631]

In [10]:
chi1 = '0.0'
fs=[]
for chi in reneel_params:
   jm = joint_marginal(HB_node_df, chi1, chi, include_fraction=True)
   display(jm)
   print(f"Clusters found at chi = {chi1}:", jm[chi1].max())
   print(f"Clusters found at chi = {chi}:", jm[chi].max())

   # sort the clusters on the y axis to get a more "diagonal" plot
   yrange = jm.sort_values([f"{chi}_fraction"], ascending=False).groupby(chi).agg({chi1: "first", f"{chi}_fraction": "first", "joint_count": "first"}).sort_values([chi1, "joint_count"], ascending=[True, False]).index
   f = figure(title=f"Clusters at chi = {chi} vs. clusters at chi = {chi1}",
            x_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.0"].max())]),
         #    y_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.05"].max())]),
            y_range=FactorRange(factors=[str(y) for y in yrange]),
            width=600, height=1000)


   jm["x"] = jm[chi1].apply(str)  # bokeh factor range has to have strings, so we have to convert these
   jm["y"] = jm[chi].apply(str)

   #fig_kws = dict(title=None,border_fill_color=None,outline_line_color=None, background_fill_color=None,)
   #f = figure(**fig_kws)

   f.rect(x="x", y="y",
         width=f"{chi1}_fraction", height=f"{chi}_fraction",
         source=jm)
   f.add_tools(HoverTool(tooltips={"Neurons": "@joint_count (@joint_fraction{%%} of Hemibrain)",
                                 f"Fraction of {chi1}": f"@{{{chi1}_fraction}}{{2.%%}}",
                                 f"Fraction of {chi}": f"@{{{chi}_fraction}}{{2.%%}}"}))
   fs.append(f)

g = gridplot(fs, ncols=2)
show(g)

Unnamed: 0,0.0,0.05,joint_count,0.0_count,0.05_count,joint_fraction,0.0_fraction,0.05_fraction
0,4,7,730,861,766,0.160475,0.847851,0.953003
24,1,7,5,1178,766,0.001099,0.004244,0.006527
25,2,7,5,782,766,0.001099,0.006394,0.006527
75,6,7,1,611,766,0.00022,0.001637,0.001305
13,5,7,23,738,766,0.005056,0.031165,0.030026
32,3,7,2,379,766,0.00044,0.005277,0.002611
9,4,4,82,861,459,0.018026,0.095238,0.178649
7,1,4,181,1178,459,0.039789,0.15365,0.394336
10,2,4,69,782,459,0.015168,0.088235,0.150327
12,6,4,53,611,459,0.011651,0.086743,0.115468


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.05: 147


Unnamed: 0,0.0,0.1,joint_count,0.0_count,0.1_count,joint_fraction,0.0_fraction,0.1_fraction
0,4,6,679,861,721,0.149264,0.788618,0.941748
21,1,6,13,1178,721,0.002858,0.011036,0.018031
15,2,6,26,782,721,0.005716,0.033248,0.036061
103,6,6,1,611,721,0.000220,0.001637,0.001387
59,5,6,2,738,721,0.000440,0.002710,0.002774
...,...,...,...,...,...,...,...,...
186,5,21,1,738,1,0.000220,0.001355,1.000000
73,3,80,2,379,2,0.000440,0.005277,1.000000
115,3,24,1,379,1,0.000220,0.002639,1.000000
116,3,36,1,379,1,0.000220,0.002639,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.1: 263


Unnamed: 0,0.0,0.5,joint_count,0.0_count,0.5_count,joint_fraction,0.0_fraction,0.5_fraction
0,3,9,145,379,145,0.031875,0.382586,1.000000
4,3,13,66,379,69,0.014509,0.174142,0.956522
328,5,13,3,738,69,0.000659,0.004065,0.043478
16,3,18,38,379,56,0.008353,0.100264,0.678571
263,4,18,4,861,56,0.000879,0.004646,0.071429
...,...,...,...,...,...,...,...,...
871,6,919,1,611,1,0.000220,0.001637,1.000000
876,6,849,1,611,1,0.000220,0.001637,1.000000
882,6,916,1,611,1,0.000220,0.001637,1.000000
904,6,925,1,611,1,0.000220,0.001637,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.5: 1059


Unnamed: 0,0.0,0.75,joint_count,0.0_count,0.75_count,joint_fraction,0.0_fraction,0.75_fraction
0,3,12,80,379,95,0.017586,0.211082,0.842105
1670,4,12,1,861,95,0.000220,0.001161,0.010526
223,5,12,4,738,95,0.000879,0.005420,0.042105
41,6,12,10,611,95,0.002198,0.016367,0.105263
1,3,9,61,379,61,0.013410,0.160950,1.000000
...,...,...,...,...,...,...,...,...
1255,6,431,1,611,1,0.000220,0.001637,1.000000
1274,6,196,1,611,1,0.000220,0.001637,1.000000
1275,6,173,1,611,1,0.000220,0.001637,1.000000
1278,6,150,1,611,1,0.000220,0.001637,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.75: 1501


In [11]:
r= HB_node_df[HB_node_df['0.0']==3]
f = r[r['0.05']==5]
z= f[f['0.1']==5]
#z[z['0.5']==9]
#h[h['0.75']==496]


In [12]:
r= HB_node_df[HB_node_df['0.0']==3]
f = r[r['0.05']==5]
z= f[f['0.1']==5]
h = z[z['0.5']==9]
x=h[h['0.75']==9]
x[x['1.0']==14]

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1008391068,15,3,5,5,9,9,14,FC2B(FQ7)_C6_L,FC2B,219,...,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2B
1014474192,59,3,5,5,9,9,14,FS1A(FQ9)_C2_L,FS1A,112,...,False,Roughly traced,PDM01,301.0,"[27300, 17574, 10448]","{'SNP(L)': {'pre': 31, 'post': 18, 'downstream...",FB2-6SMPSMP,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...",FS1A
1041071176,118,3,5,5,9,9,14,FC2B(FQ7)_C2_L,FC2B,185,...,False,Roughly traced,PDM01,325.5,"[25834, 20180, 7336]","{'CX': {'pre': 141, 'post': 2093, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2B
1070102614,185,3,5,5,9,9,14,FB5G_L,FB5G,307,...,False,Roughly traced,AVM10,,,"{'SNP(L)': {'pre': 36, 'post': 271, 'downstrea...",SMPSIPFB56,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...",FB5G
1071760480,196,3,5,5,9,9,14,FC2C(FQ7)_C2_L,FC2C,173,...,False,Roughly traced,PDM01,325.5,"[26931, 21788, 7896]","{'SNP(R)': {'pre': 16, 'post': 31, 'downstream...",FB1d367CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2C
1072533704,202,3,5,5,9,9,14,FC2C(FQ7)_C8_R,FC2C,144,...,False,Roughly traced,PDM01,301.0,"[25619, 13451, 12464]","{'SNP(L)': {'pre': 8, 'post': 9, 'downstream':...",FB1d367CRE,"['BU(L)', 'CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB'...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FBl1',...",FC2C
1134849078,295,3,5,5,9,9,14,FC2B(FQ7)_C1_L,FC2B,170,...,False,Roughly traced,PDM01,316.0,"[27433, 20299, 9392]","{'SNP(R)': {'pre': 9, 'post': 15, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2B
1164545788,345,3,5,5,9,9,14,FC2B(FQ7)_C2_L,FC2B,246,...,False,Roughly traced,PDM01,325.5,"[26400, 18401, 7648]","{'CX': {'pre': 184, 'post': 2808, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2B
1196573432,376,3,5,5,9,9,14,FC2B(FQ7)_C2_R,FC2B,215,...,False,Roughly traced,PDM04,306.0,"[19243, 9331, 16440]","{'SNP(L)': {'pre': 1, 'post': 5, 'downstream':...",FB1d356CRE,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...",FC2B
1257629175,436,3,5,5,9,9,14,FC2B(FQ7)_C1_L,FC2B,217,...,False,Roughly traced,PDM01,325.5,"[26437, 21157, 7688]","{'SNP(R)': {'pre': 3, 'post': 11, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FC2B


In [13]:
output_file("/Users/rhessa/flybrain-clustering/Figures-oviIN/joint_marginal_comparison.html")
save(g, title='Joint Marginal Analysis (full)')

'/Users/rhessa/flybrain-clustering/Figures-oviIN/joint_marginal_comparison.html'

In [14]:
# Inputs
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

preproc_dir = "oviIN/preprocessed_inputs-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"
preproc_edges = "preprocessed_undirected_edges.csv"

hemibrain_dir = "oviIN/clustering_inputs_" + hemibrain_version
hemibrain_nodes = "only_inputs_key.txt"
hemibrain_edges = "only_inputs.txt"

figure_dir = os.path.join("figures","paper")
movie_dir = os.path.join("movies")
analysis_dir = os.path.join("analysis",hemibrain_version)
obj_dir = os.path.join("obj",hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format


for d in [figure_dir, analysis_dir, obj_dir, movie_dir]:
    if not os.path.isdir(d):
        log_msg("Creating directory", d)
        os.makedirs(d)

reneel_params = list(sorted(['0.05','0.1','0.5','0.75'], key=float))
type_params = ['celltype','instance']
list_of_params = reneel_params + type_params

log_msg("Set up directory info and useful lists")

auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_Client`, version", np_client.fetch_version())
except:
    np_client = None
    log_msg("neuprint Client set up failed!")

log_msg("Loading node dataframe")
if os.path.isfile(os.path.join(preproc_dir, preproc_centroids)):
    log_msg("  (with centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_centroids), index_col=0)
else:
    log_msg("  (without centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"].apply(simplify_type)
log_msg("Done!")

log_msg("Loading directed edges from csv")
HB_edge_df = pd.read_csv(os.path.join(hemibrain_dir, hemibrain_edges), delimiter=' ', header=None).rename(columns={0: "pre", 1:"post"})
log_msg("Done!")

log_msg("Merging in cell info to edge df")
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='pre', right_index=True)
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='post', right_index=True, suffixes=['pre', 'post'])
log_msg("Done!")

chi1 = '0.0'
fs=[]
for chi in reneel_params:
   jm = joint_marginal(HB_node_df, chi1, chi, include_fraction=True)
   display(jm)
   print(f"Clusters found at chi = {chi1}:", jm[chi1].max())
   print(f"Clusters found at chi = {chi}:", jm[chi].max())

   # sort the clusters on the y axis to get a more "diagonal" plot
   yrange = jm.sort_values([f"{chi}_fraction"], ascending=False).groupby(chi).agg({chi1: "first", f"{chi}_fraction": "first", "joint_count": "first"}).sort_values([chi1, "joint_count"], ascending=[True, False]).index
   f = figure(title=f"Clusters at chi = {chi} vs. clusters at chi = {chi1}",
            x_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.0"].max())]),
         #    y_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.05"].max())]),
            y_range=FactorRange(factors=[str(y) for y in yrange]),
            width=600, height=1000)


   jm["x"] = jm[chi1].apply(str)  # bokeh factor range has to have strings, so we have to convert these
   jm["y"] = jm[chi].apply(str)

   #fig_kws = dict(title=None,border_fill_color=None,outline_line_color=None, background_fill_color=None,)
   #f = figure(**fig_kws)

   f.rect(x="x", y="y",
         width=f"{chi1}_fraction", height=f"{chi}_fraction",
         source=jm)
   f.add_tools(HoverTool(tooltips={"Neurons": "@joint_count (@joint_fraction{%%} of Hemibrain)",
                                 f"Fraction of {chi1}": f"@{{{chi1}_fraction}}{{2.%%}}",
                                 f"Fraction of {chi}": f"@{{{chi}_fraction}}{{2.%%}}"}))
   fs.append(f)

k = gridplot(fs, ncols=2)

2024 01 24 17:04:39  Hemibrain data set being used: v1.2.1
2024 01 24 17:04:39  Set up directory info and useful lists
2024 01 24 17:04:39  neuprint Client set up as `np_Client`, version 0.1.0
2024 01 24 17:04:39  Loading node dataframe
2024 01 24 17:04:39    (without centroids)
2024 01 24 17:04:39  Adding 'type group'


TypeError: 'float' object is not subscriptable

In [15]:
show(k)

NameError: name 'k' is not defined

In [16]:
r=HB_node_df[HB_node_df['0.0']==3]
f= r[r['0.05']==2]
z=f[f['0.1']==2]
h=z[z['0.5']==5]
x=h[h['0.75']==5]
x[x['1.0']==5]
# Either 5, 8 9

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1008024276,5,3,2,2,5,5,5,FB5N_R,FB5N,499,...,Traced,False,Roughly traced,AVM08,472.5,"[19178, 29711, 37312]","{'SNP(L)': {'post': 5, 'upstream': 5, 'mito': ...",SMPCREFB5_4,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1008391068,9,3,2,2,5,5,5,FC2B(FQ7)_C6_L,FC2B,219,...,Traced,False,Roughly traced,PDM03,325.5,"[29681, 16321, 7328]","{'CX': {'pre': 167, 'post': 2135, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1039068185,59,3,2,2,5,5,5,FB5O_L,FB5O,574,...,Traced,False,Roughly traced,AVM10,,,"{'SNP(L)': {'pre': 25, 'post': 270, 'downstrea...",SMPCREFB5_5,"['BU(L)', 'CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB'...","['BU(L)', 'CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB'..."
1039400642,61,3,2,2,5,5,5,FB5O_R,FB5O,579,...,Traced,False,Roughly traced,AVM10,292.5,"[12926, 31418, 17584]","{'SNP(R)': {'pre': 24, 'post': 515, 'downstrea...",SMPCREFB5_5,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1070422316,103,3,2,2,5,5,5,FC2B(FQ7)_C3_L,FC2B,237,...,Traced,False,Roughly traced,PDM01,325.5,"[28778, 18075, 8016]","{'CX': {'pre': 186, 'post': 2696, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1071121755,105,3,2,2,5,5,5,FB6W_R,FB6W,503,...,Traced,False,Roughly traced,AVM10,336.5,"[12515, 33697, 19328]","{'SNP(R)': {'pre': 14, 'post': 266, 'downstrea...",CRESMPFB6_2,"['BU(L)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1164545788,191,3,2,2,5,5,5,FC2B(FQ7)_C2_L,FC2B,246,...,Traced,False,Roughly traced,PDM01,325.5,"[26400, 18401, 7648]","{'CX': {'pre': 184, 'post': 2808, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
1257629175,234,3,2,2,5,5,5,FC2B(FQ7)_C1_L,FC2B,217,...,Traced,False,Roughly traced,PDM01,325.5,"[26437, 21157, 7688]","{'SNP(R)': {'pre': 3, 'post': 11, 'downstream'...",FB1d356CRE,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
486842208,898,3,2,2,5,5,5,FB5C_R,FB5C,533,...,Traced,False,Roughly traced,AVM10,311.5,"[12096, 31738, 19024]","{'SNP(R)': {'pre': 78, 'post': 528, 'downstrea...",SMPCREFB5_1,"['BU(L)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."
5813011125,1223,3,2,2,5,5,5,FB2L_R,FB2L,355,...,Traced,False,Roughly traced,AVM08,451.5,"[19132, 27903, 36640]","{'SNP(R)': {'pre': 56, 'post': 878, 'downstrea...",SMPCREFB2_2,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F..."


In [17]:
output_file("/Users/rhessa/flybrain-clustering/Figures-oviIN/joint_marginal_comparison_inputs.html")
save(k, title='Joint Marginal Analysis (Inputs)')

NameError: name 'k' is not defined

In [19]:
# outputs
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

preproc_dir = "oviIN/preprocessed_outputs-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"
preproc_edges = "preprocessed_undirected_edges.csv"

hemibrain_dir = "oviIN/clustering_outputs_" + hemibrain_version
hemibrain_nodes = "only_outputs_key.txt"
hemibrain_edges = "only_outputs.txt"

figure_dir = os.path.join("figures","paper")
movie_dir = os.path.join("movies")
analysis_dir = os.path.join("analysis",hemibrain_version)
obj_dir = os.path.join("obj",hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format


for d in [figure_dir, analysis_dir, obj_dir, movie_dir]:
    if not os.path.isdir(d):
        log_msg("Creating directory", d)
        os.makedirs(d)

reneel_params = list(sorted(['0.05','0.1','0.5','0.75'], key=float))
type_params = ['celltype','instance']
list_of_params = reneel_params + type_params

log_msg("Set up directory info and useful lists")

auth_token_file = open("flybrain.auth.txt", 'r')
auth_token = next(auth_token_file).strip()
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_Client`, version", np_client.fetch_version())
except:
    np_client = None
    log_msg("neuprint Client set up failed!")

log_msg("Loading node dataframe")
if os.path.isfile(os.path.join(preproc_dir, preproc_centroids)):
    log_msg("  (with centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_centroids), index_col=0)
else:
    log_msg("  (without centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"]
log_msg("Done!")

log_msg("Loading directed edges from csv")
HB_edge_df = pd.read_csv(os.path.join(hemibrain_dir, hemibrain_edges), delimiter=' ', header=None).rename(columns={0: "pre", 1:"post"})
log_msg("Done!")

log_msg("Merging in cell info to edge df")
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='pre', right_index=True)
HB_edge_df = HB_edge_df.merge(HB_node_df[list_of_params + ['type_group']], left_on='post', right_index=True, suffixes=['pre', 'post'])
log_msg("Done!")

chi1 = '0.0'
fs=[]
for chi in reneel_params:
   jm = joint_marginal(HB_node_df, chi1, chi, include_fraction=True)
   display(jm)
   print(f"Clusters found at chi = {chi1}:", jm[chi1].max())
   print(f"Clusters found at chi = {chi}:", jm[chi].max())

   # sort the clusters on the y axis to get a more "diagonal" plot
   yrange = jm.sort_values([f"{chi}_fraction"], ascending=False).groupby(chi).agg({chi1: "first", f"{chi}_fraction": "first", "joint_count": "first"}).sort_values([chi1, "joint_count"], ascending=[True, False]).index
   f = figure(title=f"Clusters at chi = {chi} vs. clusters at chi = {chi1}",
            x_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.0"].max())]),
         #    y_range=FactorRange(factors=[str(i + 1) for i in range(jm["0.05"].max())]),
            y_range=FactorRange(factors=[str(y) for y in yrange]),
            width=600, height=1000)


   jm["x"] = jm[chi1].apply(str)  # bokeh factor range has to have strings, so we have to convert these
   jm["y"] = jm[chi].apply(str)

   #fig_kws = dict(title=None,border_fill_color=None,outline_line_color=None, background_fill_color=None,)
   #f = figure(**fig_kws)

   f.rect(x="x", y="y",
         width=f"{chi1}_fraction", height=f"{chi}_fraction",
         source=jm)
   f.add_tools(HoverTool(tooltips={"Neurons": "@joint_count (@joint_fraction{%%} of Hemibrain)",
                                 f"Fraction of {chi1}": f"@{{{chi1}_fraction}}{{2.%%}}",
                                 f"Fraction of {chi}": f"@{{{chi}_fraction}}{{2.%%}}"}))
   fs.append(f)

i = gridplot(fs, ncols=2)

2024 01 24 17:04:48  Hemibrain data set being used: v1.2.1
2024 01 24 17:04:48  Set up directory info and useful lists
2024 01 24 17:04:48  neuprint Client set up as `np_Client`, version 0.1.0
2024 01 24 17:04:48  Loading node dataframe
2024 01 24 17:04:48    (without centroids)
2024 01 24 17:04:48  Adding 'type group'
2024 01 24 17:04:48  Done!
2024 01 24 17:04:48  Loading directed edges from csv
2024 01 24 17:04:48  Done!
2024 01 24 17:04:48  Merging in cell info to edge df
2024 01 24 17:04:48  Done!


Unnamed: 0,0.0,0.05,joint_count,0.0_count,0.05_count,joint_fraction,0.0_fraction,0.05_fraction
0,4,5,597,778,602,0.173597,0.767352,0.991694
37,5,5,3,566,602,0.000872,0.005300,0.004983
52,6,5,2,550,602,0.000582,0.003636,0.003322
8,4,3,50,778,222,0.014539,0.064267,0.225225
9,1,3,46,712,222,0.013376,0.064607,0.207207
...,...,...,...,...,...,...,...,...
125,6,23,1,550,1,0.000291,0.001818,1.000000
126,6,26,1,550,1,0.000291,0.001818,1.000000
127,6,27,1,550,1,0.000291,0.001818,1.000000
208,6,159,1,550,1,0.000291,0.001818,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.05: 160


Unnamed: 0,0.0,0.1,joint_count,0.0_count,0.1_count,joint_fraction,0.0_fraction,0.1_fraction
0,4,5,621,778,632,0.180576,0.798201,0.982595
36,2,5,4,601,632,0.001163,0.006656,0.006329
54,5,5,2,566,632,0.000582,0.003534,0.003165
41,6,5,4,550,632,0.001163,0.007273,0.006329
255,3,5,1,232,632,0.000291,0.004310,0.001582
...,...,...,...,...,...,...,...,...
138,6,177,1,550,1,0.000291,0.001818,1.000000
141,6,18,1,550,1,0.000291,0.001818,1.000000
143,6,13,1,550,1,0.000291,0.001818,1.000000
263,6,201,1,550,1,0.000291,0.001818,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.1: 205


Unnamed: 0,0.0,0.5,joint_count,0.0_count,0.5_count,joint_fraction,0.0_fraction,0.5_fraction
0,5,15,136,566,157,0.039546,0.240283,0.866242
38,4,15,13,778,157,0.003780,0.016710,0.082803
694,6,15,1,550,157,0.000291,0.001818,0.006369
170,1,15,4,712,157,0.001163,0.005618,0.025478
284,2,15,3,601,157,0.000872,0.004992,0.019108
...,...,...,...,...,...,...,...,...
874,2,508,1,601,1,0.000291,0.001664,1.000000
970,2,163,1,601,1,0.000291,0.001664,1.000000
971,2,181,1,601,1,0.000291,0.001664,1.000000
975,2,191,1,601,1,0.000291,0.001664,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.5: 745


Unnamed: 0,0.0,0.75,joint_count,0.0_count,0.75_count,joint_fraction,0.0_fraction,0.75_fraction
0,3,5,53,232,53,0.015411,0.228448,1.000000
3,3,93,21,232,21,0.006106,0.090517,1.000000
6,3,95,17,232,17,0.004943,0.073276,1.000000
9,3,18,16,232,16,0.004653,0.068966,1.000000
15,3,52,13,232,14,0.003780,0.056034,0.928571
...,...,...,...,...,...,...,...,...
1038,6,910,1,550,1,0.000291,0.001818,1.000000
1040,6,909,1,550,1,0.000291,0.001818,1.000000
1044,6,886,1,550,1,0.000291,0.001818,1.000000
1047,6,879,1,550,1,0.000291,0.001818,1.000000


Clusters found at chi = 0.0: 6
Clusters found at chi = 0.75: 1098


In [20]:
output_file("/Users/rhessa/flybrain-clustering/Figures-oviIN/joint_marginal_comparison_outputs.html")
save(i, title='Join Marginal Analysis (Outputs)')

'/Users/rhessa/flybrain-clustering/Figures-oviIN/joint_marginal_comparison_outputs.html'

In [21]:
HB_node_df[HB_node_df['celltype']=='SMP550']

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
452689494,789,6,32,5,234,437,498,SMP550_R,SMP550,1414,...,False,Roughly traced,PDM29,347.0,"[19511, 18367, 6016]","{'SNP(R)': {'pre': 1248, 'post': 3275, 'downst...",pSP7 candidates,"['AVLP(R)', 'INP', 'LH(R)', 'PLP(R)', 'SCL(R)'...","['AVLP(R)', 'INP', 'LH(R)', 'PLP(R)', 'SCL(R)'...",SMP550
579700707,1306,4,5,5,335,478,551,SMP550(PDM29)_L,SMP550,992,...,True,Leaves,,,,"{'SNP(L)': {'pre': 981, 'post': 1089, 'downstr...",pSP7 candidates,"['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']","['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']",SMP550


In [22]:
r=HB_node_df[HB_node_df['0.0']==6]
f= r[r['0.05']==9]
z=f[f['0.1']==16]
z[z['0.5']==22]
#h[h['0.75']==129]


Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1015479949,43,6,9,16,22,35,35,CRE003_a_R,CRE003_a,309,...,False,Roughly traced,ADL06,306.5,"[12340, 32294, 20200]","{'SNP(R)': {'pre': 20, 'post': 341, 'downstrea...",aSP14b candidates,"['ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)'...","['ATL(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)'...",CRE003_a
1723506321,343,6,9,16,22,218,246,M_l2PNm14_R,M_l2PNm14,957,...,False,Roughly traced,AVM03,441.5,"[13727, 33545, 32552]","{'SNP(R)': {'pre': 465, 'post': 353, 'downstre...",,"['AL(R)', 'AL-D(R)', 'AL-DA2(R)', 'AL-DC1(R)',...","['AL(R)', 'AL-DC1(R)', 'AL-DC2(R)', 'AL-DC4(R)...",M_l2PNm14
355086967,562,6,9,16,22,160,384,SMP211_R,SMP211,177,...,False,Roughly traced,PDL06,285.0,"[6295, 18095, 5392]","{'SNP(R)': {'pre': 59, 'post': 223, 'downstrea...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SCL(R)...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SIP(R)...",SMP211
388280322,637,6,9,16,22,372,427,SMP448_R,SMP448,172,...,False,Roughly traced,PDM11,240.0,"[19409, 22871, 3792]","{'SNP(R)': {'pre': 138, 'post': 688, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'RUB(R)...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'SCL(R)...",SMP448
417834021,688,6,9,16,22,394,384,LHPD2a4_a_R,LHPD2a4_a,318,...,False,Roughly traced,PDL15,299.5,"[6148, 18697, 6352]","{'SNP(R)': {'pre': 211, 'post': 528, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...",LHPD2a4_a
449205588,762,6,9,16,22,426,489,LHPD2a4_b_R,LHPD2a4_b,302,...,False,Roughly traced,PDL15,341.0,"[4210, 22128, 6624]","{'SNP(R)': {'pre': 180, 'post': 354, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...",LHPD2a4_b
449205611,763,6,9,16,22,372,427,LHPD2a4_a_R,LHPD2a4_a,319,...,False,Roughly traced,PDL15,288.0,"[4298, 21397, 6368]","{'SNP(R)': {'pre': 222, 'post': 406, 'downstre...",,"['AVLP(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'IN...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...",LHPD2a4_a
449209971,764,6,9,16,22,372,427,SIP049_R,SIP049,269,...,False,Roughly traced,PDL15,318.5,"[4010, 22141, 7904]","{'SNP(R)': {'pre': 242, 'post': 335, 'downstre...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'LH(R)'...",SIP049
484130600,873,6,9,16,22,467,535,SMP145_R,SMP145,2264,...,False,Roughly traced,ADM10,269.5,"[18922, 31643, 7680]","{'SNP(R)': {'pre': 760, 'post': 2086, 'downstr...",,"['ATL(L)', 'ATL(R)', 'AVLP(R)', 'CA(R)', 'CRE(...","['AL(R)', 'ATL(L)', 'ATL(R)', 'BU(R)', 'CA(R)'...",SMP145
486880919,912,6,9,16,22,394,555,SMP012_R,SMP012,464,...,False,Roughly traced,ADL07,396.0,"[3564, 25849, 17408]","{'SNP(R)': {'pre': 337, 'post': 1433, 'downstr...",,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'INP', 'MB(+AC...",SMP012


In [23]:
HB_node_df[HB_node_df['celltype']=='pC1e']

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
514850616,974,1,1,1,312,494,578,pC1e_R,pC1e,697,...,False,Traced,PDM09,356.0,"[23338, 10397, 17648]","{'SNP(R)': {'pre': 387, 'post': 1777, 'downstr...",,"['AOTU(R)', 'AVLP(R)', 'EPA(R)', 'ICL(R)', 'IN...","['AOTU(R)', 'AVLP(R)', 'EPA(R)', 'ICL(R)', 'IN...",pC1e


In [24]:
h = z[z['0.5']==4]
x=h[h['0.75']==5]
x[x['1.0']==61]

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1


In [25]:
HB_node_df[HB_node_df['celltype']=='SMP550']

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
452689494,789,6,32,5,234,437,498,SMP550_R,SMP550,1414,...,False,Roughly traced,PDM29,347.0,"[19511, 18367, 6016]","{'SNP(R)': {'pre': 1248, 'post': 3275, 'downst...",pSP7 candidates,"['AVLP(R)', 'INP', 'LH(R)', 'PLP(R)', 'SCL(R)'...","['AVLP(R)', 'INP', 'LH(R)', 'PLP(R)', 'SCL(R)'...",SMP550
579700707,1306,4,5,5,335,478,551,SMP550(PDM29)_L,SMP550,992,...,True,Leaves,,,,"{'SNP(L)': {'pre': 981, 'post': 1089, 'downstr...",pSP7 candidates,"['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']","['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']",SMP550


In [27]:
# Pulled from Prof G's code on github (https://github.com/Gutierrez-lab/oviIN-analyses-gabrielle/blob/main/modular_sandbox.ipynb)
def modularity_merge(df1,df2,suf1,suf2):
    """Given two modularity dataframes, merge them along shared body IDs. Pass in suffixes for the columns as strings."""
    merged_mod_df = df1.merge(df2, left_on='id', right_on='id', suffixes=[suf1, suf2])
    #merged_mod_df = df1.merge(df2, left_on='id', right_on='id', suffixes=['_oviHB', '_wholeHB'])
    return merged_mod_df

# Defining path to whole brain data
preproc_dir = "hemibrain/preprocessed-" + "v1.2"
preproc_nodes = "preprocessed_nodes.csv"

# Creating dataframe
WB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
WB_node_df["type_group"] = HB_node_df["celltype"]

# Print dataframe
WB_node_df

Unnamed: 0_level_0,0.75,0.05,0.1,0.25,0.5,1.0,0.0,instance,celltype,pre,...,status,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,inputRois,outputRois,roiInfo,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
200326126,1,1,1,1,1,1,1,CL141_L,CL141,395,...,Traced,False,Roughly traced,PDL20,251.0,"[3844, 14565, 15568]","['AVLP(R)', 'IB', 'ICL(R)', 'INP', 'LH(R)', 'M...","['AVLP(R)', 'ICL(R)', 'INP', 'LH(R)', 'MB(+ACA...","{'SNP(R)': {'pre': 17, 'post': 19, 'downstream...",
202916528,2,2,2,2,2,2,2,FS4C(FQ12)_C4_R,FS4C,95,...,Traced,False,Roughly traced,PDM03,301.0,"[20091, 12266, 12208]","['CX', 'FB', 'FB-column3', 'FBl1', 'FBl2', 'FB...","['CX', 'FB', 'FBl7', 'SMP(L)', 'SNP(L)']","{'SNP(L)': {'pre': 93, 'post': 35, 'downstream...",
203253072,3,2,3,3,3,3,2,FS4A(AB)(FQ12)_C3_R,FS4A,62,...,Traced,False,Roughly traced,PDM03,236.0,"[17809, 10233, 14208]","['AB(R)', 'CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB'...","['AB(R)', 'CX', 'FB', 'FB-column3', 'FBl1', 'F...","{'SNP(L)': {'pre': 52, 'post': 30, 'downstream...",
203253253,4,3,3,4,4,4,2,SMP505(PDM22)_L,SMP505,415,...,Traced,False,Roughly traced,,351.0,"[32168, 13757, 13336]","['ATL(L)', 'CRE(-RUB)(L)', 'CRE(L)', 'ICL(L)',...","['SIP(L)', 'SIP(R)', 'SLP(R)', 'SMP(L)', 'SMP(...","{'SNP(L)': {'pre': 150, 'post': 2282, 'downstr...",SMP505
203257652,5,2,3,2,2,2,2,FS4C(FQ12)_C6_R,FS4C,65,...,Traced,False,Roughly traced,PDM02,301.0,"[22758, 16749, 10240]","['CX', 'FB', 'FBl1', 'FBl2', 'FBl3', 'FBl4', '...","['SIP(L)', 'SMP(L)', 'SNP(L)']","{'SNP(L)': {'pre': 65, 'post': 36, 'downstream...",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7112579856,2322,1,1,157,1064,3243,1,LC20,LC20,319,...,Traced,False,Roughly traced,,301.5,"[7883, 23524, 33072]","['LO(R)', 'OL(R)', 'PLP(R)', 'VLNP(R)']","['LO(R)', 'OL(R)', 'PLP(R)', 'VLNP(R)']","{'OL(R)': {'pre': 268, 'post': 594, 'downstrea...",
7112615127,2670,9,19,46,226,3110,6,AVLP335_R,AVLP335,362,...,Traced,False,Roughly traced,AVL13,,,"['AVLP(R)', 'PVLP(R)', 'VLNP(R)']","['AVLP(R)', 'PVLP(R)', 'VLNP(R)']","{'VLNP(R)': {'pre': 362, 'post': 577, 'downstr...",
7112617294,2386,9,19,46,226,3362,6,AVLP331_R,AVLP331,285,...,Traced,False,Roughly traced,AVL13,,,"['AVLP(R)', 'PVLP(R)', 'VLNP(R)']","['AVLP(R)', 'PVLP(R)', 'VLNP(R)']","{'VLNP(R)': {'pre': 285, 'post': 661, 'downstr...",
7112622044,879,3,8,14,120,2836,3,LAL137(PVL05)_L,LAL137,1376,...,Traced,False,Roughly traced,,,,"['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","['CRE(-ROB,-RUB)(R)', 'CRE(-RUB)(L)', 'CRE(L)'...","{'SNP(R)': {'pre': 304, 'post': 130, 'downstre...",LAL137


In [28]:
# Defining base res and suffixes
res = '0.0'
df1_suf = '_oviHB'
df2_suf = '_wholeHB'

# Using merge function from code cell above
mod_merge_df = modularity_merge(HB_node_df[[res]],WB_node_df[[res]],df1_suf,df2_suf)
mod_merge_df

Unnamed: 0_level_0,0.0_oviHB,0.0_wholeHB
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1007402796,1,1
1007640554,2,1
1008024276,3,2
1008378448,3,2
1008378609,3,2
...,...,...
987117151,5,1
987877232,3,2
988269593,3,2
988567837,5,2


In [29]:
chi1 = res + df1_suf
chi2 = res + df2_suf

jm = joint_marginal(mod_merge_df, chi1, chi2, include_fraction=True)

# sort the clusters on the y axis to get a more "diagonal" plot
yrange = jm.sort_values([f"{chi2}_fraction"], ascending=False).groupby(chi2).agg({chi1: "first", f"{chi2}_fraction": "first", "joint_count": "first"}).sort_values([chi1, "joint_count"], ascending=[True, False]).index

# make a bokeh figure
f = figure(title=f"Clusters at chi2 = {chi2} vs. clusters at chi1 = {chi1}",
x_range=FactorRange(factors=[str(i + 1) for i in range(jm[chi1].max())]),
y_range=FactorRange(factors=[str(y) for y in yrange]),
width=600, height=700)
jm["x"] = jm[chi1].apply(str)  # bokeh factor range has to have strings, so we have to convert these
jm["y"] = jm[chi2].apply(str)

f.rect(x="x", y="y", width=f"{chi1}_fraction", height=f"{chi2}_fraction", source=jm)
f.add_tools(HoverTool(tooltips={"Neurons": "@joint_count (@joint_fraction{%%} of Hemibrain)",
                                f"Fraction of {chi2}": f"@{{{chi2}_fraction}}{{2.%%}}",
                                f"Fraction of {chi1}": f"@{{{chi1}_fraction}}{{2.%%}}"}))
f.xaxis.axis_label = 'Cluster in ' +chi1
f.yaxis.axis_label = 'Cluster in ' +chi2

show(f)