In [1]:
import os
import time
import pandas as pd
import numpy as np
import networkx as nx
import collections
from scipy import sparse as sp
from scipy.stats import rankdata

import itertools
from itertools import combinations, combinations_with_replacement, cycle
from functools import reduce

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from util import *

import colorcet as cc

In [2]:
# --- Full-network configuration -------------------------------------------
# Data-set release tag; it is appended to every directory name below.
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

# Location and file names of the preprocessed tables.
preproc_dir = "oviIN/preprocessed-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"  # NOTE(review): looks like a placeholder value — confirm
preproc_edges = "preprocessed_undirected_edges.csv"

# Location and file names of the clustering inputs.
hemibrain_dir = "oviIN/clustering_" + hemibrain_version
hemibrain_nodes = "inputsoutputs_key.txt"
hemibrain_edges = "inputsoutputs.txt"

# Column labels used later: numeric-looking labels (ordered by their float
# value) followed by the two annotation columns.
reneel_params = sorted(['0.05', '0.1', '0.5', '0.75'], key=float)
type_params = ['celltype', 'instance']
list_of_params = [*reneel_params, *type_params]

log_msg("Set up directory info and useful lists")

# Read the node table (first CSV column becomes the index) and attach a
# "type_group" column computed by applying `simplify_type` to each "celltype".
HB_node_df_full = pd.read_csv(
    os.path.join(preproc_dir, preproc_nodes),
    index_col=0,
).assign(type_group=lambda nodes: nodes["celltype"].apply(simplify_type))

2023 07 07 15:44:07  Hemibrain data set being used: v1.2.1
2023 07 07 15:44:07  Set up directory info and useful lists


In [3]:
# --- Inputs-only configuration --------------------------------------------
# Rebind the shared path variables so they point at the
# "preprocessed_inputs-<version>" / "clustering_inputs_<version>" directories.
preproc_dir = "oviIN/preprocessed_inputs-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"  # NOTE(review): looks like a placeholder value — confirm
preproc_edges = "preprocessed_undirected_edges.csv"

hemibrain_dir = "oviIN/clustering_inputs_" + hemibrain_version
hemibrain_nodes = "only_inputs_key.txt"
hemibrain_edges = "only_inputs.txt"

log_msg("Set up directory info and useful lists")

# Read the node table (first CSV column becomes the index) and attach a
# "type_group" column computed by applying `simplify_type` to each "celltype".
HB_node_df_in = pd.read_csv(
    os.path.join(preproc_dir, preproc_nodes),
    index_col=0,
).assign(type_group=lambda nodes: nodes["celltype"].apply(simplify_type))

2023 07 07 15:44:08  Set up directory info and useful lists


In [4]:
# --- Outputs-only configuration -------------------------------------------
# Rebind the shared path variables so they point at the
# "preprocessed_outputs-<version>" / "clustering_outputs_<version>" directories.
preproc_dir = "oviIN/preprocessed_outputs-" + hemibrain_version
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"  # NOTE(review): looks like a placeholder value — confirm
preproc_edges = "preprocessed_undirected_edges.csv"

hemibrain_dir = "oviIN/clustering_outputs_" + hemibrain_version
hemibrain_nodes = "only_outputs_key.txt"
hemibrain_edges = "only_outputs.txt"

log_msg("Set up directory info and useful lists")

# Read the node table (first CSV column becomes the index) and attach a
# "type_group" column computed by applying `simplify_type` to each "celltype".
HB_node_df_out = pd.read_csv(
    os.path.join(preproc_dir, preproc_nodes),
    index_col=0,
).assign(type_group=lambda nodes: nodes["celltype"].apply(simplify_type))

2023 07 07 15:44:10  Set up directory info and useful lists


In [7]:
# Inspect the node table read from preprocessed_nodes.csv; as the last
# expression of the cell it is rendered as the cell's output.
HB_node_df_full

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1003215282,1,1,1,1,1,1,1,CL229_R,CL229,100,...,False,Roughly traced,PDM19,301.0,"[23044, 14981, 11600]","{'INP': {'pre': 87, 'post': 351, 'downstream':...",,"['EPA(R)', 'GOR(R)', 'IB', 'ICL(R)', 'INP', 'S...","['GOR(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...",CL
1005952640,2,1,1,2,2,2,2,IB058_R,IB058,664,...,False,Roughly traced,PVL20,,,"{'INP': {'pre': 464, 'post': 1327, 'downstream...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...",IB
1006928515,3,1,1,1,3,3,3,CL300_R,CL300,86,...,False,Roughly traced,PVL13,236.0,"[12083, 10523, 16816]","{'INP': {'pre': 79, 'post': 126, 'downstream':...",,"['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...","['ATL(R)', 'IB', 'ICL(R)', 'INP', 'SCL(R)', 'S...",CL
1007260806,4,1,2,1,4,4,4,CL301_R,CL301,119,...,False,Roughly traced,PVL13,236.0,"[13524, 10108, 16480]","{'INP': {'pre': 40, 'post': 128, 'downstream':...",,"['GOR(R)', 'IB', 'ICL(R)', 'INP', 'PLP(R)', 'S...","['IB', 'ICL(R)', 'INP', 'PLP(R)', 'SCL(R)', 'S...",CL
1007402796,5,1,1,2,5,5,5,PS119_R,PS119,245,...,False,Roughly traced,PDM16,301.0,"[25364, 12010, 12544]","{'SNP(R)': {'pre': 100, 'post': 50, 'downstrea...",,"['CAN(R)', 'GOR(R)', 'IB', 'ICL(L)', 'ICL(R)',...","['AVLP(R)', 'CAN(R)', 'IB', 'ICL(L)', 'INP', '...",PS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
988269593,4545,3,5,5,95,13,1785,FB4E_L,FB4E,168,...,False,Roughly traced,AVM08,,,"{'SNP(L)': {'post': 25, 'upstream': 25, 'mito'...",CRELALFB4_1,"['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...","['CRE(-RUB)(L)', 'CRE(L)', 'CX', 'FB', 'FB-col...",FB
988291460,4546,4,4,263,1059,1501,1786,,,2,...,,0.5assign,,,,"{'SNP(L)': {'pre': 2, 'post': 1, 'downstream':...",,"['SMP(L)', 'SNP(L)']","['SMP(L)', 'SNP(L)']",
988567837,4547,5,8,7,13,13,13,FB4G_R,FB4G,785,...,False,Roughly traced,AVM08,,,"{'SNP(R)': {'pre': 6, 'post': 73, 'downstream'...",CRELALFB4_3,"['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FB
988909130,4548,5,8,7,27,56,436,FB5V_R,FB5V,269,...,False,Roughly traced,AVM10,296.5,"[13226, 32024, 18600]","{'SNP(R)': {'pre': 1, 'post': 28, 'downstream'...",CRELALFB5,"['AB(R)', 'CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX',...","['CRE(-ROB,-RUB)(R)', 'CRE(R)', 'CX', 'FB', 'F...",FB


In [26]:
# Two-stage row filter on the full node table:
#   1. keep rows whose '0.5' column equals 403 (kept in `f`),
#   2. of those, display the rows whose '0.0' column equals 1.
f = HB_node_df_full.loc[HB_node_df_full['0.5'].eq(403)]
f.loc[f['0.0'].eq(1)]

Unnamed: 0_level_0,key,0.0,0.05,0.1,0.5,0.75,1.0,instance,celltype,pre,...,cropped,statusLabel,cellBodyFiber,somaRadius,somaLocation,roiInfo,notes,inputRois,outputRois,type_group
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
299290105,963,1,1,2,403,522,617,SMP291_R,SMP291,1061,...,False,Roughly traced,PDM05,301.0,"[23184, 12006, 13840]","{'SNP(R)': {'pre': 1056, 'post': 4001, 'downst...",,"['ATL(R)', 'INP', 'SCL(R)', 'SIP(R)', 'SMP(R)'...","['INP', 'SCL(R)', 'SIP(R)', 'SMP(R)', 'SNP(R)']",SMP
358639742,1074,1,1,2,403,522,591,SMP512_R,SMP512,440,...,False,Roughly traced,PDM22,301.0,"[20686, 12671, 10864]","{'SNP(R)': {'pre': 263, 'post': 1788, 'downstr...",,"['INP', 'SCL(R)', 'SMP(L)', 'SMP(R)', 'SNP(L)'...","['INP', 'SCL(R)', 'SMP(L)', 'SMP(R)', 'SNP(L)'...",SMP
360677632,1095,1,1,2,403,522,631,SMP251_R,SMP251,823,...,False,Roughly traced,PDL18,311.0,"[5629, 13062, 14416]","{'SNP(R)': {'pre': 440, 'post': 2116, 'downstr...",,"['ATL(R)', 'INP', 'SCL(R)', 'SIP(R)', 'SMP(L)'...","['INP', 'SCL(R)', 'SIP(R)', 'SMP(L)', 'SMP(R)'...",SMP
449288517,1286,1,1,2,403,691,776,SMP533_R,SMP533,202,...,False,Roughly traced,PDM24,270.0,"[14961, 19917, 4376]","{'SNP(R)': {'pre': 202, 'post': 1173, 'downstr...",,"['INP', 'MB(+ACA)(R)', 'SCL(R)', 'SLP(R)', 'SM...","['SLP(R)', 'SMP(R)', 'SNP(R)']",SMP
453130054,1320,1,1,2,403,522,591,SMP516_R,SMP516,422,...,False,Roughly traced,PDM22,301.0,"[20878, 11691, 12736]","{'SNP(R)': {'pre': 249, 'post': 1650, 'downstr...",,"['INP', 'SCL(R)', 'SLP(R)', 'SMP(L)', 'SMP(R)'...","['INP', 'SCL(R)', 'SMP(L)', 'SMP(R)', 'SNP(L)'...",SMP
481406868,1391,1,1,2,403,522,721,SMP404_R,SMP404,391,...,False,Roughly traced,PDM09,316.0,"[21061, 10439, 16344]","{'SNP(R)': {'pre': 391, 'post': 2115, 'downstr...",,"['ATL(R)', 'INP', 'SIP(R)', 'SMP(R)', 'SNP(R)']","['SIP(R)', 'SMP(R)', 'SNP(R)']",SMP
481747302,1394,1,1,2,403,522,545,SMP516_R,SMP516,409,...,False,Roughly traced,PDM22,301.0,"[20594, 14085, 9984]","{'SNP(R)': {'pre': 234, 'post': 1740, 'downstr...",,"['INP', 'SCL(R)', 'SLP(R)', 'SMP(L)', 'SMP(R)'...","['INP', 'SCL(R)', 'SLP(R)', 'SMP(L)', 'SMP(R)'...",SMP
484355328,1410,1,1,2,403,522,591,SMP043_R,SMP043,272,...,False,Roughly traced,ADL27,292.5,"[12026, 32944, 13232]","{'SNP(R)': {'pre': 270, 'post': 592, 'downstre...",,"['AVLP(R)', 'INP', 'PLP(R)', 'SCL(R)', 'SLP(R)...","['AVLP(R)', 'INP', 'SCL(R)', 'SLP(R)', 'SMP(R)...",SMP
511353266,1491,1,1,2,403,691,869,SLP412_R,SLP412,182,...,False,Roughly traced,PDM18,254.5,"[17409, 23962, 3412]","{'SNP(R)': {'pre': 160, 'post': 505, 'downstre...",,"['INP', 'MB(+ACA)(R)', 'PLP(R)', 'SCL(R)', 'SL...","['INP', 'LH(R)', 'MB(+ACA)(R)', 'PLP(R)', 'SCL...",SLP
511772101,1494,1,1,2,403,522,591,,,511,...,True,Leaves,,,,"{'SNP(R)': {'pre': 324, 'post': 317, 'downstre...",,"['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']","['SMP(L)', 'SMP(R)', 'SNP(L)', 'SNP(R)']",
