In [4]:
import os
import time
import pandas as pd
import numpy as np
import networkx as nx
import collections
from scipy import sparse as sp
from scipy.stats import rankdata

from itertools import combinations, combinations_with_replacement, cycle
from functools import reduce

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from util import log_msg, simplify_type
log_msg("Imports complete")


2023 06 21 14:56:46  Imports complete


In [16]:
# Dataset release; every versioned path below is derived from it.
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

# Preprocessed inputs.
preproc_dir = f"oviIN/preprocessed-{hemibrain_version}"
preproc_nodes = "preprocessed_nodes.csv"
preproc_centroids = "x"  # NOTE(review): looks like a placeholder file name -- confirm
preproc_edges = "preprocessed_undirected_edges.csv"

# Clustering inputs. Swap in one of the commented pairs to restrict the
# network to inputs only or outputs only.
hemibrain_dir = f"oviIN/clustering_{hemibrain_version}"
hemibrain_nodes = "inputsoutputs_key.txt"
hemibrain_edges = "inputsoutputs.txt"
#hemibrain_nodes = "only_inputs_key.txt"
#hemibrain_edges = "only_inputs.txt"
#hemibrain_nodes = "only_outputs_key.txt"
#hemibrain_edges = "only_outputs.txt"

# Output locations.
figure_dir = os.path.join("figures", "paper")
movie_dir = "movies"
analysis_dir = os.path.join("analysis", hemibrain_version)
obj_dir = os.path.join("obj", hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format

# Create any missing output directory (skel_dir is not part of this list).
for d in (figure_dir, analysis_dir, obj_dir, movie_dir):
    if os.path.isdir(d):
        continue
    log_msg("Creating directory", d)
    os.makedirs(d)

# Column names used to annotate nodes: resolution parameters in ascending
# numeric order, followed by the cell-type columns.
reneel_params = sorted(['0.0', '0.1', '0.5', '0.75'], key=float)
type_params = ['celltype', 'instance']
list_of_params = [*reneel_params, *type_params]

log_msg("Set up directory info and useful lists")

2023 06 21 14:59:20  Hemibrain data set being used: v1.2.1
2023 06 21 14:59:20  Set up directory info and useful lists


In [17]:
from neuprint import Client
from neuprint import fetch_roi_hierarchy


# Read the neuprint API token from the first line of the local auth file.
# The `with` block closes the file handle, and `readline()` returns '' for an
# empty file instead of raising StopIteration the way `next()` does.
with open("flybrain.auth.txt", 'r') as auth_token_file:
    auth_token = auth_token_file.readline().strip()

# Connect to neuprint. On failure `np_client` is set to None so later cells
# can test for it; the caught exception is logged so the cause is not hidden.
# `except Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
# SystemExit propagate.
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_client`, version", np_client.fetch_version())
except Exception as exc:
    np_client = None
    log_msg("neuprint Client set up failed!", exc)

2023 06 21 14:59:21  neuprint Client set up as `np_Client`, version 0.1.0


In [18]:
# Node table: use the centroid-augmented file when it exists, otherwise the
# plain preprocessed node file. The first CSV column becomes the index.
log_msg("Loading node dataframe")
if not os.path.isfile(os.path.join(preproc_dir, preproc_centroids)):
    log_msg("  (without centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_nodes), index_col=0)
else:
    log_msg("  (with centroids)")
    HB_node_df = pd.read_csv(os.path.join(preproc_dir, preproc_centroids), index_col=0)
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"].apply(simplify_type)
log_msg("Done!")

# Edge list: space-separated, no header row; the first two columns are the
# pre- and post-synaptic node ids.
log_msg("Loading directed edges from csv")
HB_edge_df = pd.read_csv(
    os.path.join(hemibrain_dir, hemibrain_edges),
    sep=' ',
    header=None,
)
HB_edge_df = HB_edge_df.rename(columns={0: "pre", 1: "post"})
log_msg("Done!")

# Attach node annotations to both endpoints of every edge. The second merge
# adds the same column names again, so the suffixes tell the two copies apart
# ('...pre' for the columns already present, '...post' for the new ones).
log_msg("Merging in cell info to edge df")
HB_edge_df = pd.merge(
    HB_edge_df,
    HB_node_df[list_of_params + ['type_group']],
    left_on='pre',
    right_index=True,
)
HB_edge_df = pd.merge(
    HB_edge_df,
    HB_node_df[list_of_params + ['type_group']],
    left_on='post',
    right_index=True,
    suffixes=('pre', 'post'),
)
log_msg("Done!")

2023 06 21 14:59:21  Loading node dataframe
2023 06 21 14:59:21    (without centroids)
2023 06 21 14:59:21  Adding 'type group'
2023 06 21 14:59:21  Done!
2023 06 21 14:59:21  Loading directed edges from csv
2023 06 21 14:59:21  Done!
2023 06 21 14:59:21  Merging in cell info to edge df
2023 06 21 14:59:21  Done!


In [51]:
def create_lists(clusters, node_df=None, resolutions=('0.1', '0.05')):
    """Collect the node ids belonging to any of `clusters`, once per resolution column.

    For every column name in `resolutions`, the rows of `node_df` whose value in
    that column equals a cluster id are gathered, cluster by cluster in the
    order given, and their index labels are concatenated into one list.

    Parameters
    ----------
    clusters : iterable
        Cluster ids to look up. An empty iterable yields empty lists.
    node_df : pandas.DataFrame, optional
        Node table to search. Defaults to the notebook-level `HB_node_df`.
    resolutions : sequence of str, optional
        Column names to search. Defaults to ('0.1', '0.05'), the two columns
        the original implementation hard-coded.
        NOTE(review): '0.05' is not among `reneel_params`
        ('0.0', '0.1', '0.5', '0.75'); confirm whether '0.5' was intended.

    Returns
    -------
    tuple of list
        One list of index labels per entry in `resolutions`, in the same order
        (two lists with the default arguments).

    Raises
    ------
    KeyError
        If any requested column is missing from the node table.
    """
    if node_df is None:
        node_df = HB_node_df

    # Fail early with a message that names every missing column.
    missing = [col for col in resolutions if col not in node_df.columns]
    if missing:
        raise KeyError(
            f"node table has no column(s) {missing}; "
            f"available columns: {node_df.columns.tolist()}"
        )

    return tuple(
        [
            node_id
            for cluster_id in clusters
            for node_id in node_df[node_df[col] == cluster_id].index.tolist()
        ]
        for col in resolutions
    )

In [38]:
# High-resolution cluster ids grouped by the low-resolution cluster they are
# assigned to: `clusterN` holds the high-resolution ids for low-resolution
# cluster N.
cluster1 = [1, 2, 4, 47]
cluster2 = [3, 42, 133, 134]
cluster3 = [5]
cluster4 = [6, 115, 123]
cluster5 = [7, 12, 13, 67]
cluster6 = [8, 10, 48, 409]

In [40]:
# Build the node-id lists for the high-resolution clusters listed in
# `cluster1`. Without this assignment `li_point1` / `li_point5` only exist as
# leftover kernel state and the notebook fails on Restart & Run All.
# NOTE(review): `cluster1` is assumed to be the intended argument because the
# comparison further down pairs `li_point1` with `li_cluster1` -- confirm.
li_point1, li_point5 = create_lists(cluster1)
len(li_point1)

1268

In [45]:
# Index labels of the nodes in each cluster of the '0.0' column, for
# cluster ids 1 through 6.
li_cluster1, li_cluster2, li_cluster3, li_cluster4, li_cluster5, li_cluster6 = (
    HB_node_df[HB_node_df['0.0'] == cluster_id].index
    for cluster_id in range(1, 7)
)

In [46]:
def calculate_difference(list, list2):
    """Compare two collections as sets and report their overlap.

    Parameters
    ----------
    list : iterable of hashable
        First collection. The name shadows the builtin `list`; it is kept so
        existing keyword callers keep working.
    list2 : iterable of hashable
        Second collection.

    Returns
    -------
    unique_1 : set
        Elements found only in `list`.
    unique_2 : set
        Elements found only in `list2`.
    common : set
        Elements found in both.
    jaccard_sim : float
        |intersection| / |union|. Two empty inputs are treated as identical
        and give 1.0 instead of raising ZeroDivisionError.
    """
    set1 = set(list)
    set2 = set(list2)

    unique_1 = set1 - set2
    unique_2 = set2 - set1
    common = set1 & set2

    # The three parts are disjoint, so their sizes add up to the union size.
    union_size = len(unique_1) + len(unique_2) + len(common)
    jaccard_sim = len(common) / union_size if union_size else 1.0

    return unique_1, unique_2, common, jaccard_sim

In [47]:
# Compare the node ids in `li_point1` with `li_cluster1` (the nodes whose
# '0.0' column equals 1): ids unique to each side, the shared ids, and the
# Jaccard similarity of the two sets.
unique_1, unique_2, common, jaccard_sim = calculate_difference(li_point1, li_cluster1)

In [50]:
# Summarize the comparison: sizes of the two one-sided differences, size of
# the overlap, and the Jaccard similarity.
print('Neurons unique to chi value of 0.1: ', len(unique_1))
print('Neurons unique to chi value of 0.0 of cluster 1: ', len(unique_2))
# Printing the full set of shared ids is left disabled; only its size is shown.
#print('Common neurons:', common)
print('Length of common neurons:', len(common))
print("Jaccard similarity:", jaccard_sim)

Neurons unique to list 1:  277
Neurons unique to list 2:  187
Length of common neurons: 991
Jaccard similarity: 0.6810996563573883
