In [4]:
import os
import time
import pandas as pd
import numpy as np
import networkx as nx
import collections
from scipy import sparse as sp
from scipy.stats import rankdata

from itertools import combinations, combinations_with_replacement, cycle
from functools import reduce

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from util import log_msg, simplify_type
log_msg("Imports complete")


2023 06 22 12:49:47  Imports complete


In [11]:
# Release of the hemibrain data set; the versioned paths below are derived from it.
hemibrain_version = "v1.2.1"
log_msg("Hemibrain data set being used:", hemibrain_version)

# Preprocessed inputs: node table, centroid table and undirected edge list.
preproc_dir = f"oviIN/preprocessed-{hemibrain_version}"
preproc_nodes = "preprocessed_nodes.csv"
# NOTE(review): "x" looks like a placeholder file name -- confirm. The loading
# cell falls back to `preproc_nodes` when this file does not exist.
preproc_centroids = "x"
preproc_edges = "preprocessed_undirected_edges.csv"

# Clustering inputs: key (node) file and edge file.
hemibrain_dir = f"oviIN/clustering_{hemibrain_version}"
hemibrain_nodes = "inputsoutputs_key.txt"
hemibrain_edges = "inputsoutputs.txt"
# Alternative file pairs; uncomment exactly one pair to switch data sets.
#hemibrain_nodes = "only_inputs_key.txt"
#hemibrain_edges = "only_inputs.txt"
#hemibrain_nodes = "only_outputs_key.txt"
#hemibrain_edges = "only_outputs.txt"

# Output locations.
figure_dir = os.path.join("figures", "paper")
movie_dir = "movies"
analysis_dir = os.path.join("analysis", hemibrain_version)
obj_dir = os.path.join("obj", hemibrain_version)  # 3d objects from, e.g. fetch_roi_mesh
skel_dir = os.path.join("skeleton", hemibrain_version)  # skeleta of neurons in .csv format

# Create any missing output directory. `skel_dir` is not part of this tuple,
# so it is not created here.
for d in (figure_dir, analysis_dir, obj_dir, movie_dir):
    if os.path.isdir(d):
        continue
    log_msg("Creating directory", d)
    os.makedirs(d)

# Column names of HB_node_df used downstream: resolution-parameter columns
# (sorted numerically) followed by the cell annotation columns.
reneel_params = sorted(['0.0', '0.1', '0.5', '0.75'], key=float)
type_params = ['celltype', 'instance']
list_of_params = [*reneel_params, *type_params]

log_msg("Set up directory info and useful lists")

2023 06 22 13:01:28  Hemibrain data set being used: v1.2.1
2023 06 22 13:01:28  Set up directory info and useful lists


In [12]:
from neuprint import Client
from neuprint import fetch_roi_hierarchy


# Read the auth token from the first line of the token file. The context
# manager guarantees the file handle is closed even if reading fails.
with open("flybrain.auth.txt", 'r') as auth_token_file:
    auth_token = next(auth_token_file).strip()

# Best-effort client setup: on any ordinary failure `np_client` is set to None
# so later cells can test for it. Only `Exception` is caught, so Ctrl-C
# (KeyboardInterrupt) and SystemExit are no longer swallowed, and the error is
# logged instead of being discarded.
try:
    np_client = Client('neuprint.janelia.org', dataset='hemibrain:' + hemibrain_version, token=auth_token)
    log_msg("neuprint Client set up as `np_client`, version", np_client.fetch_version())
except Exception as err:
    np_client = None
    log_msg("neuprint Client set up failed!", repr(err))

2023 06 22 13:01:29  neuprint Client set up as `np_Client`, version 0.1.0


In [13]:
# --- Node table -------------------------------------------------------------
# Prefer the centroid-augmented table when that file exists, otherwise fall
# back to the plain preprocessed node table. The first CSV column is the index.
log_msg("Loading node dataframe")
centroid_path = os.path.join(preproc_dir, preproc_centroids)
if os.path.isfile(centroid_path):
    log_msg("  (with centroids)")
    node_table_path = centroid_path
else:
    log_msg("  (without centroids)")
    node_table_path = os.path.join(preproc_dir, preproc_nodes)
HB_node_df = pd.read_csv(node_table_path, index_col=0)

# Derive a coarser label from each cell type via the project helper.
log_msg("Adding 'type group'")
HB_node_df["type_group"] = HB_node_df["celltype"].apply(simplify_type)
log_msg("Done!")

# --- Edge table -------------------------------------------------------------
# Space-delimited, header-less file. Columns 0 and 1 become "pre"/"post"; any
# further column keeps its positional integer name.
log_msg("Loading directed edges from csv")
edge_path = os.path.join(hemibrain_dir, hemibrain_edges)
HB_edge_df = pd.read_csv(edge_path, delimiter=' ', header=None)
HB_edge_df = HB_edge_df.rename(columns={0: "pre", 1: "post"})
log_msg("Done!")

# --- Attach node attributes to both endpoints -------------------------------
# No `how` is passed, so pandas' default inner join applies: an edge whose
# endpoint id is absent from HB_node_df's index is dropped. In the second merge
# the overlapping attribute columns get the suffixes "pre" / "post".
log_msg("Merging in cell info to edge df")
node_info = HB_node_df[list_of_params + ['type_group']]
HB_edge_df = HB_edge_df.merge(node_info, left_on='pre', right_index=True)
HB_edge_df = HB_edge_df.merge(node_info, left_on='post', right_index=True, suffixes=['pre', 'post'])
log_msg("Done!")

2023 06 22 13:01:29  Loading node dataframe
2023 06 22 13:01:29    (without centroids)
2023 06 22 13:01:29  Adding 'type group'
2023 06 22 13:01:29  Done!
2023 06 22 13:01:29  Loading directed edges from csv
2023 06 22 13:01:29  Done!
2023 06 22 13:01:29  Merging in cell info to edge df
2023 06 22 13:01:29  Done!


In [44]:
# Display the merged edge table to check the "pre"/"post"-suffixed attribute columns.
HB_edge_df

Unnamed: 0,pre,post,2,0.0pre,0.1pre,0.5pre,0.75pre,celltypepre,instancepre,type_grouppre,0.0post,0.1post,0.5post,0.75post,celltypepost,instancepost,type_grouppost
0,203253253,203594164,5,2,3,330,421,SMP505,SMP505(PDM22)_L,SMP,3,80,337,432,,,
5330,296859399,203594164,1,2,3,6,19,SMP560,SMP560_R,SMP,3,80,337,432,,,
31403,329566174,203594164,1,3,5,13,166,OA-VPM3,OA-VPM3(NO2/NO3)_R,OA,3,80,337,432,,,
39389,357224041,203594164,1,2,3,452,606,LHPV5l1,LHPV5l1_R,LHPV,3,80,337,432,,,
42741,360138182,203594164,1,4,6,338,580,,,,3,80,337,432,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
408540,5813020840,670785345,1,2,3,210,685,SMP503,SMP503(PDM22)_L,SMP,2,157,747,1131,,,
418091,5813032847,923752335,1,5,7,193,655,SMP053,SMP053_R,SMP,6,243,1007,1,,,
424662,5813055749,791090597,1,5,2,273,877,SMP185,SMP185_R,SMP,1,203,872,1295,,,
435303,5813066377,1750816348,1,1,1,191,27,,,,1,68,293,368,,,


In [28]:
# Creates the list of the correct clusters that aligns to specified clusters
def create_lists(clusters, resolution='0.1', node_df=None):
    """Collect the index labels of all rows assigned to any of `clusters`.

    Parameters
    ----------
    clusters : iterable
        Cluster ids to look up in the `resolution` column.
    resolution : str, optional
        Name of the column holding the cluster assignment. Defaults to
        '0.1', the column this function originally hard-coded.
    node_df : pandas.DataFrame, optional
        Table to search. Defaults to the notebook-level `HB_node_df`.

    Returns
    -------
    list
        Index labels of the matching rows, grouped in the order the cluster
        ids were given (rows of the first cluster first, and so on). An empty
        `clusters` yields an empty list.
    """
    if node_df is None:
        # Resolved at call time so the function keeps working exactly as
        # before when called with a single argument.
        node_df = HB_node_df

    labels = node_df[resolution]  # select the column once, not per cluster
    members = []
    for cluster_id in clusters:
        members.extend(labels[labels == cluster_id].index.tolist())
    return members

# Calculates certain differences between the different lists of neurons
def calculate_difference(list, list2):
    """Compare two collections of ids as sets.

    Parameters
    ----------
    list : iterable of hashable
        First collection. The name shadows the builtin but is kept so that
        existing keyword callers continue to work.
    list2 : iterable of hashable
        Second collection.

    Returns
    -------
    tuple
        ``(unique_1, unique_2, common, jaccard_sim)`` where ``unique_1`` is
        the set of elements only in `list`, ``unique_2`` the set only in
        `list2`, ``common`` their intersection, and ``jaccard_sim`` the size
        of the intersection divided by the size of the union.

    Notes
    -----
    When both inputs are empty the union is empty and the ratio is 0/0. The
    similarity is then reported as 1.0 (two identical sets) instead of
    raising ZeroDivisionError.
    """
    set1 = set(list)
    set2 = set(list2)

    unique_1 = set1 - set2
    unique_2 = set2 - set1
    common = set1 & set2

    # |A ∪ B| equals |A only| + |B only| + |A ∩ B|.
    union_size = len(unique_1) + len(unique_2) + len(common)
    jaccard_sim = len(common) / union_size if union_size else 1.0

    return unique_1, unique_2, common, jaccard_sim

# Compiles all functions into one and prints out the information
def analysis(clusters, zero_cluster, cluster_number):
    """Print how a group of 0.1 clusters overlaps with one 0.0 cluster.

    Parameters
    ----------
    clusters : iterable
        Cluster ids at chi = 0.1 whose members are pooled via `create_lists`.
    zero_cluster : iterable
        Node ids belonging to the chi = 0.0 cluster being compared against.
    cluster_number : int
        Id of that chi = 0.0 cluster; used only in the printed labels.

    Returns
    -------
    None
        The counts and the Jaccard similarity are printed, not returned.
    """
    list_point1 = create_lists(clusters)
    unique_1, unique_2, common_one, jaccard_sim_one = calculate_difference(list_point1, zero_cluster)
    print("\nThis is information about clusters {} compared with cluster {}".format(clusters, cluster_number))
    print('Neurons unique to clusters at chi value of 0.1: ', len(unique_1))
    # The labels below previously always said "cluster 1", whichever cluster
    # was being compared; they now report the actual cluster number.
    print('Neurons unique to chi value of 0.0 of cluster {}: '.format(cluster_number), len(unique_2))
    print('Length of common neurons:', len(common_one))
    print("Jaccard similarity between zero cluster {} and 0.1:".format(cluster_number), jaccard_sim_one)

In [29]:
# High-resolution (0.1) cluster ids grouped by the low-resolution (0.0)
# cluster they are compared against: clusterN is paired with 0.0 cluster N.
cluster1 = [1, 2, 4, 47]
cluster2 = [3, 42, 133, 134]
cluster3 = [5]
cluster4 = [6, 115, 123]
cluster5 = [7, 12, 13, 67]
cluster6 = [8, 10, 48, 409]

In [30]:
# Node ids (index labels of HB_node_df) for clusters 1-6 at resolution 0.0.
# The label column is selected once and each cluster's index is read from the
# filtered Series.
chi0_labels = HB_node_df['0.0']
li_cluster1 = chi0_labels[chi0_labels == 1].index
li_cluster2 = chi0_labels[chi0_labels == 2].index
li_cluster3 = chi0_labels[chi0_labels == 3].index
li_cluster4 = chi0_labels[chi0_labels == 4].index
li_cluster5 = chi0_labels[chi0_labels == 5].index
li_cluster6 = chi0_labels[chi0_labels == 6].index

In [42]:
# Compare each group of 0.1 clusters with its 0.0 counterpart, in order 1..6.
high_res_groups = [cluster1, cluster2, cluster3, cluster4, cluster5, cluster6]
low_res_members = [li_cluster1, li_cluster2, li_cluster3, li_cluster4, li_cluster5, li_cluster6]
for cluster_number, (clusters, zero_cluster) in enumerate(zip(high_res_groups, low_res_members), start=1):
    analysis(clusters, zero_cluster, cluster_number)



This is information about clusters [1, 2, 4, 47] compared with cluster 1
Neurons unique to clusters at chi value of 0.1:  277
Neurons unique to chi value of 0.0 of cluster 1:  187
Length of common neurons: 991
Jaccard similarity between zero cluster 1 and 0.1: 0.6810996563573883

This is information about clusters [3, 42, 133, 134] compared with cluster 2
Neurons unique to clusters at chi value of 0.1:  9
Neurons unique to chi value of 0.0 of cluster 1:  177
Length of common neurons: 605
Jaccard similarity between zero cluster 1 and 0.1: 0.7648546144121365

This is information about clusters [5] compared with cluster 3
Neurons unique to clusters at chi value of 0.1:  1
Neurons unique to chi value of 0.0 of cluster 1:  34
Length of common neurons: 345
Jaccard similarity between zero cluster 1 and 0.1: 0.9078947368421053

This is information about clusters [6, 115, 123] compared with cluster 4
Neurons unique to clusters at chi value of 0.1:  44
Neurons unique to chi value of 0.0 of clus