In [2]:
import caveclient
import pandas as pd
import numpy as np
import seaborn as sns
import os
from matplotlib import pyplot as plt
from scipy import stats
import tqdm
import standard_transform
import itertools
import time

# Pandas display: never hide columns, and show up to 1000 rows before truncating.
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", 1000)

# Transform object provided by standard_transform for the v1dd dataset.
trafo_streamline = standard_transform.datasets.v1dd_streamline_nm()

# Every file this notebook reads or writes lives below the user's home directory.
# Note that data_dir already ends with a slash.
HOME = os.path.expanduser("~")
data_dir = f"{HOME}/SWDB_2025_Connectomics/data/"

In [3]:
# Materialization version used for every query; it is also embedded in the
# names of the files this notebook writes.
mat_version = 1169

client = caveclient.CAVEclient(
    "v1dd",
    server_address="https://global.em.brain.allentech.org",
    version=mat_version,
)

## Proofread information

In [46]:
# Source tables/views for the proofreading lists.
neuron_soma_df = client.materialize.query_table("neurons_soma_model")
axon_proof_df = client.materialize.query_table(
    "proofreading_status_and_strategy",
    filter_in_dict={"strategy_axon": ["axon_fully_extended"]},
)
single_soma_df = client.materialize.query_view(
    "single_somas",
    desired_resolution=[1, 1, 1],
    split_positions=True,
)

# "Dendrite" list: root ids from the single_somas view that also appear in the
# neuron soma model table.
single_soma_root_ids = single_soma_df["pt_root_id"].to_numpy()
is_in_neuron_table = np.isin(single_soma_root_ids, neuron_soma_df["pt_root_id"])
dendrite_proof_root_ids = single_soma_root_ids[is_in_neuron_table]

# "Axon" list: root ids whose axon strategy is "axon_fully_extended".
axon_proof_root_ids = axon_proof_df["pt_root_id"].to_numpy(copy=True)

In [47]:
# This is the first cell that writes into data_dir. np.save does not create
# missing parent folders, so make sure the output directory exists first.
os.makedirs(data_dir, exist_ok=True)

# Persist both root-id lists, tagged with the materialization version.
np.save(f"{data_dir}/proofread_axon_list_{mat_version}.npy", axon_proof_root_ids)
np.save(f"{data_dir}/proofread_dendrite_list_{mat_version}.npy", dendrite_proof_root_ids)

## Cell information

In [14]:
# Cell-type labels whose prefix is in this tuple are dropped from the table.
excluded_prefixes = ("L23_", "L1_inh", "L6_6b")

cell_type_df = client.materialize.query_table(
    "cell_type_snds",
    split_positions=True,
    desired_resolution=[1, 1, 1],
).rename(columns={"classification_system": "cell_type_coarse"})

# str.startswith accepts a tuple and returns True if any prefix matches.
has_excluded_prefix = cell_type_df["cell_type"].map(
    lambda label: label.startswith(excluded_prefixes)
)
cell_type_df = cell_type_df[~has_excluded_prefix]

nuc_df = client.materialize.query_view(
    "nucleus_alternative_lookup",
    desired_resolution=[1, 1, 1],
    split_positions=True,
)

# Disabled: transformed soma positions via trafo_streamline (kept for reference).
# reference_point = np.array([900_000.0, 700_000.0, 300_000.0])
# single_soma_df[["pt_position_x_trafo", "pt_position_y_trafo", "pt_position_z_trafo"]] = trafo_streamline.radial_points(reference_point, single_soma_df[["pt_position_x", "pt_position_y", "pt_position_z"]]) * 1000

In [16]:
def map_cell_type(ct_str):
    """Shorten a raw cell-type label.

    Labels starting with "L" keep their first two underscore-separated
    fields, joined by a hyphen (e.g. "L3_IT_x" -> "L3-IT"). Any other label
    is cut down to its first three characters.
    """
    if not ct_str.startswith("L"):
        return ct_str[:3]

    leading_fields = ct_str.split("_")[:2]
    return "-".join(leading_fields)
        
# The short labels map to themselves so this cell can be executed more than
# once: with only {"inh": "I", "exc": "E"} a second run raised KeyError on the
# already-converted "I"/"E" values. Unknown labels still raise KeyError.
coarse_label_map = {"inh": "I", "exc": "E", "I": "I", "E": "E"}

# assign() builds a new frame instead of writing columns into the filtered
# frame, which avoids pandas' SettingWithCopyWarning. map_cell_type leaves an
# already-shortened label unchanged, so it is safe to re-apply as well.
cell_type_df = cell_type_df.assign(
    cell_type=cell_type_df["cell_type"].map(map_cell_type),
    cell_type_coarse=cell_type_df["cell_type_coarse"].map(lambda x: coarse_label_map[x]),
)

In [24]:
# Left merge: every nucleus row is kept, and the cell-type columns are filled
# only where the root id also appears in cell_type_df.
nucleus_columns = ["id", "pt_position_x", "pt_position_y", "pt_position_z", "pt_root_id", "volume"]
cell_type_columns = ["pt_root_id", "cell_type_coarse", "cell_type"]

soma_ct_df = nuc_df[nucleus_columns].merge(
    cell_type_df[cell_type_columns],
    how="left",
    on="pt_root_id",
)

In [25]:
# Show the merged table. Rows without a match in cell_type_df have missing
# values in cell_type_coarse and cell_type, because the merge is a left merge.
soma_ct_df

Unnamed: 0,id,pt_position_x,pt_position_y,pt_position_z,pt_root_id,volume,cell_type_coarse,cell_type
0,228132,632828,749849,738270,864691132737039043,458.464831,,
1,543247,1304922,977915,83880,864691132730839988,73.345940,,
2,203262,624680,531094,283770,864691132654552792,338.276613,E,L3-IT
3,350562,894573,478559,163530,864691132773514104,326.965400,E,L2-IT
4,718122,1729859,674111,781200,864691132774106773,333.888647,,
...,...,...,...,...,...,...,...,...
207450,527607,1262940,628094,734445,864691132639606383,100.547645,,
207451,168582,491518,1057067,92070,864691133042980384,369.919126,,
207452,29422,302330,415005,81855,0,285.031368,,
207453,422767,1065603,538932,36405,864691132851361283,394.724290,,


In [26]:
# Write the soma / cell-type table as a zstd-compressed feather file.
soma_ct_path = f"{data_dir}/soma_and_cell_type_{mat_version}.feather"
soma_ct_df.to_feather(soma_ct_path, compression="zstd")

## Synapse information

In [71]:
def _fetch_synapses_chunked(root_ids, side, cache_path, chunk_size=40, max_rows_per_query=500_000):
    """Load, or download and cache, all synapses attached to `root_ids` on one side.

    Parameters
    ----------
    root_ids : array-like of root ids to filter on.
    side : "pre" or "post"; selects the `{side}_pt_root_id` filter column.
    cache_path : feather file that is read if present and written otherwise.
    chunk_size : approximate number of root ids sent per query.
    max_rows_per_query : each cleaned chunk must stay below this row count.

    Returns
    -------
    pandas.DataFrame with one row per synapse. Rows whose pre and post root
    ids are equal are removed, and bookkeeping columns are dropped.

    Raises
    ------
    ValueError if `root_ids` is empty.
    AssertionError if a chunk reaches `max_rows_per_query` rows.

    Uses the notebook-level `client` for the queries.
    """
    if os.path.exists(cache_path):
        return pd.read_feather(cache_path)

    if len(root_ids) == 0:
        raise ValueError("root_ids is empty; nothing to query")

    # np.array_split needs at least one section. The plain `len // chunk_size`
    # is 0 whenever fewer than `chunk_size` ids are passed.
    n_chunks = max(1, len(root_ids) // chunk_size)

    chunk_dfs = []
    for chunk_root_ids in tqdm.tqdm(np.array_split(root_ids, n_chunks)):
        chunk_df = client.materialize.query_table(
            "synapses_v1dd",
            filter_in_dict={f"{side}_pt_root_id": chunk_root_ids},
            desired_resolution=[1, 1, 1],
            split_positions=True,
        )
        # Drop synapses whose pre and post side belong to the same root id.
        chunk_df = chunk_df[chunk_df["pre_pt_root_id"] != chunk_df["post_pt_root_id"]]
        chunk_df = chunk_df.drop(columns=["created", "valid", "superceded_id", "pre_pt_supervoxel_id", "post_pt_supervoxel_id"])
        # NOTE(review): presumably guards against a server-side row limit
        # silently truncating the query result -- confirm the actual limit.
        assert len(chunk_df) < max_rows_per_query
        chunk_dfs.append(chunk_df)

    # ignore_index gives the cached frame a clean RangeIndex.
    side_syn_df = pd.concat(chunk_dfs, ignore_index=True)
    side_syn_df.to_feather(cache_path, compression="zstd")
    return side_syn_df


syn_path = f"{data_dir}/syn_df_all_to_proofread_to_all_{mat_version}.feather"

if os.path.exists(syn_path):
    syn_df = pd.read_feather(syn_path)
else:
    # Each direction is cached in its own file. The combined file at syn_path
    # is written only after both directions are complete, so an interrupted
    # run can no longer leave a partial result at syn_path.
    pre_syn_path = f"{data_dir}/pre_syn_df_all_to_proofread_to_all_{mat_version}.feather"
    pre_syn_df = _fetch_synapses_chunked(axon_proof_root_ids, "pre", pre_syn_path)

    post_syn_path = f"{data_dir}/post_syn_df_all_to_proofread_to_all_{mat_version}.feather"
    post_syn_df = _fetch_synapses_chunked(axon_proof_root_ids, "post", post_syn_path)

    # A synapse between two listed cells appears in both frames; keep one copy.
    syn_df = pd.concat([pre_syn_df, post_syn_df])
    syn_df = syn_df.drop_duplicates("id", keep="first").reset_index(drop=True)
    syn_df.to_feather(syn_path, compression="zstd")

100%|████████████████████████████████████████| 30/30 [20:16<00:00, 40.56s/it]
100%|████████████████████████████████████████| 30/30 [36:00<00:00, 72.01s/it]
