In [1]:
import os

import caveclient
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tqdm
import tqdm.notebook


In [2]:
# Build the CAVE client for the 'minnie65_public' datastack. The auth token is
# read from the API_SECRET environment variable, so it is never written into
# the notebook; a missing variable raises KeyError here.
# NOTE(review): the results file saved later is tagged "v661" — consider
# pinning the materialization version so re-runs query the same release.
client = caveclient.CAVEclient(
    'minnie65_public',
    auth_token=os.environ['API_SECRET'],
)

In [3]:
# Select every cell whose axon proofreading status is 'extended' or 'clean'.
axon_status_filter = {'status_axon': ['extended', 'clean']}
prf_df = client.materialize.query_table(
    'proofreading_status_public_release',
    filter_in_dict=axon_status_filter,
)

In [4]:
# Fetch all manually coregistered cells, asking for positions at a
# desired_resolution of [1000, 1000, 1000].
coreg_df = client.materialize.query_table(
    'coregistration_manual_v3',
    desired_resolution=[1000, 1000, 1000],
)

In [5]:
# Join the two tables on root id to keep only cells that are both proofread
# AND coregistered. A root id that occurs in several coregistration rows
# yields several rows here; columns present in both tables get _x / _y suffixes.
clean_coreg_df = prf_df.merge(coreg_df, on='pt_root_id')

In [6]:
# How many do we have? Display the merged frame to inspect it; the notebook
# shows a truncated view (first and last rows) for a frame this long.
clean_coreg_df

Unnamed: 0,id_x,created_x,superceded_id,valid_x,valid_id,status_dendrite,status_axon,pt_supervoxel_id_x,pt_root_id,pt_position_x,...,target_id,session,scan_idx,unit_id,field,residual,score,pt_position_y,bb_start_position,bb_end_position
0,295,2023-04-05 04:49:26.555498+00:00,,t,864691135397542177,extended,clean,89245092023510260,864691135927049742,"[178112, 191344, 22109]",...,265045,5,6,7305,6,9.06328,9.837156,"[712.448, 765.376, 884.36]","[nan, nan, nan]","[nan, nan, nan]"
1,1269,2023-04-06 05:09:05.153458+00:00,,t,864691135122603047,extended,extended,91346396048860133,864691135122603047,"[193152, 118976, 21021]",...,292685,9,4,2231,2,2.79599,12.003761,"[772.608, 475.904, 840.84]","[nan, nan, nan]","[nan, nan, nan]"
2,1269,2023-04-06 05:09:05.153458+00:00,,t,864691135122603047,extended,extended,91346396048860133,864691135122603047,"[193152, 118976, 21021]",...,292685,9,4,4691,4,12.45100,1.271139,"[772.608, 475.904, 840.84]","[nan, nan, nan]","[nan, nan, nan]"
3,1270,2023-04-06 05:09:05.154604+00:00,,t,864691135155894884,extended,extended,87475840375371574,864691135155894884,"[165408, 116672, 22032]",...,256576,5,7,2177,2,11.48900,-3.487494,"[661.632, 466.688, 881.28]","[nan, nan, nan]","[nan, nan, nan]"
4,1271,2023-04-06 05:09:05.156001+00:00,,t,864691135591041291,extended,extended,86913165367478600,864691135591041291,"[161168, 118496, 22997]",...,222998,9,4,2397,2,3.02052,6.575148,"[644.672, 473.984, 919.88]","[nan, nan, nan]","[nan, nan, nan]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
671,1246,2023-04-05 04:49:27.451383+00:00,,t,864691136990522517,extended,extended,93177907207497424,864691136990522517,"[206577, 133261, 18640]",...,327859,8,5,2597,2,7.76225,0.238802,"[826.304, 532.672, 745.8]","[nan, nan, nan]","[nan, nan, nan]"
672,1251,2023-04-05 04:49:27.455773+00:00,,t,864691137019596142,extended,extended,92619630044487968,864691137019596142,"[202346, 167997, 17807]",...,332199,4,7,6258,6,4.52099,6.761626,"[809.408, 672.064, 712.6]","[nan, nan, nan]","[nan, nan, nan]"
673,1257,2023-04-05 04:49:27.461276+00:00,,t,864691137020787054,clean,clean,91212530441577788,864691137020787054,"[192400, 170160, 20939]",...,298831,4,7,5933,6,5.86964,6.321741,"[769.6, 680.64, 837.56]","[nan, nan, nan]","[nan, nan, nan]"
674,1261,2023-04-05 04:49:27.464778+00:00,,t,864691137054388086,extended,clean,90229429674273122,864691137054388086,"[185152, 185344, 21255]",...,301095,5,7,7043,6,8.06943,0.228558,"[740.608, 741.376, 850.2]","[nan, nan, nan]","[nan, nan, nan]"


In [7]:
# Per-cell store of query results, keyed by root id. Collecting answers here
# makes the download robust to a lost connection: results gathered so far are
# kept. Note that re-running this cell starts over with an empty store.
synapse_dfs = dict()

In [None]:
# Fetch the outgoing-connection summary for every proofread, coregistered cell.
# Results are stored in `synapse_dfs` keyed by root id, so if the connection
# drops this cell can be re-run and already-fetched cells are skipped.
# `tqdm.notebook.tqdm` is used instead of the deprecated `tqdm.tqdm_notebook`.
for k, row in tqdm.notebook.tqdm(clean_coreg_df.iterrows(), total=len(clean_coreg_df)):
    # make sure we don't have the result already (this also skips root ids
    # that appear in more than one row of the merged frame)
    if synapse_dfs.get(row.pt_root_id, None) is None:
        # query the 'connections_with_nuclei' view to get a summary of the
        # connections made by this cell (n_syn and summed synapse size);
        # per the original author, nuclei are attached when the partner is a
        # single neuron
        syn_df = client.materialize.query_view(
            'connections_with_nuclei',
            filter_equal_dict={'pre_pt_root_id': row.pt_root_id},
        )
        # save the result in our dict
        synapse_dfs[row.pt_root_id] = syn_df
        

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for k, row in tqdm.tqdm_notebook(clean_coreg_df.iterrows(), total=len(clean_coreg_df)):


  0%|          | 0/676 [00:00<?, ?it/s]

In [10]:
# Reset the `attrs` metadata on every stored result to an empty dict so the
# frames can be concatenated together without issue.
for cell_syn_df in synapse_dfs.values():
    cell_syn_df.attrs = {}

In [11]:
# Stack the per-cell results into one frame, in the order they were stored.
# Each frame keeps its own row index, so index values repeat across cells.
all_syn_df = pd.concat(list(synapse_dfs.values()))

In [14]:
# Drop autapses, i.e. rows where the pre- and postsynaptic nucleus ids match
# (they are most often false positives).
not_autapse = all_syn_df['pre_nuc_id'] != all_syn_df['post_nuc_id']
all_syn_df = all_syn_df.loc[not_autapse]

In [17]:
# Write the filtered connection table to a pickle file in the working directory.
pd.to_pickle(all_syn_df, 'all_prf_coreg_conn_v661.pkl')

In [20]:
# Here's our dataframe: one row per (pre, post) root id pair with the synapse
# count (n_syn), summed size (sum_size) and the nucleus ids of both partners.
all_syn_df

Unnamed: 0,pre_pt_root_id,post_pt_root_id,n_syn,sum_size,pre_nuc_id,post_nuc_id
0,864691135927049742,864691136389372944,1,624,265045,298914
1,864691135927049742,864691136687061614,1,3204,265045,272033
2,864691135927049742,864691135472970802,1,1484,265045,232849
3,864691135927049742,864691136310417242,1,1732,265045,303145
4,864691135927049742,864691135498644243,1,3948,265045,256548
...,...,...,...,...,...,...
483,864691137198217793,864691133800710458,1,4040,296731,-1
484,864691137198217793,864691135874602638,1,3388,296731,-1
485,864691137198217793,864691133462662992,1,3284,296731,-1
486,864691137198217793,864691134428985749,1,4004,296731,-1
