# Generate ET cell output tables from the proofread table and add putative cell type information

In [1]:
import pandas as pd
import numpy as np
import pcg_skel
import tqdm
from meshparty import meshwork
from caveclient import CAVEclient
import datetime
import os

# Client for the 'minnie65_phase3_v1' datastack; every query below goes through it.
client = CAVEclient('minnie65_phase3_v1')

# Remove pandas' display limits so frames are shown without row/column truncation.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [2]:
# MAIN

# Reference timestamp reused below for skeleton generation and root-ID lookups.
now = client.materialize.get_timestamp()

In [3]:
skeldir = 'skeletons'

In [4]:
# Defining the HVA/VISp line:

# Two (x, z) points spanning the straight boundary line used to split HVA from V1.
xz0 = [237415, 26308]
xz1 = [286783, 8960]

x0, z0 = xz0
x1, z1 = xz1


def soma_in_hva(pt):
    """Return True when the point lies on the high-x side of the boundary line.

    Only pt[0] (x) and pt[2] (z) are read; pt[1] is ignored.
    """
    ptx, ptz = pt[0], pt[2]
    # x coordinate of the boundary at this z (linear interpolation between the two points)
    x_thresh = x1 + (ptz-z1) * (x0-x1) / (z0-z1)
    return ptx > x_thresh


def classify_soma(pt):
    """Label a soma position as 'hva' or 'v1'; positions containing NaN give NaN."""
    if np.any(np.isnan(pt)):
        return np.nan
    return 'hva' if soma_in_hva(pt) else 'v1'

First we want to get a list of outputs from the L5-ET cells, but restricted to their axon

In [5]:
et_table = 'bodor_pt_cells'
#client.materialize.version == 688

# Annotation table of the ET cells; its pt_root_id and pt_position columns are used below.
et_df = client.materialize.query_table(et_table)

In [None]:
# Build the skeletons
nrns = {}

# Create the cache directory up front so a freshly built skeleton can always be saved.
os.makedirs(skeldir, exist_ok=True)

# Look up the synapse table name once; it is the same for every cell.
syn_table_name = client.info.get_datastack_info().get("synapse_table")

# total= gives tqdm a known length (iterrows() is a plain generator without len()).
for _, row in tqdm.tqdm(et_df.iterrows(), total=len(et_df)):
    root_id = row["pt_root_id"]
    skel_file = f"{skeldir}/{root_id}.h5"

    if os.path.exists(skel_file):
        # Reuse the meshwork cached by an earlier run.
        nrns[root_id] = meshwork.load_meshwork(skel_file)
    else:
        nrns[root_id] = pcg_skel.coord_space_meshwork(
            root_id,
            client=client,
            root_point=row["pt_position"],
            root_point_resolution=[4, 4, 40],
            collapse_soma=True,
            synapses="all",
            synapse_table=syn_table_name,
            timestamp=now,
        )
        # Cached before the is_axon annotation is added, so that annotation is
        # recomputed below on every run.
        nrns[root_id].save_meshwork(skel_file)

# Get the axons
for rid, nrn in nrns.items():
    is_axon = meshwork.algorithms.split_axon_by_annotation(
        nrn,
        'pre_syn',
        'post_syn',
        return_quality=False,
    )
    nrn.anno.add_annotations('is_axon', is_axon, mask=True)

1it [00:28, 28.68s/it]

Filter for presynaptic outputs on ET cell axons and concatenate into one dataframe:

In [None]:
# MAIN
#Filter for presynaptic outputs on ET cell axons and concatenate into one dataframe, adding synapse distance:

pre_dfs = []
for rid in et_df["pt_root_id"]:
    nrn = nrns[rid]
    # Keep only the presynaptic sites that sit on the part of the mesh flagged as axon.
    syn_filt = nrn.anno.pre_syn.filter_query(nrn.anno.is_axon.mesh_mask)
    df = syn_filt.df
    df['dist_to_root'] = nrn.distance_to_root(syn_filt.mesh_index)
    # Rank of each synapse by its distance to the root, within this cell.
    df['distance_rank'] = df['dist_to_root'].rank()
    # Reset the frame-level attrs before the frames are concatenated.
    df.attrs = {}
    pre_dfs.append(df)

et_pre_df = pd.concat(pre_dfs, ignore_index=True)

# Refresh both root-ID columns at the common timestamp. Both are cast to int so the
# two columns share one dtype; pre_pt_root_id was previously left as returned by
# get_roots even though it is used as a merge key against pt_root_id later on.
et_pre_df['pre_pt_root_id'] = client.chunkedgraph.get_roots(
    et_pre_df['pre_pt_supervoxel_id'], timestamp=now
).astype('int')
et_pre_df['post_pt_root_id'] = client.chunkedgraph.get_roots(
    et_pre_df['post_pt_supervoxel_id'], timestamp=now
).astype('int')


In [None]:
# MAIN
# Get single soma root ids and add cell types

soma_df = client.materialize.query_table(
    "nucleus_neuron_svm", filter_equal_dict={"cell_type": "neuron"}
)


# Number of neuron nuclei that share each segment (root) ID.
# Only the 'id' column is counted, instead of counting every column and keeping one.
soma_df['count_soma'] = soma_df.groupby('pt_root_id')['id'].transform('count')

# One row per root ID with its soma count (first occurrence kept).
num_soma_df = (
    soma_df[['pt_root_id', 'count_soma']]
    .drop_duplicates(subset='pt_root_id', keep='first')
    .rename(columns={"count_soma": "num_soma"})
)


def number_of_soma(row):
    """Return the row's 'num_soma' value, substituting 0 when it is missing."""
    n_soma = row['num_soma']
    return 0 if pd.isna(n_soma) else n_soma
# Row-wise pass that replaces a missing num_soma with 0.
num_soma_df['num_soma'] = num_soma_df.apply(number_of_soma, axis=1)


# Keep a single row (the first) per root ID
soma_df = soma_df.drop_duplicates(subset="pt_root_id", keep='first')

# Download all the other tables we want to pull info from.
# keep=False drops every root ID that occurs more than once, so only unambiguous
# one-row-per-root predictions survive.
#ei_aibs_df = client.materialize.query_table("allen_soma_coarse_cell_class_model_v2").drop_duplicates('pt_root_id', keep=False)
NEURD_df = client.materialize.query_table("baylor_e_i_model_v1").drop_duplicates('pt_root_id', keep=False)

metamodel_df = client.materialize.query_table(
    "aibs_soma_nuc_metamodel_preds_v117",
    filter_equal_dict={"classification_system": "aibs_neuronal"},
).drop_duplicates('pt_root_id', keep=False)

mtypes_model_df = client.materialize.query_table(
    "aibs_soma_nuc_exc_mtype_preds_v117",
    filter_equal_dict={"classification_system": "aibs_coarse_excitatory"},
).drop_duplicates('pt_root_id', keep=False)

# Older names for the same two tables. They used to be fetched with a second,
# identical query; aliasing avoids downloading each table twice.
ei_bay_df = NEURD_df
aibs_multi_df = metamodel_df

# Enrich soma_df with all this info:
# each prediction table contributes its 'cell_type' column under a table-specific name.
for pred_df, label_col in (
    (NEURD_df, "NEURD_class"),
    (metamodel_df, "metamodel_cell_type"),
    (mtypes_model_df, "mtypes_model_cell_type"),
):
    labels = pred_df[["pt_root_id", "cell_type"]].rename(columns={"cell_type": label_col})
    soma_df = soma_df.merge(labels, on="pt_root_id", how="left")


In [None]:
# MAIN
#Add nucleus ID to dataframe


# Root ID -> nucleus ID lookup, used to tag each synapse with its presynaptic nucleus.
pre_nucleus_lookup = soma_df[["id", "pt_root_id"]].rename(columns={"id": "pre_nucleus_id"})

et_pre_df = et_pre_df.merge(
    pre_nucleus_lookup,
    left_on="pre_pt_root_id",
    right_on="pt_root_id",
    how="left",
).drop(columns="pt_root_id")


In [None]:
# MAIN
#Add class labels to soma_df
def standard_class_metamodel(row):
    """Collapse the row's 'metamodel_cell_type' into 'inhibitory' or 'excitatory'.

    Returns None when the cell type is the string 'none' or is missing.
    """
    cell_type = row['metamodel_cell_type']

    if cell_type in ('MC', 'BC', 'NGC', 'BPC'):
        return 'inhibitory'

    if cell_type == 'none' or pd.isna(cell_type):
        return None

    # every remaining label is treated as excitatory
    return 'excitatory'

# Row-wise: derive the coarse class from the metamodel cell type.
soma_df['metamodel_class'] = soma_df.apply(standard_class_metamodel, axis=1)

In [None]:
# MAIN
#Merge all this info from cell types into the synapse dataframe, as well as add area locations.

#merge postsynaptic nucleus ID, soma position and cell-type labels
# Left-join the soma information of the postsynaptic cell onto every synapse, keyed by root ID.
synapse_table = et_pre_df.merge(
    soma_df[
        ["id", "pt_root_id", "pt_position", "NEURD_class", "metamodel_class", "metamodel_cell_type", "mtypes_model_cell_type"]
    ].rename(columns={"pt_position": "post_soma_pt"}).rename(columns={"id": "post_nucleus_id"}),
    left_on="post_pt_root_id",
    right_on="pt_root_id",
    how="left",
).drop(columns="pt_root_id")

# Synapses without a matching soma have a NaN position, which classify_soma maps to NaN.
synapse_table["post_soma_area"] = synapse_table['post_soma_pt'].apply(classify_soma)

# Same for the presynaptic side: soma position taken from the ET cell table.
synapse_table = synapse_table.merge(
    et_df[['pt_root_id', 'pt_position']].rename(columns={'pt_position': 'pre_soma_pt'}),
    left_on='pre_pt_root_id',
    right_on='pt_root_id',
    how='left',
).drop(columns='pt_root_id')

synapse_table["pre_soma_area"] = synapse_table['pre_soma_pt'].apply(classify_soma)

# NOTE(review): no 'cell_type_pred' column is added anywhere above in this notebook, so
# this rename looks like a leftover no-op -- confirm before removing.
synapse_table = synapse_table.rename(columns={"cell_type_pred": "aibs_auto_subclass"})


# load manual labels

# Manual per-synapse target annotations, fetched with a live query.
# NOTE(review): this passes the string "now", not the `now` timestamp used for the
# root-ID lookups above -- confirm that this is intended.
manual_multi_df = client.materialize.live_live_query(
    'pt_synapse_targets',
    timestamp="now",
    metadata=False,
)
#manual_multi_df = client.materialize.query_table("pt_synapse_targets")
# target_id is renamed so it can serve as the join key against the synapse IDs below.
manual_multi_df = manual_multi_df.rename(columns={"target_id": "synapse_id"})
#manual_multi_df = client.materialize.query_table("pt_synapse_targets").drop_duplicates('post_pt_root_id', keep=False)
#manual_multi_df['post_pt_root_id'] = manual_multi_df.post_pt_root_id.astype('UInt64')


#manual_multi_df = pd.read_feather('manual_pt.feather')

# The synapse table's own 'id' column becomes the matching 'synapse_id' key.
synapse_table = synapse_table.rename(columns={"id": "synapse_id"})


#merge manual labels

# Manual class and subclass come from the same annotation row, so they are attached in
# ONE merge. Merging the table twice on synapse_id would multiply rows (and mix class
# and subclass from different annotations) whenever a synapse has more than one entry.
manual_labels = manual_multi_df[["synapse_id", "classification_system", "cell_type"]].rename(
    columns={"classification_system": "manual_class", "cell_type": "manual_subclass"}
)

synapse_table = (
    synapse_table.merge(
        manual_labels,
        on='synapse_id',
        how="left",
    )
    # number of somata on the postsynaptic segment (NaN when the root has no neuron nucleus)
    .merge(
        num_soma_df[["pt_root_id", "num_soma"]],
        left_on='post_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns='pt_root_id')
)

# Targets without any neuron nucleus have no num_soma after the left merge: count them as 0.
# (Vectorised replacement for the row-wise helper that was re-defined here a second time.)
synapse_table['num_soma'] = synapse_table['num_soma'].fillna(0)

In [None]:
# MAIN
# Add alternative nucleus points

nucleus_alternative_df = client.materialize.query_table('nucleus_alternative_points')

# Left-join the alternative points by postsynaptic root ID; id_ref and post_soma_pt2
# stay NaN for synapses whose target has no alternative point.
synapse_table = synapse_table.merge(
    nucleus_alternative_df[
        ["pt_root_id", "id_ref", "pt_position"]
    ].rename(columns={"pt_position": "post_soma_pt2"}),
    left_on="post_pt_root_id",
    right_on="pt_root_id",
    how="left",
).drop(columns="pt_root_id")


def add_nucleus_ID(row):
    """Return 'id_ref' when the row has one, otherwise the existing 'post_nucleus_id'."""
    ref_id = row['id_ref']
    return row['post_nucleus_id'] if pd.isna(ref_id) else ref_id

# Row-wise: rows that matched an alternative point take its id_ref as post_nucleus_id.
synapse_table['post_nucleus_id'] = synapse_table.apply(add_nucleus_ID, axis=1)



def add_soma_pt(row):
    """Return 'post_soma_pt2' when the row has an 'id_ref', otherwise 'post_soma_pt'."""
    has_alternative = not pd.isna(row['id_ref'])
    return row['post_soma_pt2'] if has_alternative else row['post_soma_pt']

# Row-wise: rows that matched an alternative point take its position as post_soma_pt.
synapse_table['post_soma_pt'] = synapse_table.apply(add_soma_pt, axis=1)

# post_soma_area was derived from post_soma_pt before the alternative points were
# filled in, so targets that only have an alternative soma point would keep a NaN area.
# Recompute it from the final positions.
synapse_table['post_soma_area'] = synapse_table['post_soma_pt'].apply(classify_soma)



def add_num_soma(row):
    """Return the soma count for a row, counting an alternative point as one soma.

    Rows without 'id_ref' keep their 'num_soma'. Rows with 'id_ref' return 1 unless
    'num_soma' is already positive, in which case that value is reported and kept.
    """
    if pd.isna(row['id_ref']):
        return row['num_soma']

    n_soma = row['num_soma']
    if n_soma > 0:
        # an alternative point exists although a nucleus was already counted: report it
        print(n_soma)
        print('ID:', row['id_ref'], ' has ', n_soma, ' somata' )
        return n_soma

    return 1

# Row-wise update of the soma count for rows that matched an alternative point.
synapse_table['num_soma'] = synapse_table.apply(add_num_soma, axis=1)

# The helper columns from the alternative-point merge are no longer needed.
synapse_table = synapse_table.drop(['id_ref', 'post_soma_pt2'], axis=1)

In [None]:
#MAIN
#standardize class labels
def standard_class_NEURD(row):
    """Return the row's 'NEURD_class', with any missing value normalised to None."""
    neurd_class = row['NEURD_class']
    return None if pd.isna(neurd_class) else neurd_class

# Row-wise: normalise missing NEURD_class entries to None.
synapse_table['NEURD_class'] = synapse_table.apply(standard_class_NEURD, axis=1)

In [None]:
#MAIN
#standardize sub_class labels
def standard_subclass_metamodel(row):
    """Standardise 'metamodel_cell_type': both 6P variants become '6P', missing becomes None."""
    cell_type = row['metamodel_cell_type']

    if cell_type in ('6P-IT', '6P-CT'):
        return '6P'

    return None if pd.isna(cell_type) else cell_type

# Row-wise: overwrite metamodel_cell_type with its standardised label.
synapse_table['metamodel_cell_type'] = synapse_table.apply(standard_subclass_metamodel, axis=1)

In [None]:
#MAIN
#standardize sub_class labels
# M-type label -> standard subclass label used throughout this notebook.
_MTYPE_TO_SUBCLASS = {
    'L2a': '23P',
    'L2b': '23P',
    'L3a': '23P',
    'L3b': '23P',
    'L3c': '23P',
    'L4a': '4P',
    'L4b': '4P',
    'L4c': '4P',
    'L5a': '5P-IT',
    'L5b': '5P-IT',
    # '5P-ET' (not '5P-PT') to agree with the manual-label standardisation and the
    # class/subclass consistency checks elsewhere in this notebook.
    'L5ET': '5P-ET',
    'L5NP': '5P-NP',
    'L6a': '6P',
    'L6b': '6P',
    'L6c': '6P',
    'L6CT': '6P',
    'L6wm': '6P',
}


def standard_subclass_mytpes_model(row):
    """Translate the row's 'mtypes_model_cell_type' into the standard subclass label.

    Known m-types are mapped through _MTYPE_TO_SUBCLASS, missing values become None,
    and any other label is returned unchanged.
    """
    mtype = row['mtypes_model_cell_type']

    if pd.isna(mtype):
        return None

    return _MTYPE_TO_SUBCLASS.get(mtype, mtype)

# Row-wise: overwrite mtypes_model_cell_type with its standardised label.
synapse_table['mtypes_model_cell_type'] = synapse_table.apply(standard_subclass_mytpes_model, axis=1)

In [None]:
#MAIN
#standardize class labels from manual
def standard_class_man(row):
    """Return the row's 'manual_class'; the string 'none' and missing values become None."""
    manual = row['manual_class']
    is_unlabelled = manual == 'none' or pd.isna(manual)
    return None if is_unlabelled else manual

# Row-wise: normalise the manual class labels.
synapse_table['manual_class'] = synapse_table.apply(standard_class_man, axis=1)

# Discard synapses whose manual class is 'error'; unlabelled rows are kept.
synapse_table = synapse_table[(synapse_table['manual_class'] != 'error')]

In [None]:
#MAIN
#standardize sub_class labels
def standard_subclass_man(row):
    """Standardise the row's 'manual_subclass'.

    'multisoma', 'none', 'unclear' and missing values become None; 'DTC' is renamed
    to 'MC' and '5P-PT' to '5P-ET'; every other label is returned unchanged.
    """
    subclass = row['manual_subclass']

    if subclass in ('multisoma', 'none', 'unclear'):
        return None
    if subclass == 'DTC':
        return 'MC'
    if subclass == '5P-PT':
        return '5P-ET'

    return None if pd.isna(subclass) else subclass

# Row-wise: normalise the manual subclass labels.
synapse_table['manual_subclass'] = synapse_table.apply(standard_subclass_man, axis=1)

In [None]:
# MAIN
#QC - CHECK IF THERE ARE DIFFERENT MANUAL CLASS LABELS ASSIGNED TO THE SAME NEURON 

# Number of distinct non-missing manual class labels per postsynaptic root ID, computed
# in one grouped pass instead of re-filtering the whole table for every ID.
# sort=False keeps the IDs in order of first appearance.
n_manual_classes = synapse_table.groupby('post_pt_root_id', sort=False)['manual_class'].nunique()

# Print every target that carries conflicting manual class labels.
# (To restrict the check to targets with fewer than two somata, filter the table on
# synapse_table['num_soma'] < 2 before grouping.)
for ii in n_manual_classes.index[n_manual_classes > 1]:
    print(ii)

In [None]:
# MAIN
#QC - CHECK IF THERE ARE DIFFERENT MANUAL SUBCLASS LABELS ASSIGNED TO THE SAME NEURON 

# Only targets with fewer than two somata are checked.
single_soma_rows = synapse_table[synapse_table['num_soma'] < 2]

# Number of distinct non-missing manual subclass labels per postsynaptic root ID.
n_manual_subclasses = single_soma_rows.groupby('post_pt_root_id', sort=False)['manual_subclass'].nunique()

# Print each conflicting target once (the identical check used to be run twice,
# so every ID was printed twice).
for ii in n_manual_subclasses.index[n_manual_subclasses > 1]:
    print(ii)

In [None]:
# MAIN
#TRANSFER MANUAL SUBCLASS LABELS ACROSS SYNAPSES OF THE SAME NEURON

#Create df with subclass labels and only one post_pt_root_id for IDs that are single somas or orphans
# One labelled row per target: the first synapse with a manual subclass is kept.
manual_subclass_labels = synapse_table[(synapse_table['num_soma'] <= 1) &
                                      pd.notna(synapse_table['manual_subclass'])].drop_duplicates(subset='post_pt_root_id')

manual_subclass_labels = manual_subclass_labels[['post_pt_root_id', 'manual_subclass']] 

#Transfer the subclass labels using the merge function 
#(In an earlier version I created a new table after this point "#synapse_table_transfer")

# Both frames carry 'manual_subclass', so the merge yields manual_subclass_x (the
# per-synapse label) and manual_subclass_y (the label transferred from the target).
synapse_table = synapse_table.merge(manual_subclass_labels, left_on='post_pt_root_id',
                                                      right_on='post_pt_root_id', how='left')

#Transfer the subclass labels on multisoma
def subclass_transfer(row):
    """Return the transferred subclass ('manual_subclass_y'), falling back to the
    synapse's own label ('manual_subclass_x') when nothing was transferred."""
    transferred = row['manual_subclass_y']
    return row['manual_subclass_x'] if pd.isna(transferred) else transferred

# Row-wise: fill the transferred column with the per-synapse label where nothing was transferred.
synapse_table['manual_subclass_y'] = synapse_table.apply(subclass_transfer, axis=1)

#Rename columns: the per-synapse label is kept as *_original, the transferred one becomes the main column
synapse_table = synapse_table.rename(columns={"manual_subclass_x": "manual_subclass_original",
                                                                "manual_subclass_y": "manual_subclass"})


In [None]:
#MAIN
#TRANSFER Manual CLASS LABELS ACROSS SYNAPSES OF THE SAME NEURON

#Create df with subclass labels and only one post_pt_root_id for IDs that are single somas or orphans
# One labelled row per target: the first synapse with a manual class is kept.
manual_class_labels = synapse_table[(synapse_table['num_soma'] <= 1) &
                                      pd.notna(synapse_table['manual_class'])].drop_duplicates(subset='post_pt_root_id')

manual_class_labels = manual_class_labels[['post_pt_root_id', 'manual_class']] 

#Transfer the class labels using the merge function
# Both frames carry 'manual_class', so the merge yields manual_class_x (the per-synapse
# label) and manual_class_y (the label transferred from the target).
synapse_table = synapse_table.merge(manual_class_labels, left_on='post_pt_root_id',
                                                      right_on='post_pt_root_id', how='left')

#Transfer multisoma labels
def class_transfer(row):
    """Return the transferred class ('manual_class_y'), falling back to the synapse's
    own label ('manual_class_x') when nothing was transferred."""
    transferred = row['manual_class_y']
    return row['manual_class_x'] if pd.isna(transferred) else transferred

# Row-wise: fill the transferred column with the per-synapse label where nothing was transferred.
synapse_table['manual_class_y'] = synapse_table.apply(class_transfer, axis=1)

#Rename columns: the per-synapse label is kept as *_original, the transferred one becomes the main column
synapse_table = synapse_table.rename(columns={"manual_class_x": "manual_class_original",
                                              "manual_class_y": "manual_class"})

#bool_mask = synapse_table_transfer['manual_class_y'].isna()
#synapse_table_transfer[bool_mask]['manual_class_y'] = synapse_table_transfer[bool_mask]['manual_class_x'] 


In [None]:
# MAIN
#QC - CHECK IF THE MANUAL CLASS AND SUBCLASS ARE CONSISTENT

#create new column where class is calculated from subclass
def create_class_from_subclass(row):
    """Derive the coarse class implied by the row's 'manual_subclass'.

    Excitatory and inhibitory subclass labels map to 'excitatory' / 'inhibitory';
    any other value (including missing ones) is returned unchanged.
    'NGC' is treated as inhibitory, as it is elsewhere in this notebook.
    """
    subclass = row['manual_subclass']

    if subclass in ('5P-NP', '5P-ET', '5P-IT', '4P', '6P', '23P'):
        return 'excitatory'

    if subclass in ('BC', 'MC', 'NGC', 'BPC'):
        return 'inhibitory'

    return subclass

# Row-wise: temporary QC column holding the class implied by the manual subclass.
synapse_table['class_from_subclass'] = synapse_table.apply(create_class_from_subclass, axis=1)


def check_class_from_subclass(row):
    """Return 'OK' when 'manual_class' equals 'class_from_subclass', else the manual subclass."""
    agrees = row['manual_class'] == row['class_from_subclass']
    return 'OK' if agrees else row['manual_subclass']

synapse_table['check_class_from_subclass'] = synapse_table.apply(check_class_from_subclass, axis=1)

# Anything listed here other than 'OK' is a subclass whose implied class disagrees with the manual class.
synapse_table.check_class_from_subclass.unique()


In [None]:
# MAIN
#QC - CHECK DISAGREEMENT BETWEEN Baylor and AIBS LABELS

# One row per target where the two automated classes disagree and no manual class exists.
manual_check1 = synapse_table[(synapse_table['NEURD_class'] == 'excitatory') 
              & (synapse_table['metamodel_class'] == 'inhibitory') & pd.isna(synapse_table['manual_class'])].drop_duplicates(subset='post_pt_root_id')#.post_pt_root_id.unique()

print('number of unchecked disagreements where NEURD is "E" and metamodel is "I": ', len(manual_check1))


# Same check with the two automated classes swapped.
manual_check2 = synapse_table[(synapse_table['NEURD_class'] == 'inhibitory') 
              & (synapse_table['metamodel_class'] == 'excitatory') & pd.isna(synapse_table['manual_class'])].drop_duplicates(subset='post_pt_root_id')#.post_pt_root_id.unique()

print('number of unchecked disagreements where NEURD is "I" and metamodel is "E": ', len(manual_check2))


In [None]:
#MAIN
#QC - CHECK FOR LABELS WITH NO ENTRIES

# One row per target that has neither an automated (NEURD, metamodel) nor a manual class.
manual_check = synapse_table[pd.isna(synapse_table['NEURD_class']) 
              & pd.isna(synapse_table['metamodel_class']) & pd.isna(synapse_table['manual_class'])].drop_duplicates(subset='post_pt_root_id')#.post_pt_root_id.unique()

manual_check

In [None]:
#MAIN
#INTEGRATE CLASS LABELS BETWEEN MANUAL AND AUTOMATED LABELS

#generate new consensus column
# Start from the manual class; gaps are filled from the metamodel class below.
synapse_table['consensus_class'] = synapse_table['manual_class']

#When there isn't manual label add aibs_v2 label

def integrate_class(row):
    """Return the row's 'consensus_class', falling back to 'metamodel_class' when it is missing.

    pd.isna is used so that both None and NaN count as missing; a plain `== None`
    comparison lets NaN through and the fallback is then skipped.
    """
    current = row['consensus_class']

    if pd.isna(current):
        return row['metamodel_class']

    return current

# Row-wise: fill missing consensus classes from the metamodel class.
synapse_table['consensus_class'] = synapse_table.apply(integrate_class, axis=1)  

In [None]:
#MAIN
#INTEGRATE SUBCLASS LABELS BETWEEN MANUAL AND AUTOMATED LABELS

#generate new consensus column
# Start from the manual subclass; gaps are filled from the metamodel cell type below.
synapse_table['consensus_subclass'] = synapse_table['manual_subclass']

#When there isn't manual label add aibs_v2 label

def integrate_subclass(row):
    """Return the row's 'consensus_subclass', falling back to 'metamodel_cell_type'.

    A missing value (None or NaN, detected with pd.isna rather than `== None`)
    is replaced by the metamodel cell type. The label 'inhibitory' is a class,
    not a subclass, so it is cleared to None.
    """
    current = row['consensus_subclass']

    if pd.isna(current):
        return row['metamodel_cell_type']

    if current == 'inhibitory':
        return None

    return current

# Row-wise: fill missing consensus subclasses from the metamodel cell type.
synapse_table['consensus_subclass'] = synapse_table.apply(integrate_subclass, axis=1)  

In [None]:
#MAIN
#QC - CHECK IF THE INTEGRATED CLASS AND SUBCLASS ARE CONSISTENT

#remove previous columns
# Drop the QC columns created by the manual-label check; they are rebuilt below from the consensus labels.
synapse_table = synapse_table.drop(['class_from_subclass', 'check_class_from_subclass'], axis=1)
#synapse_table = synapse_table.drop(['class_from_subclass'], axis=1)


#create new column where class is calculated from subclass
def create_class_from_subclass(row):
    """Derive the coarse class implied by the row's 'consensus_subclass'.

    Excitatory and inhibitory subclass labels map to 'excitatory' / 'inhibitory',
    the bare label 'inhibitory' maps to 'unknown', and any other value (including
    missing ones) is returned unchanged.
    """
    subclass = row['consensus_subclass']

    if subclass in ('5P-NP', '5P-ET', '5P-IT', '4P', '6P', '6P-IT', '6P-CT', '23P'):
        return 'excitatory'

    if subclass in ('BC', 'MC', 'NGC', 'BPC'):
        return 'inhibitory'

    if subclass == 'inhibitory':
        return 'unknown'

    return subclass

# Row-wise: temporary QC column holding the class implied by the consensus subclass.
synapse_table['class_from_subclass'] = synapse_table.apply(create_class_from_subclass, axis=1)


def check_class_from_subclass(row):
    """Return 'OK' when 'consensus_class' equals 'class_from_subclass', else the consensus subclass."""
    agrees = row['consensus_class'] == row['class_from_subclass']
    return 'OK' if agrees else row['consensus_subclass']

synapse_table['check_class_from_subclass'] = synapse_table.apply(check_class_from_subclass, axis=1)

# Anything listed here other than 'OK' is a subclass whose implied class disagrees with the consensus class.
synapse_table.check_class_from_subclass.unique()


In [None]:
# MAIN
#SAVE AND READ

#remove columns before saving
# The two QC helper columns are not part of the saved table.
synapse_table = synapse_table.drop(['class_from_subclass', 'check_class_from_subclass'], axis=1)


#save the extended synapse table (the index is reset before writing to feather)
synapse_table.reset_index(drop=True).to_feather("ET_extended_synapse_table.feather")

#READ
#et_pre_ct_df = pd.read_feather('ET_Column_syn_df_NC.feather')

# EXTRAS TO BE DELETED

In [None]:
manual_check = synapse_table[(synapse_table['post_pt_root_id'] == 864691136953075423)]

In [None]:
manual_check

In [None]:
#CREATE NEUROGLANCER LINK


# Pick the synapse to inspect (one row per postsynaptic root ID).
manual_check = synapse_table[(synapse_table['synapse_id']== 138873793)].drop_duplicates(subset='post_pt_root_id')
#manual_check = synapse_table[(synapse_table['manual_eiaibs_subclass']== '5P-PT') & (synapse_table['pre_pt_root_id']==864691135293076662)].drop_duplicates(subset='post_pt_root_id')
#manual_check = manual_check.drop_duplicates(subset='post_pt_root_id')


# NOTE(review): import placed mid-notebook; consider moving it to the import cell at the top.
from nglui import statebuilder

img, seg = statebuilder.from_client(client)

# Point annotations are read from 'post_pt_position' and linked to the segment in 'post_pt_root_id'.
pt_map = statebuilder.PointMapper('post_pt_position', linked_segmentation_column='post_pt_root_id')
anno = statebuilder.AnnotationLayerConfig('post_pt_position', mapping_rules=pt_map, linked_segmentation_layer=seg.name,
                                          tags=['single_spine','dendrite', 'error', 'has_soma'])
sb = statebuilder.StateBuilder([img, seg, anno], client=client)

#here is where you add the dataframe
sb.render_state(manual_check[['post_pt_root_id','post_pt_position']], return_as='html')

#[id, x,y,z]

In [None]:
synapse_table[['synapse_id','pre_nucleus_id']]

In [None]:
synapse_table_1.num_soma.unique()

In [None]:
post_nucleus_ID_df = synapse_table[(synapse_table['post_pt_root_id'] == 864691136418722199)]
         

In [None]:
post_nucleus_ID_df.post_nucleus_id.astype('int').unique().item()

In [None]:
post_nucleus_ID_df.consensus_class.unique().item()

In [None]:
#MAKE STATS DATAFRAME - All synapses and connections

pd.options.display.max_rows = None
pd.options.display.max_columns = None

#Load dataframe
# NOTE: this replaces the in-memory synapse_table with the saved copy, restricted to
# targets that have exactly one soma.
synapse_table = pd.read_feather("ET_extended_synapse_table.feather")
synapse_table = synapse_table[synapse_table['num_soma'] == 1]


#get the root IDs of the postsynaptic targets (one entry per connection target)
post_soma_IDs = synapse_table.post_pt_root_id.unique()
print('number of connections: ',len(post_soma_IDs))

#'y' location of cortical surface
surface_y_column =[84534, 85689, 86053, 87800, 89421, 90105, 82884, 81677, 86242]
average_surface_location = np.mean(surface_y_column)


#Create Dataframe

np.seterr(divide='ignore', invalid='ignore')

values = []

for post_soma_ID in post_soma_IDs:

    # All synapses onto this target. (No trailing comma here: it would wrap the
    # frame in a tuple and break the attribute access below.)
    post_nucleus_ID_df = synapse_table[(synapse_table['post_pt_root_id'] == post_soma_ID)]

    # .unique().item() requires exactly one distinct value per target and raises otherwise.
    stat_values = {
        'ID': post_soma_ID,
        'nucleus_ID': post_nucleus_ID_df.post_nucleus_id.astype('int').unique().item(),

        #SYNAPSES
        'syn_total': len(post_nucleus_ID_df),
        'class': post_nucleus_ID_df.consensus_class.unique().item(),
        'subclass': post_nucleus_ID_df.consensus_subclass.unique().item(),
    }
    values.append(stat_values)

synapse_table_values = pd.DataFrame(values)

In [None]:
et_pre_ct_df.manual_subclass.unique()

`pt_pre_ct_df` is now a complete list of axonal presynaptic synapses of all the ET cells.

Now let's look at the downstream proofread cells in table `bodor_pt_target_proofread`

In [None]:
#save pt_pre_ct_df
et_pre_ct_df.reset_index(drop=True).to_feather("ET_ccomplete_syn_df_NC_20230505b.feather")


In [None]:
manual_multi_df.reset_index(drop=True).to_feather("manual_pt.feather")

In [None]:
target_df = client.materialize.query_table('bodor_pt_target_proofread')

In [None]:
mc_df = target_df.query('cell_type == "MC"')
bc_df = target_df.query('cell_type == "BC"')

Now let's go down the BC targets:

In [None]:
# All output synapses of the proofread BC targets.
bc_output_df = client.materialize.synapse_query(pre_ids=bc_df['pt_root_id'])

# NOTE(review): 'ei_aibs', 'ei_baylor' and 'cell_type_pred' are not added to soma_df
# anywhere in this notebook; unless the nucleus table itself provides them this
# selection raises a KeyError -- confirm, or switch to the current label columns.
bc_output_df = (
    bc_output_df.merge(
        soma_df[
        ["id","pt_root_id", "pt_position", "ei_aibs", "ei_baylor", "cell_type_pred"]
        ].rename(columns={"pt_position": "target_soma_pt"}).rename(columns={"id": "post_nucleus_id"}),
        left_on='post_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns="pt_root_id")
    .merge(
        soma_df[
        ["id","pt_root_id"]].rename(columns={"id": "pre_nucleus_id"}),
        left_on='pre_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns="pt_root_id")
    .merge(
        num_soma_df[["pt_root_id", "num_soma"]],
        left_on='post_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns='pt_root_id')
)

Now let's add a column about number of synapses from proofread ET cells:

In [None]:
# Synapse count per postsynaptic root ID, indexed by post_pt_root_id.
# NOTE(review): pt_pre_ct_df is not defined anywhere in this notebook, and the synapse
# table built above renames 'id' to 'synapse_id' -- confirm which frame and column are meant.
syn_from_et = pt_pre_ct_df.groupby('post_pt_root_id').count()[['id']].rename(columns={'id': 'syn_from_et'})

In [None]:
bc_output_df = bc_output_df.merge(
    syn_from_et,
    left_on='post_pt_root_id',
    right_index=True,
    how='left',
)

bc_output_df['syn_from_et'] = bc_output_df['syn_from_et'].fillna(0).astype(int)

In [None]:
len(bc_output_df[(bc_output_df['syn_from_et'] > 1)].drop_duplicates(subset="post_pt_root_id", keep='first'))/len(bc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first'))

In [None]:
single_soma_nucs=mc_output_df.query('num_soma == 1').nucleus_id.unique()
len(single_soma_nucs)

In [None]:
all_nuc_ids=mc_output_df.nucleus_id.unique()
len(all_nuc_ids)

In [None]:
all_nuc_ids[~np.isin(all_nuc_ids, single_soma_nucs)]

In [None]:
len(bc_output_df.query('num_soma == 0').nucleus_id)

And the MC targets:

In [None]:
# All output synapses of the proofread MC targets.
# NOTE(review): cells above this one already read mc_output_df, so the notebook does
# not run top to bottom in its current order.
mc_output_df = client.materialize.synapse_query(pre_ids=mc_df['pt_root_id'])

# NOTE(review): 'ei_aibs', 'ei_baylor' and 'cell_type_pred' are not added to soma_df
# anywhere in this notebook; unless the nucleus table itself provides them this
# selection raises a KeyError -- confirm, or switch to the current label columns.
mc_output_df = (
    mc_output_df.merge(
        soma_df[
        ["id","pt_root_id", "pt_position", "ei_aibs", "ei_baylor", "cell_type_pred"]
        ].rename(columns={"pt_position": "target_soma_pt"}).rename(columns={"id": "post_nucleus_id"}),
        left_on='post_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns="pt_root_id")
    .merge(
        soma_df[
        ["id","pt_root_id"]].rename(columns={"id": "pre_nucleus_id"}),
        left_on='pre_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns="pt_root_id")
    .merge(
        num_soma_df[["pt_root_id", "num_soma"]],
        left_on='post_pt_root_id',
        right_on='pt_root_id',
        how="left",
    ).drop(columns='pt_root_id')
)

In [None]:
mc_output_df = mc_output_df.merge(
    syn_from_et,
    left_on='post_pt_root_id',
    right_index=True,
    how='left',
)

mc_output_df['syn_from_et'] = mc_output_df['syn_from_et'].fillna(0).astype(int)

In [None]:
len(mc_output_df[(mc_output_df['syn_from_et'] > 1)].drop_duplicates(subset="post_pt_root_id", keep='first'))/len(mc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first'))

Let's just glimpse at the resulting data:

In [None]:
bc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first').query('syn_from_et > 5 and target_soma_pt.str[1] > 165000').groupby('cell_type_pred').count()[['valid']]


In [None]:
bc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first').query('target_soma_pt.str[1] > 165000').groupby('cell_type_pred').count()[['valid']]


In [None]:
bc_output_df.query('syn_from_et > 0 and ei_baylor == "inhibitory" and cell_type_pred == "MC"')['post_pt_root_id'].unique()

In [None]:
#save bc_output_df
bc_output_df.reset_index(drop=True).to_feather("BC_syn_df_NC_9Sep.feather")

#save MC_output_df
mc_output_df.reset_index(drop=True).to_feather("MC_syn_df_NC_9Sep.feather")


In [None]:
mc_output_df.query('syn_from_et > 0').groupby('cell_type_pred').count()[['valid']]

In [None]:
mc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first').query('syn_from_et > 10 and target_soma_pt.str[1] > 165000').groupby('cell_type_pred').count()[['valid']]


In [None]:
mc_output_df.drop_duplicates(subset="post_pt_root_id", keep='first').query('target_soma_pt.str[1] > 165000').groupby('cell_type_pred').count()[['valid']]


In [None]:
bc_output_df.head()


In [None]:
pt_pre_ct_df.head()

In [None]:
common_targets = mc_output_df.query('syn_from_et > 0 and ei_aibs == "inhibitory"  and cell_type_pred == "BC" and target_soma_pt.str[1] > 165000').drop_duplicates(subset="post_pt_root_id", keep='first')
bc_output_df.query('post_pt_root_id == 864691136618564493')
#864691136266742772
#common_targets

In [None]:
# Create a Neuroglancer link showing the soma points of common_targets.

from nglui import statebuilder

# Image and segmentation layer configs derived from the CAVE client.
img, seg = statebuilder.from_client(client)

# Annotation layer: one point per row taken from target_soma_pt, linked to the
# segment id in post_pt_root_id.
pt_map = statebuilder.PointMapper('target_soma_pt', linked_segmentation_column='post_pt_root_id')
anno = statebuilder.AnnotationLayerConfig('soma_pts', mapping_rules=pt_map, linked_segmentation_layer=seg.name,
                                          tags=['', 'BC'])
sb = statebuilder.StateBuilder([img, seg, anno], client=client)


# Render the state from the two needed columns; the HTML result is the cell's output.
sb.render_state(common_targets[['post_pt_root_id','target_soma_pt']], return_as='html')



In [None]:
# Download the "nucleus_neuron_svm" table through the materialization client.
soma2_df = client.materialize.query_table(
    "nucleus_neuron_svm")



In [None]:
# Display soma2_df. NOTE(review): display.max_rows is set to None at the top of the
# notebook, so this renders every row; consider soma2_df.head().
soma2_df

In [None]:

# Left-join soma2_df onto test_MC_df, matching post_pt_root_id to pt_root_id.
# NOTE(review): test_MC_df is overwritten in place, so re-running this cell merges a
# second time and pandas will suffix the overlapping column names.
test_MC_df = test_MC_df.merge(
    soma2_df,
    left_on='post_pt_root_id',
    right_on = 'pt_root_id',
    how='left',
)


In [None]:

# Select inhibitory (ei_aibs) targets that have exactly one soma, from each source table.
MC_i_targets_df = mc_output_df.loc[
    (mc_output_df['ei_aibs'] == "inhibitory") & (mc_output_df['num_soma'] == 1),
    ['nucleus_id', 'ei_aibs', 'num_soma'],
]
BC_i_targets_df = bc_output_df.loc[
    (bc_output_df['ei_aibs'] == "inhibitory") & (bc_output_df['num_soma'] == 1),
    ['nucleus_id', 'ei_aibs', 'num_soma'],
]
PT_i_targets_df = pt_pre_ct_df.loc[
    (pt_pre_ct_df['ei_aibs'] == "inhibitory") & (pt_pre_ct_df['num_soma'] == 1),
    ['nucleus_id', 'ei_aibs', 'num_soma'],
]

# Concatenate the inhibitory targets of MC, BC and PT cells (in that order) and keep
# the first row seen for each nucleus_id.
somaIDs_df = pd.concat(
    [MC_i_targets_df, BC_i_targets_df, PT_i_targets_df]
).drop_duplicates(subset="nucleus_id", keep='first')

# Save the result (index discarded before writing).
somaIDs_df.reset_index(drop=True).to_feather("inhibitory_targets_Nuno.feather")

somaIDs_df

In [None]:
# Look up one hard-coded nucleus id in somaIDs_df.
somaIDs_df.query('nucleus_id == 303172')

In [None]:
# Display mc_output_df. NOTE(review): display.max_rows is None (set at the top of the
# notebook), so the whole frame is rendered; consider mc_output_df.head().
mc_output_df

In [None]:
# Store post_pt_root_id with the nullable unsigned 64-bit dtype, then display the frame.
test_MC_df['post_pt_root_id'] = test_MC_df.post_pt_root_id.astype('UInt64')
test_MC_df


In [None]:
# Number of distinct `id` values among rows whose cell_type is "neuron".
len(test_MC_df.query('cell_type == "neuron"').id.unique())

In [None]:
# Stack test_MC_df and test_BC_df.
# NOTE(review): this replaces the somaIDs_df built (and saved to Feather) in an earlier cell.
somaIDs_df = pd.concat([test_MC_df,test_BC_df])

In [None]:
# Keep the first row for each `id`.
somaIDs_df = somaIDs_df.drop_duplicates(subset="id", keep='first')

In [None]:
# Row count of somaIDs_df.
len(somaIDs_df)

In [None]:
# Distinct `id` values in somaIDs_df.
a = somaIDs_df.id.unique()

In [None]:
# Number of distinct ids.
len(a)


# Brendan data frame

In [None]:
# Load the downstream-target synapse table from CSV (path is relative to the working directory).
Brendan_synapses = pd.read_csv('Nuno_downstream_targets.csv')

In [None]:
# Add ei_aibs, ei_baylor and cell_type_pred for the post-synaptic cell by left-joining
# soma_df on postsyn_nucleus_id == id; the join key `id` is dropped afterwards.
# NOTE(review): Brendan_synapses is overwritten in place, so this cell should be run once.
Brendan_synapses = Brendan_synapses.merge(
    soma_df[
    ["id", "ei_aibs", "ei_baylor", "cell_type_pred"]
    ],
    left_on="postsyn_nucleus_id",
    right_on="id",
    how="left",
).drop(columns="id")


In [None]:
# Add a nucleus_id column to syn_from_et by matching post_pt_root_id against
# soma_df.pt_root_id (soma_df.id is renamed to nucleus_id for the join).
# NOTE(review): the original note read "If syn_from_et was not generated before", but
# this cell reads syn_from_et, so it must already exist. Presumably the cell is only
# needed when nucleus_id has not been attached yet; confirm.
syn_from_et = syn_from_et.merge(
    soma_df[
    ["id", "pt_root_id" ]].rename(columns={"id": "nucleus_id"}),
    left_on="post_pt_root_id",
    right_on="pt_root_id",
    how="left",
    )
#.drop(columns="pt_root_id")

In [None]:
# Attach syn_from_et to each row via the PRE-synaptic nucleus id
# (presyn_nucleus_id == nucleus_id); the join key is dropped afterwards.
# NOTE(review): Brendan_synapses is overwritten in place; running this cell twice makes
# pandas suffix the duplicated syn_from_et column and the fillna line below then fails.
Brendan_synapses = Brendan_synapses.merge(
    syn_from_et[
    ["nucleus_id", "syn_from_et"]
    ],
    left_on="presyn_nucleus_id",
    right_on="nucleus_id",
    how="left",
).drop(columns="nucleus_id")

# Presynaptic cells without a match get 0 so the column can be stored as plain int.
Brendan_synapses['syn_from_et'] = Brendan_synapses['syn_from_et'].fillna(0).astype(int)

In [None]:
# Distinct syn_from_et values present after the merge.
Brendan_synapses.syn_from_et.unique()
#Brendan_synapses
#syn_from_et

In [None]:
# Basket cells post-synaptic to PT neurons that were also cleaned by Brendan:
# nucleus ids that are both (a) presynaptic cells in Brendan_synapses with
# syn_from_et > 1 and (b) post_nucleus_id entries of pt_pre_ct_df labelled "BC"
# in aibs_auto_subclass.

BC_clean = np.intersect1d(Brendan_synapses.query('syn_from_et > 1').presyn_nucleus_id.unique(), 
                          pt_pre_ct_df.query('aibs_auto_subclass == "BC"').post_nucleus_id.unique())

BC_clean

In [None]:

#Brendan_synapses.syn_from_et.unique()
#Brendan_synapses.query('presyn_nucleus_id == 232945')

# ANALYSIS

In [None]:
# Distinct predicted target cell types for one hard-coded presynaptic nucleus id.
bc_output_df.query('pre_nucleus_id == 303172').cell_type_pred.unique()


In [None]:
# Number of bc_output_df rows from one presynaptic nucleus whose target is predicted BC.
pre_soma_ID = 303172
temp_table = bc_output_df.loc[bc_output_df['pre_nucleus_id'] == pre_soma_ID]
BC_syn = len(temp_table[temp_table['cell_type_pred'] == "BC"])
BC_syn

In [None]:
# Number of temp_table rows with no cell_type_pred label.
temp_table['cell_type_pred'].isnull().sum()

In [None]:
# Analysis: number of output synapses (rows of mc_output_df) per presynaptic cell,
# broken down by the predicted type of the target (AIBS `cell_type_pred` labels).
# Produces stats_AIBS_MC with one row per pre_nucleus_id.
# NOTE(review): later cells read a frame called `stats_AIBS`; confirm whether they are
# meant to use the `stats_AIBS_MC` built here.

stats = []

# One summary row per presynaptic nucleus present in mc_output_df.
pre_soma_ID = mc_output_df.pre_nucleus_id.unique()

for ii in pre_soma_ID:

    print(ii)
    temp_table = mc_output_df[mc_output_df['pre_nucleus_id'] == ii]

    # Synapses
    total_syn = len(temp_table)

    # Inhibitory target types
    BC_syn = len(temp_table.query('cell_type_pred == "BC"'))
    MC_syn = len(temp_table.query('cell_type_pred == "MC"'))
    BPC_syn = len(temp_table.query('cell_type_pred == "BPC"'))
    NGC_syn = len(temp_table.query('cell_type_pred == "NGC"'))

    # Pyramidal target types
    P23_syn = len(temp_table.query('cell_type_pred == "23P"'))
    P4_syn = len(temp_table.query('cell_type_pred == "4P"'))
    P5_NP_syn = len(temp_table.query('cell_type_pred == "5P-NP"'))
    P5_ET_syn = len(temp_table.query('cell_type_pred == "5P-ET"'))
    P5_IT_syn = len(temp_table.query('cell_type_pred == "5P-IT"'))
    P6_IT_syn = len(temp_table.query('cell_type_pred == "6P-IT"'))
    P6_CT_syn = len(temp_table.query('cell_type_pred == "6P-CT"'))

    # Rows whose target has no predicted type
    unasigned_syn = temp_table['cell_type_pred'].isnull().sum()

    total_syn_sum = sum([BC_syn, MC_syn, BPC_syn, NGC_syn,
                    P23_syn, P4_syn, P5_NP_syn, P5_ET_syn, P5_IT_syn,
                    P6_IT_syn, P6_CT_syn, unasigned_syn])

    # Sanity check: the two numbers agree when every non-null label is one of the
    # types counted above.
    print(total_syn)
    print(total_syn_sum)

    # Fraction of layer-5 targets that are 5P-ET. The counts are plain ints, so a
    # presynaptic cell without any L5 target would raise ZeroDivisionError and abort
    # the whole loop; record NaN for that cell instead.
    total_syn_L5 = P5_NP_syn + P5_ET_syn + P5_IT_syn
    if total_syn_L5 > 0:
        fraction_5P_ET = P5_ET_syn / total_syn_L5
    else:
        fraction_5P_ET = np.nan

    stat = {
        'ID': ii,
        'BC': BC_syn,
        'MC': MC_syn,
        'BPC': BPC_syn,
        'NGC': NGC_syn,

        '23P': P23_syn,
        '4P': P4_syn,
        '5P-NP': P5_NP_syn,
        '5P-ET': P5_ET_syn,
        '5P-IT': P5_IT_syn,
        '6P-IT': P6_IT_syn,
        '6P-CT': P6_CT_syn,

        'unasigned': unasigned_syn,
        '%5P-ET': fraction_5P_ET,
    }
    stats.append(stat)

stats_AIBS_MC = pd.DataFrame(stats)

In [None]:
# Fraction of L5 targets that are 5P-ET.
# NOTE(review): P5_*_syn are only assigned inside the stats loop, so this shows the value
# for whichever presynaptic cell was processed last, not a summary over all cells.
P5_ET_syn/(P5_NP_syn+P5_ET_syn+P5_IT_syn)

In [None]:
# Display Brendan_synapses. NOTE(review): display.max_rows is None (set at the top of the
# notebook), so every row is rendered; consider Brendan_synapses.head().
Brendan_synapses

In [None]:
# Analysis: number of synapses (rows of Brendan_synapses) per presynaptic cell, broken
# down by the cell type of the target. Produces stats_BCM with one row per presynaptic
# nucleus id in BC_clean.
# NOTE: the result is named stats_BCM regardless of which labels `cell_typing` selects.

cell_typing = 'AIBS'

# Column holding the target's cell-type label, and the label that column uses for the
# class reported as 'P5-ET' (spelled "5P-PT" in the BCM column).
if cell_typing == 'AIBS':
    type_col, et_label = 'cell_type_pred', '5P-ET'
elif cell_typing == 'BCM':
    type_col, et_label = 'postsyn_gnn_cell_type_fine', '5P-PT'
else:
    # Previously an unknown value fell through both branches and silently reused
    # counts left over from an earlier run.
    raise ValueError(f"unknown cell_typing {cell_typing!r}; expected 'AIBS' or 'BCM'")


def _count_label(labels, label):
    """Return how many entries of the Series `labels` equal `label` (NaN never matches)."""
    return int((labels == label).sum())


stats = []

# Alternatives: a single id such as [232945], or
# Brendan_synapses.presyn_nucleus_id.unique() for every presynaptic cell.
pre_soma_ID = BC_clean

for ii in pre_soma_ID:

    print(ii)
    temp_table = Brendan_synapses[Brendan_synapses['presyn_nucleus_id'] == ii]
    labels = temp_table[type_col]

    # Synapses
    total_syn = len(temp_table)

    BC_syn = _count_label(labels, "BC")
    MC_syn = _count_label(labels, "MC")
    BPC_syn = _count_label(labels, "BPC")
    NGC_syn = _count_label(labels, "NGC")

    P23_syn = _count_label(labels, "23P")
    P4_syn = _count_label(labels, "4P")
    P5_NP_syn = _count_label(labels, "5P-NP")
    P5_ET_syn = _count_label(labels, et_label)
    P5_IT_syn = _count_label(labels, "5P-IT")
    P6_IT_syn = _count_label(labels, "6P-IT")
    P6_CT_syn = _count_label(labels, "6P-CT")

    # Rows whose target has no label in the selected column
    unasigned_syn = labels.isnull().sum()

    total_syn_L5 = P5_NP_syn + P5_ET_syn + P5_IT_syn

    # syn_from_et was merged on the presynaptic nucleus id, so it should be constant
    # within temp_table. `np.int` no longer exists in NumPy >= 1.24 and was being applied
    # to an array; take the single value explicitly and fail loudly if it is not unique.
    syn_values = temp_table['syn_from_et'].unique()
    if len(syn_values) != 1:
        raise ValueError(
            f"presyn_nucleus_id {ii}: expected exactly one syn_from_et value, "
            f"found {len(syn_values)}"
        )
    syn_from_et_value = int(syn_values[0])

    print(total_syn_L5)

    # Fraction (0-1) of L5 targets that are 5P-ET; NaN when the cell has no L5 targets.
    if total_syn_L5 > 0:
        percent_5P_ET = P5_ET_syn / total_syn_L5
    else:
        percent_5P_ET = np.nan

    total_syn_sum = sum([BC_syn, MC_syn, BPC_syn, NGC_syn,
                    P23_syn, P4_syn, P5_NP_syn, P5_ET_syn, P5_IT_syn,
                    P6_IT_syn, P6_CT_syn, unasigned_syn])

    # Sanity check: the two numbers agree when every non-null label is one of the
    # types counted above.
    print(total_syn)
    print(total_syn_sum)

    stat = {
        'ID': ii,
        'BC': BC_syn,
        'MC': MC_syn,
        'BPC': BPC_syn,
        'NGC': NGC_syn,

        'P23': P23_syn,
        'P4': P4_syn,
        'P5-NP': P5_NP_syn,
        'P5-ET': P5_ET_syn,
        'P5-IT': P5_IT_syn,
        'P6-IT': P6_IT_syn,
        'P6-CT': P6_CT_syn,

        'unasigned': unasigned_syn,
        'percent_5P_ET': percent_5P_ET,
        'P5_total_syn': total_syn_L5,
        'syn_from_et': syn_from_et_value,
    }
    stats.append(stat)

stats_BCM = pd.DataFrame(stats)

In [None]:
# Display the per-presynaptic-cell summary table.
stats_BCM#.syn_from_et.unique()

In [None]:
# Nucleus ids that were summarised in stats_BCM.
BC_clean

In [None]:
# NOTE(review): no assignment to `stats_AIBS` is visible in this part of the notebook
# (the AIBS loop stores its result in stats_AIBS_MC); confirm which frame is meant.
stats_AIBS

In [None]:
# Distinct presynaptic nucleus ids in mc_output_df.
mc_output_df.pre_nucleus_id.unique()

In [None]:
# Bar chart of the per-type counts for row `idx` of stats_AIBS.
# NOTE(review): `idx` is first assigned in the NEXT cell, and no assignment to `stats_AIBS`
# (or an 'orphans' column) is visible in this part of the notebook; confirm both exist
# before this cell runs.
import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])

# Bar labels are also the stats_AIBS column names, in plotting order.
lab = ['BC', 'MC', 'BPC', 'NGC', '23P', '4P', '5P-NP', '5P-ET', '5P-IT','6P-IT','6P-CT','orphans']

vals = np.array([stats_AIBS.iloc[idx][column] for column in lab])

ax.bar(lab,vals)
plt.show()

In [None]:
# Bar chart of the per-type counts for one row of stats_BCM.
import matplotlib.pyplot as plt

# Positional row of stats_BCM to plot.
idx = 17

fig = plt.figure()
ax = fig.add_axes([0,0,1,1])

# stats_BCM column names, in plotting order (the bar labels below use a different spelling).
bcm_columns = ['BC', 'MC', 'BPC', 'NGC', 'P23', 'P4', 'P5-NP', 'P5-ET',
               'P5-IT', 'P6-IT', 'P6-CT', 'unasigned']

vals = np.array([stats_BCM.iloc[idx][column] for column in bcm_columns])

lab = ['BC', 'MC', 'BPC', 'NGC', '23P', '4P', '5P-NP', '5P-ET', '5P-IT','6P-IT','6P-CT','unasigned']

ax.bar(lab,vals)
plt.show()

In [None]:
# Constant column so the swarm plot can put every cell in a single x category.
stats_BCM['dummy'] = 1

In [None]:
# Swarm plot of percent_5P_ET (stored as a 0-1 fraction), one point per stats_BCM row with
# more than 50 L5 synapses, coloured by syn_from_et. All points share the single x
# category given by the constant 'dummy' column.

import seaborn as sns
import matplotlib.pyplot as plt

f, ax = plt.subplots(figsize=(10,8))

ax = sns.swarmplot(data = stats_BCM.query('P5_total_syn > 50'), x='dummy', y="percent_5P_ET",
                   size=8, hue="syn_from_et", palette="viridis", ax=ax)
ax.set(ylim=(0, 0.9))

# Save BEFORE showing: once plt.show() has displayed the figure, a later plt.savefig()
# can act on a fresh, empty figure and write a blank file. Saving through the figure
# object also avoids depending on pyplot's "current figure".
f.savefig('swarm.eps')
plt.show()

In [None]:
# Ids of cells with a 5P-ET fraction below 0.5 despite more than 50 L5 synapses and
# syn_from_et above 20.
stats_BCM.query('percent_5P_ET < 0.5 and P5_total_syn > 50 and syn_from_et > 20' ).ID.unique()

In [None]:
# syn_from_et values of the cells with more than 50 L5 synapses.
stats_BCM.query('P5_total_syn > 50').syn_from_et


In [None]:
# Number of distinct presynaptic nucleus ids in Brendan_synapses.
len(Brendan_synapses.presyn_nucleus_id.unique())

In [None]:
# Look up one hard-coded synapse id in manual_multi_df.
manual_multi_df[manual_multi_df['synapse_id'] == 127538363]