In [None]:
import pandas as pd
import re
from collections import OrderedDict
from itertools import chain
import numpy as np
import neuprint
from oaklib import get_adapter

In [None]:
# for connecting to neuPrint (add token)
# Paste a personal neuPrint auth token here before running; it is blank in the saved notebook.
# NOTE(review): neuprint-python appears to fall back to the NEUPRINT_APPLICATION_CREDENTIALS
# environment variable when no token is passed - confirm before relying on that.
token = ""
# Client bound to the optic-lobe v1.1 dataset; used below for the custom Cypher queries.
np_client = neuprint.Client('https://neuprint.janelia.org', dataset='optic-lobe:v1.1', token=token)

In [None]:
# Open the merged FBbt ontology through an oaklib adapter; it is queried below for part_of relationships.
# NOTE(review): the path is relative to this notebook and points into a tmp folder -
# presumably the file has to be built by the ontology pipeline first.
fbbt = get_adapter("../../../ontology/tmp/fbbt-merged.db")

In [None]:
# minimum no. synapses to add region connectivity
connectivity_threshold = 10

# Load mapping: one row per new cell type, indexed by its optic lobe type name
cell_types = pd.read_csv("new_types.tsv", sep="\t", index_col='OL_type', low_memory=False)

# parent_types: indexed by broad type (the leading letters of a type name)
parent_types = pd.read_csv("broad_type_map.tsv", sep="\t", index_col='broad_type', low_memory=False)

# hemilineage (from fw template folder)
hemilineage_map = pd.read_csv("../flywire_neurons/lineage_map.tsv", sep="\t", low_memory=False)

# Load rois
OL_rois = pd.read_csv('OL_ROI_mapping.tsv', sep='\t')

# dictionary of raw ROI names to FBbt
raw_ROI_dict = dict(zip(OL_rois['ROI'], OL_rois['FBbt_id']))

# dictionary of FBbt IDs to FBbt names, built from the de-duplicated ID/name pairs
OL_rois_FBbt = OL_rois.loc[:, ['FBbt_id', 'FBbt_name']].drop_duplicates()
ROI_name_dict = dict(zip(OL_rois_FBbt['FBbt_id'], OL_rois_FBbt['FBbt_name']))

In [None]:
# dict of term descendants (SLOW)
# For every mapped ROI term, ask the ontology for all (entailed) part_of (BFO:0000050)
# relationships that have the ROI term as their object, and keep the first element of
# each returned relationship (presumably the subject, i.e. the term that is part of the ROI).
FBbt_descendants = {}
for roi_id in ROI_name_dict:
    part_of_edges = fbbt.relationships(objects=[roi_id], predicates=['BFO:0000050'], include_entailed=True)
    FBbt_descendants[roi_id] = [edge[0] for edge in part_of_edges]
#FBbt_descendants['FBbt:00003748']

In [None]:
#nts
# Predicted transmitter name -> GO term ID; used to fill the 'NT' template column.
nt_cvs = dict(
    acetylcholine='GO:0014055',
    GABA='GO:0061534',
    glutamate='GO:0061535',
)

In [None]:
# get type info from neuprint
# One row per neuron whose type is in cell_types; roiInfo is converted from its
# JSON string into a map by the query itself.
queried_types = cell_types.index.tolist()
query = (f"MATCH (n:Neuron) WHERE n.type IN {queried_types} "
         "RETURN n.type AS type, n.bodyId AS bodyID, n.instance AS instance, "
         "apoc.convert.fromJsonMap(n.roiInfo) AS ROIs")

raw_results = np_client.fetch_custom(query)


In [None]:
# get hemilineage info from neuprint (not clear where this came from - not in Nern paper)
# Distinct (type, hemilineage) pairs for the neurons that have a hemilineage property.
# NOTE(review): the two string pieces are joined without whitespace, so the query
# text reads "...EXISTS(n.hemilineage)RETURN  DISTINCT..."; kept exactly as it was.
hemilineage_types = cell_types.index.tolist()
query = (f"MATCH (n:Neuron) WHERE n.type IN {hemilineage_types} AND EXISTS(n.hemilineage)"
         "RETURN  DISTINCT n.type AS OL_type, n.hemilineage AS hemilineage")

hemilineages = np_client.fetch_custom(query)
#hemilineages.head()

In [None]:
#map hemilineages to FBbt - check for any not in flywire mapping file
# Left merge: every neuprint (OL_type, hemilineage) row is kept; rows whose hemilineage
# has no entry in the mapping file get NaN in the columns that come from hemilineage_map.
mapped_hemilineages = hemilineages.merge(hemilineage_map, how='left', left_on='hemilineage', right_on='ito_lee_hemilineage')

# A hemilineage is unmapped when the merge found no partner row, i.e. the right-hand key
# is NaN. (Testing membership in the merged 'hemilineage' column can never flag anything,
# because a left merge retains every left-hand value.)
unmapped_hemilineages = (
    mapped_hemilineages
    .loc[mapped_hemilineages['ito_lee_hemilineage'].isna(), 'hemilineage']
    .drop_duplicates()
    .to_list()
)

# Keep only what later cells need, indexed by type; NB_id becomes the 'neuroblast' column.
mapped_hemilineages = mapped_hemilineages[['OL_type', 'NB_id', 'hemilineage']].set_index('OL_type').rename(columns={'NB_id':'neuroblast'})
# 'putative_primary' rows are discarded rather than treated as a hemilineage.
mapped_hemilineages = mapped_hemilineages[mapped_hemilineages['hemilineage']!='putative_primary']
# Display the hemilineages that still need adding to the mapping file (empty list = all mapped).
unmapped_hemilineages

In [None]:
# add to cell type info
# Left-join the 'neuroblast' column onto cell_types by index (OL_type); types with no
# row in mapped_hemilineages get NaN.
# NOTE(review): cell_types is reassigned from itself, so re-running this cell merges a
# second neuroblast column (pandas would presumably suffix the clashing names) - run it
# once per kernel.
cell_types = cell_types.merge(mapped_hemilineages['neuroblast'], how='left', left_index=True, right_index=True)
#cell_types.head()

In [None]:
# preserve to not have to redo query
# Work on a copy: a later cell adds a 'cell_side' column to np_results, and plain
# assignment would only alias raw_results, so the query result would be modified too.
np_results = raw_results.copy()

In [None]:
# get sides
def find_side(label):
    """Classify a neuPrint label as 'right', 'left' or 'no_side'.

    A side marker is the letter R (or L) directly preceded by '_' or '(' and
    optionally followed by '_' or ')'. The right marker is tested first, so a
    label carrying both markers is reported as 'right'.
    """
    side_patterns = (
        ('right', re.compile('[_(]R[_)]?')),
        ('left', re.compile('[_(]L[_)]?')),
    )
    for side, pattern in side_patterns:
        if pattern.search(label):
            return side
    return 'no_side'


In [None]:
# Derive each body's side from its instance name, then discard the instance column.
np_results['cell_side'] = np_results['instance'].map(find_side)
np_results = np_results.drop(columns='instance')
#np_results.head()

In [None]:
# set multiindex
# Move type, bodyID and cell_side into the index so the remaining data is the per-body ROIs value.
region_data = np_results.set_index(['type', 'bodyID', 'cell_side'])
#print(region_data.head())

In [None]:
# stack ROIs into index
# Expand each body's ROI map into one column per ROI, then stack those columns
# into a fourth index level holding the neuPrint ROI name.
connecivity_by_ROI = region_data['ROIs'].apply(pd.Series).stack(future_stack=True)
connecivity_by_ROI.index = connecivity_by_ROI.index.set_names(['type', 'bodyID', 'cell_side', 'ROI_np'])

# find ROIs that do/don't correspond to OL columns
all_rois = connecivity_by_ROI.index.get_level_values('ROI_np').drop_duplicates().to_list()
column_ROIs = []
non_column_ROIs = []
for roi_name in all_rois:
    if '_col_' in roi_name:
        column_ROIs.append(roi_name)
    else:
        non_column_ROIs.append(roi_name)

# check that non_column rois are all in mapping
mapped_roi_names = OL_rois['ROI'].to_list()
missing_rois = [roi_name for roi_name in non_column_ROIs if roi_name not in mapped_roi_names]
missing_rois

In [None]:
# remove any OL column ROIs from data
# Drops every row whose ROI_np index level is one of the ROI names containing '_col_'.
connecivity_by_ROI_1 = connecivity_by_ROI.drop(column_ROIs, level='ROI_np')
#print(connecivity_by_ROI_1.head())

In [None]:
# split out connectivity type (pre, post etc) into columns (SLOW)
# Every value is expanded with pd.Series into one column per key.
# NOTE(review): presumably each value is a dict of counts from roiInfo; entries that are
# missing after stacking appear to end up in a column labelled 0, which the next cell drops.
connectivity_table_1 = connecivity_by_ROI_1.apply(pd.Series)
#print(connectivity_table_1.head())

In [None]:
# map neuprint ROIs to FBbt and tidy up
# Steps: pull the neuPrint ROI name out of the index, translate it to an FBbt ID and a
# side, drop the columns that are not needed (including the stray column labelled 0),
# append the FBbt ROI to the index and replace missing counts with 0.
connectivity_table = (
    connectivity_table_1
    .reset_index(level='ROI_np', drop=False)
    .assign(
        ROI=lambda table: table['ROI_np'].map(raw_ROI_dict),
        np_side=lambda table: table['ROI_np'].apply(find_side),
    )
    .drop(columns=['downstream', 'upstream', 'synweight', 0, 'ROI_np'])
    .set_index('ROI', append=True)
    .fillna(0)
)
#connectivity_table.head()

In [None]:
# determine laterality
def laterality(colA,colB):
    """Compare two side labels and describe their relationship.

    Returns 'no_laterality' if either label is 'no_side'; otherwise
    'ipsilateral' when the labels are equal and 'contralateral' when they differ.
    """
    if 'no_side' in (colA, colB):
        return 'no_laterality'
    return 'ipsilateral' if colA == colB else 'contralateral'


In [None]:
# Label every (body, ROI) row by comparing the body's side with the ROI's side, then
# drop the two side columns that are no longer needed.
# NOTE: this cell needs `laterality` to still be the function defined above; a later
# cell rebinds that name to a Series, so re-running this cell after it will fail
# unless the function definition is re-run first.
connectivity_laterality = connectivity_table.reset_index(level='cell_side')
connectivity_laterality['laterality'] = connectivity_laterality.apply(
    lambda row: laterality(row['cell_side'], row['np_side']),
    axis=1,
)
connectivity_laterality = connectivity_laterality.drop(columns=['cell_side', 'np_side'])
#connectivity_laterality.head()

In [None]:
# get max connectivity per region per bodyID (groups duplicate regions)
body_connectivity_table = (
    connectivity_laterality
    .groupby(['type', 'bodyID', 'laterality', 'ROI'])
    .agg({'post': 'max', 'pre': 'max'})
)

# get min connectivity per region per type (groups multiple bodies per type)
type_connectivity_table = (
    body_connectivity_table
    .groupby(['type', 'laterality', 'ROI'])
    .agg({'post': 'min', 'pre': 'min'})
)

# drop rows where minimum for pre and post are both 0
no_synapses = type_connectivity_table['post'].eq(0) & type_connectivity_table['pre'].eq(0)
type_connectivity_table = type_connectivity_table.loc[~no_synapses]


In [None]:
# drop redundant FBbt terms
def drop_redundant_terms(term_list):
    """Return the terms of term_list that are not made redundant by another listed term.

    A term is kept only if none of the *other* terms in the list appears in its
    FBbt_descendants entry, so a region is dropped when one of its parts is also
    listed. Order (and any repeated entries) of the kept terms is preserved.
    Raises KeyError for a term that has no FBbt_descendants entry.
    """
    listed = set(term_list)
    return [
        term for term in term_list
        if (listed - {term}).isdisjoint(FBbt_descendants[term])
    ]
#drop_redundant_terms(['FBbt:00003701', 'FBbt:00003748'])

In [None]:
# split to input and output, threshold, and drop redundant

def _regions_meeting_threshold(polarity):
    """Return the thresholded region tables for one synapse polarity.

    Parameters
    ----------
    polarity : str
        Column of type_connectivity_table to threshold: 'post' (inputs) or 'pre' (outputs).

    Returns
    -------
    tuple
        (regions, by_laterality, by_type) where
        - regions is a DataFrame indexed by (type, laterality) with one 'ROI' column,
        - by_laterality is a Series of non-redundant ROI lists per (type, laterality),
        - by_type is a Series of non-redundant ROI lists per type, ignoring laterality.
    """
    # Inclusive comparison: connectivity_threshold is described as the *minimum* synapse
    # count, and the generated comment text says "at least" this many connections.
    meets_threshold = type_connectivity_table[polarity] >= connectivity_threshold
    regions = (
        type_connectivity_table
        .loc[meets_threshold, polarity]
        .reset_index('ROI')
        .drop(columns=[polarity])
    )
    by_laterality = (
        regions
        .groupby(['type', 'laterality'])['ROI']
        .apply(list)
        .apply(drop_redundant_terms)
    )
    # Without laterality the same ROI can be listed twice (once ipsilateral, once
    # contralateral); de-duplicate in first-seen order so each ID is emitted once.
    by_type = (
        regions
        .groupby('type')['ROI']
        .apply(lambda rois: list(dict.fromkeys(rois)))
        .apply(drop_redundant_terms)
    )
    return regions, by_laterality, by_type


connectivity_inputs, connectivity_inputs_lat, connectivity_inputs_nolat = _regions_meeting_threshold('post')
connectivity_outputs, connectivity_outputs_lat, connectivity_outputs_nolat = _regions_meeting_threshold('pre')


In [None]:
# Outer-join the per-(type, laterality) input and output region lists on their shared
# index, so a (type, laterality) pair present in only one of them is still kept.
connectivity_regions_lat = pd.merge(
    connectivity_inputs_lat.to_frame(name='inputs'),
    connectivity_outputs_lat.to_frame(name='outputs'),
    how='outer',
    left_index=True,
    right_index=True,
)
#connectivity_regions_lat.head()

In [None]:
# Collect, for each type, the list of laterality values that have thresholded input
# and/or output regions (one entry per row of connectivity_regions_lat).
# NOTE: this rebinds the name `laterality`, which until now referred to the function
# defined earlier in the notebook; after this cell has run, the cell that calls
# laterality(...) cannot be re-run without re-running the function definition first.
laterality = connectivity_regions_lat.reset_index('laterality').drop(labels = ['inputs', 'outputs'], axis=1)
laterality = laterality.groupby('type')['laterality'].apply(list)
#laterality.to_csv('laterality.csv')

In [None]:
def name_lister(names):
    """Join names into an English list.

    Returns False for an empty sequence, the single element for a one-item
    sequence, and otherwise 'a and b' / 'a, b and c' (comma-separated with
    ' and ' before the last item, no Oxford comma).
    """
    if len(names) < 1:
        return False
    if len(names) == 1:
        return names[0]
    return ", ".join(names[:-1]) + " and " + names[-1]

In [None]:
def get_region_ids(OL_type, laterality, polarity):
    """Look up the region ID list for one type, laterality and polarity.

    Reads connectivity_regions_lat at row (OL_type, laterality) and column
    polarity ('inputs' or 'outputs'). Returns the stored list, or False when
    the row does not exist or the cell does not hold a list.
    """
    try:
        regions = connectivity_regions_lat.loc[(OL_type, laterality), polarity]
    except KeyError:
        return False
    return regions if type(regions) == list else False

#get_region_ids('LT63', 'contralateral', 'inputs')

In [None]:
def neuropil_writer(FBbt_ids):
    """returns neuropil names for a list of FBbt_IDs

    Each ID is looked up in ROI_name_dict, 'adult ' is removed from the name,
    and the names are combined into one string with name_lister.
    """
    short_names = []
    for term_id in FBbt_ids:
        short_names.append(ROI_name_dict[term_id].replace('adult ', ''))
    return name_lister(short_names)

#neuropil_writer(get_region_ids('Cm1', 'ipsilateral', 'inputs'))

In [None]:
def get_name_trunk(OL_type):
    """Return the leading run of ASCII letters of a type name, e.g. 'LC' for 'LC10a'.

    The character class is [A-Za-z]: the range A-z would also accept the
    punctuation that sits between 'Z' and 'a' in ASCII ('[', '\\', ']', '^',
    '_' and '`'), so a name such as 'Tm_x' would yield 'Tm_x' instead of 'Tm'.

    Raises ValueError if OL_type does not start with a letter.
    """
    trunk = re.match('[A-Za-z]+', OL_type)
    if trunk is None:
        raise ValueError(f"Cannot derive a broad type from {OL_type!r}: it does not start with a letter")
    return trunk[0]

In [None]:
def label_writer(OL_type):
    """Build the term label 'adult <broad type text> neuron <OL_type>'.

    The broad type text is read from the 'text' column of parent_types, using
    the name trunk of OL_type as the row key.
    """
    trunk = get_name_trunk(OL_type)
    broad_type = parent_types.loc[trunk, 'text']
    return f"adult {broad_type} neuron {OL_type}"
#label_writer('Cm15')

In [None]:
def _innervation_sentence(OL_type, polarity, lead_in):
    """Build the input or output sentence of a definition.

    Parameters
    ----------
    OL_type : str
        Cell type name (row key of connectivity_regions_lat).
    polarity : str
        'inputs' or 'outputs' - the column of connectivity_regions_lat to read.
    lead_in : str
        Opening of the sentence, e.g. " It receives input in".

    Returns
    -------
    str
        The sentence (with a leading space), or '' when the type has no regions
        for this polarity under any laterality.
    """
    phrases = []
    for side, qualifier in (('ipsilateral', ' the ipsilateral '),
                            ('contralateral', ' the contralateral '),
                            ('no_laterality', ' the ')):
        region_ids = get_region_ids(OL_type, side, polarity)
        if region_ids:
            phrases.append(f"{qualifier}{neuropil_writer(region_ids)}")

    if not phrases:
        return ''
    if len(phrases) < 3:
        # one phrase: used as is; two phrases: "<a>, and <b>"
        listed = ', and'.join(phrases)
    else:
        listed = f"{phrases[0]},{phrases[1]}, and{phrases[2]}"
    return f"{lead_in}{listed} (Nern et al., 2025)."


def def_writer(OL_type):
    """Broad type, innervation, nt, cell number, jigsaw

    Assembles the text definition for one cell type from, in order: the broad
    type and type name, the hemilineage (if the type is in mapped_hemilineages),
    the input regions, the output regions, the predicted neurotransmitter (if
    present in cell_types), the approximate number of cells per hemisphere and,
    where cell_types marks the type as 'jigsaw', the tiling sentence.

    Raises KeyError if OL_type (or its name trunk) is missing from cell_types /
    parent_types, and ValueError if the cell count is NaN.
    """
    broad_type = parent_types.loc[get_name_trunk(OL_type), 'text']

    input_def = _innervation_sentence(OL_type, 'inputs', " It receives input in")
    output_def = _innervation_sentence(OL_type, 'outputs', " It sends output to")

    # NOTE(review): mapped_hemilineages is indexed by OL_type without de-duplication; if a
    # type ever had more than one hemilineage row, .loc would return a Series here and the
    # sentence would be malformed - confirm the index is unique.
    try:
        hemilineage = (f" It belongs to the {mapped_hemilineages.loc[OL_type, 'hemilineage'].replace('_',' ')}"
                       f" hemilineage (Nern et al., 2025).")
    except KeyError:
        # type has no hemilineage annotation
        hemilineage = ''

    # For types matched 1-to-1, average the two cell counts; otherwise use the 'OL' count alone.
    if cell_types.loc[OL_type, 'matched_as']=='1-to-1':
        av_cells = cell_types.loc[OL_type, ['OL', 'Matsliah']].mean()
    else:
        av_cells = cell_types.loc[OL_type, 'OL']
    n_cells = int(round(av_cells, 0))

    definition = f"Adult {broad_type} neuron of the {OL_type} group (Nern et al., 2025)."
    definition += hemilineage
    definition += input_def
    definition += output_def

    transmitter = cell_types.loc[OL_type, 'transmitter_pred']
    if pd.notna(transmitter):
        definition += (f" Its predicted neurotransmitter is {transmitter} "
                        "(Eckstein et al., 2024; Nern et al., 2025).")

    if n_cells == 1:
        definition += " There is one of these cells per hemisphere (Nern et al., 2025)."
    else:
        definition += f" There are approximately {n_cells} of these cells per hemisphere (Nern et al., 2025)."

    if cell_types.loc[OL_type, 'jigsaw']=='y':
        definition += " The pair of them form a jigsaw pattern, tiling the neuropil (Matsliah et al., 2024)."
    return definition
#def_writer('OLVC2')

In [None]:
def comment_writer(OL_type):
    """Assemble the rdfs:comment text for one cell type.

    The comment states whether the type was identified in one or several EM
    datasets (and at light level when cell_types['LM'] is 'y'), explains the
    synapse threshold, notes the hemilineage source for types present in
    mapped_hemilineages, adds a history note for LoVP26 and closes with the
    general source statement.
    """
    light = ' and at light level' if cell_types.loc[OL_type, 'LM'] == 'y' else ''
    is_unmatched = cell_types.loc[OL_type, 'matched_as'] == 'unmatched'

    sentences = []
    if is_unmatched:
        sentences.append(f"Cell type identified in one EM dataset{light} (Nern et al., 2025).")
    else:
        sentences.append(f"Cell type identified in multiple EM datasets{light} (Schlegel et al., 2024 - FBrf0260535; Nern et al., 2025).")

    sentences.append(f" Synapse locations are given where each cell of this type in "
                     f"neuprint has at least {connectivity_threshold} connections.")

    if OL_type in mapped_hemilineages.index:
        sentences.append(" Hemilineage information from neuprint, though original source not clear "
                         "- possibly from FlyWire data (Schlegel et al., 2024 - FBrf0260535).")

    if OL_type == 'LoVP26':
        sentences.append(" This cell type was originally hemibrain PS179 (Scheffer et al., 2020 - FBrf0246888)."
                         " Modification to LoVP26 is a slight broadening of meaning.")

    sentences.append(" Other information from Nern et al. (2025) supplements.")
    return ''.join(sentences)
#comment_writer('Cm18')

In [None]:
# IDs namespace, creation
# obo_id repeats the FBbt ID; namespace and creator are the same for every row.
cell_types = cell_types.assign(
    obo_id=cell_types['FBbt_id'],
    obo_namespace="fly_anatomy.ontology",
    created_by="http://orcid.org/0000-0002-1373-1705",
)
#cell_types.head()

In [None]:
# labels, defs, comments
# Each writer takes the type name, so run them over the index as a Series.
OL_type_names = cell_types.index.to_series()
cell_types['label'] = OL_type_names.apply(label_writer)
cell_types['definition'] = OL_type_names.apply(def_writer)
# default definition reference; a later cell overrides it for some rows
cell_types['Xref_def'] = "doi:10.1038/s41586-025-08746-0"
cell_types['comment'] = OL_type_names.apply(comment_writer)
#cell_types.head()

In [None]:
# reference cols
# Synonym references: filled only on rows that have the corresponding synonym value.
cell_types.loc[cell_types['Matsliah_type'].notnull(), 'Matsliah_ref'] = 'FlyBase:FBrf0260545|doi:10.1038/s41586-025-08746-0'
cell_types.loc[cell_types['Schlegel_type'].notnull(), 'Schlegel_ref'] = 'FlyBase:FBrf0260535|doi:10.1038/s41586-025-08746-0'
cell_types.loc[cell_types['hemibrain_type'].notnull(), 'hemibrain_ref'] = 'FlyBase:FBrf0246888|doi:10.1038/s41586-025-08746-0'
# Definition references: the order of the next three assignments matters, because each
# one overwrites Xref_def on the rows it selects - transmitter only, then jigsaw only,
# then rows with both (which receive all three references).
cell_types.loc[cell_types['transmitter_pred'].notnull(), 'Xref_def'] = 'FlyBase:FBrf0259490|doi:10.1038/s41586-025-08746-0'
# NOTE(review): this selects every row with a non-null 'jigsaw' value, whereas def_writer
# only adds the jigsaw sentence when the value is 'y' - confirm the column is 'y' or empty.
cell_types.loc[cell_types['jigsaw'].notnull(), 'Xref_def'] = 'FlyBase:FBrf0260545|doi:10.1038/s41586-025-08746-0'
cell_types.loc[cell_types['jigsaw'].notnull() & cell_types['transmitter_pred'].notnull(), 'Xref_def'] = \
    'FlyBase:FBrf0259490|doi:10.1038/s41586-025-08746-0|FlyBase:FBrf0260545'
#cell_types.head()

In [None]:
# neurotransmitter
# Translate the predicted transmitter name into its GO ID via nt_cvs; rows with no
# prediction, or with a name that is not a key of nt_cvs, get NaN.
cell_types['NT'] = cell_types['transmitter_pred'].map(nt_cvs)
#cell_types.head()

In [None]:
# parents based on broad type
# broad_type is the leading-letters trunk of each type name (see get_name_trunk).
cell_types['broad_type'] = cell_types.index.to_series().apply(get_name_trunk)
# The merge below is on the broad_type column, which would not keep the OL_type index,
# so the index is moved into a column first and restored afterwards.
cell_types = cell_types.reset_index(drop=False)
# Bring in the parent term ID for each broad type as 'type_parent'; broad types missing
# from parent_types get NaN (left merge).
cell_types = cell_types.merge(parent_types['FBbt_id'].reset_index().rename(columns={'FBbt_id':'type_parent'}),
                              how='left', on='broad_type')
cell_types = cell_types.set_index('OL_type')
#cell_types.head()

In [None]:
def _combine_parents(row):
    """Return the '|'-separated parent IDs for one row.

    Rows with a value in 'additional_parents' get '<additional_parents>|<type_parent>';
    all other rows get 'type_parent' unchanged.
    """
    if pd.notna(row['additional_parents']):
        return f"{row['additional_parents']}|{row['type_parent']}"
    return row['type_parent']


cell_types['parents'] = cell_types.apply(_combine_parents, axis=1)
#cell_types.head()

In [None]:
# innervation regions and laterality
# Region lists become '|'-separated strings for the SPLIT=| template columns. The
# assignments align on the type index, so types absent from a Series are left as NaN.
cell_types['inputs'] = connectivity_inputs_nolat.apply('|'.join)
cell_types['outputs'] = connectivity_outputs_nolat.apply('|'.join)
# Types with any contralateral regions get PATO:0000618, others an empty string.
# Here `laterality` is the per-type Series of laterality lists, not the function.
cell_types['laterality'] = laterality.apply(
    lambda sides: 'PATO:0000618' if 'contralateral' in sides else ''
)

In [None]:
# Make header
# Column name -> ROBOT template string, in output column order.
# NOTE(review): no cell in this notebook creates 'creation_date' or the *_type synonym
# columns; presumably they come from new_types.tsv - confirm.
template_seed = OrderedDict([
    ('ID', 'ID'),

    # fields for obo ID and namespace
    ("obo_id", "A oboInOwl:id"),
    ("obo_namespace", "A oboInOwl:hasOBONamespace"),

    #label, definition, creation:
    ("label", "A rdfs:label"),
    ("definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("created_by", "AI dc:contributor"),
    ("creation_date", "AT dc:date^^xsd:dateTime"),

    #synonyms, comment:
    ("Matsliah_type", "A oboInOwl:hasRelatedSynonym SPLIT=|"),
    ("Matsliah_ref", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("Schlegel_type", "A oboInOwl:hasRelatedSynonym SPLIT=|"),
    ("Schlegel_ref", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("hemibrain_type", "A oboInOwl:hasRelatedSynonym SPLIT=|"),
    ("hemibrain_ref", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("comment", "A rdfs:comment"),

    # Columns for relationships:
    ("parents", "SC % SPLIT=|"),
    ("NT", "SC 'capable of' some %"),
    ("inputs", "SC 'receives synaptic input in region' some % SPLIT=|"),
    ("outputs", "SC 'sends synaptic output to region' some % SPLIT=|"),
    ("laterality", "SC 'has characteristic' some %"),
    ("neuroblast", "SC 'develops from' some %"),
])

# Create dataFrame for template header (a single row holding the template strings)
template = pd.DataFrame.from_records([template_seed])

#template

In [None]:
# reformat and merge with header
# FBbt_id becomes the template's ID column; working columns that are not part of the
# template are removed.
working_columns = ['OL', 'Matsliah', 'transmitter_pred', 'matched_as', 'additional_parents',
                   'jigsaw', 'LM', 'broad_type', 'type_parent']
cell_types = (
    cell_types
    .rename(columns={'FBbt_id': 'ID'})
    .drop(columns=working_columns)
)

In [None]:
# Put the header row of template strings first, followed by one row per cell type;
# columns are matched by name.
populated_template = pd.concat([template,cell_types])
#populated_template.head()

In [None]:
# Write the template as a tab-separated file; index=False means the OL_type index is not written.
populated_template.to_csv('template.tsv', sep='\t', index=False)