In [None]:
"""
makes robot template to generate provisional cell types 
based on type:ID mapping in 'new_cell_types.tsv' (does not use 'FBbt_name' in file)
may need to copy across latest hemibrain ROI mapping if this gets updated
"""
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict
import numpy as np

# Read the table that maps neuprint types to FBbt:2... IDs, labels and refs.
cell_types = pd.read_csv(
    './new_cell_types.tsv',
    sep='\t',
)

# copy across latest ROI mapping file from hemibrain_metadata

In [None]:
# Load the ROI mapping, strip the side suffix "(L)" / "(R)" from each ROI
# name, and keep only ROIs whose name is made up solely of capital letters.

roi_mapping = pd.read_csv('./hemibrain_1-1_ROI_mapping.tsv', sep='\t')

# Raw string: '\(' is not a valid Python string escape, so a plain literal
# triggers an "invalid escape sequence" warning on recent Python versions.
roi_mapping['ROI'] = roi_mapping['ROI'].map(
    lambda roi: re.sub(r'\([LR]+\)', '', roi))

# Left and right entries collapse to identical rows once the suffix is gone,
# hence drop_duplicates(). na=False yields a plain boolean mask.
roi_mapping = (
    roi_mapping[roi_mapping['ROI'].str.match(r'[A-Z]+$', na=False)]
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
# add entry for 'posterior slope' and 'clamp' (not in hemibrain ROIs?)
# DataFrame.append was removed in pandas 2.0, so the extra rows are built as
# a small frame and attached with a single pd.concat.

extra_rois = pd.DataFrame([
    {'ROI': 'PS', 'FBbt_id': 'FBbt:00040072', 'FBbt_name': 'posterior slope'},
    {'ROI': 'CL', 'FBbt_id': 'FBbt:00040047', 'FBbt_name': 'clamp'},
])
roi_mapping = pd.concat([roi_mapping, extra_rois], ignore_index=True)

In [None]:
# patterns for different types of neuprint label (used by functions)

# TI_pattern groups: (1) one or more capital letters, (2) exactly three
# digits, which must end the string (e.g. "SMP001" -> "SMP", "001").
TI_pattern = re.compile("([A-Z]+)([0-9][0-9][0-9]$)")

# multiPN_pattern groups: (1) literal "M_", (2) letters from l/v/a/d with an
# optional trailing "2", (3) literal "PN", (4) optional digits followed by
# letters from m/l/t, (5) digits with an optional trailing capital letter.
# Elsewhere in this notebook group 2 is used as the key into `neuroblasts`
# and group 4 as the key into `tracts`.
# NOTE(review): the pattern is not anchored at the end, so with re.match any
# trailing text after group 5 is accepted - confirm this is intended.
multiPN_pattern = re.compile("(M_)([lvad]+[2]?)(PN)([0-9]*[mlt]+)([0-9]+[A-Z]?)")

In [None]:
# Lookup table of neuroblasts, indexed by the short lineage code.
# Each entry is (FBbt ID, short code, name).

neuroblast_rows = [
    ('FBbt:00067348', 'v', 'ALv1'),
    ('FBbt:00050035', 'v2', 'ALv2'),
    ('FBbt:00050038', 'lv', 'ALlv1'),
    ('FBbt:00067346', 'ad', 'ALad1'),
    ('FBbt:00067347', 'l', 'ALl1 (Notch OFF hemilineage)'),
    ('FBbt:00067347', 'l2', 'ALl1 (Notch ON hemilineage)'),
]
nb_data = np.array(neuroblast_rows)
neuroblasts = pd.DataFrame(nb_data, columns=['ID', 'short', 'name']).set_index('short')
#neuroblasts

In [None]:
# Lookup table of tracts, indexed by the short tract code.
# Each entry is (FBbt ID, short code, name).

tract_rows = [
    ('FBbt:00003985', 'm', 'medial antennal lobe tract'),
    ('FBbt:00003983', 'l', 'lateral antennal lobe tract'),
    ('FBbt:00003984', 'ml', 'mediolateral antennal lobe tract'),
    ('FBbt:00049719', '10t', 'transverse antennal lobe t10ALT tract'),
]
tract_data = np.array(tract_rows)
tracts = pd.DataFrame(tract_data, columns=['ID', 'short', 'name']).set_index('short')
#tracts

In [None]:
def type_checker(shortname):
    """
    Classifies a neuprint type name by the pattern it matches.

    Returns 'TI' if the name matches TI_pattern, otherwise 'multi' if it
    matches multiPN_pattern. Raises ValueError if it matches neither.
    """
    for kind, pattern in (('TI', TI_pattern), ('multi', multiPN_pattern)):
        if pattern.match(shortname):
            return kind
    raise ValueError("Invalid neuron name - " + shortname)

In [None]:
def neuropil_writer(shortname):
    """
    Returns the FBbt name of the neuropil for a neuprint type name.

    The first group returned by shortname_splitter is looked up in the 'ROI'
    column of roi_mapping; the first matching 'FBbt_name' is returned with
    any 'adult ' removed.

    Raises ValueError if the ROI has no entry in roi_mapping (previously this
    surfaced as an uninformative "list index out of range").
    """
    roi = shortname_splitter(shortname)[0]
    matches = roi_mapping.loc[roi_mapping['ROI'] == roi, 'FBbt_name']
    if matches.empty:
        raise ValueError(
            "No ROI mapping found for '%s' (from neuron name %s)" % (roi, shortname))
    return str(matches.iloc[0]).replace('adult ', '')

In [None]:
# functions for parsing neuprint type names (terra incognita and multiglomerular PN types)

def shortname_splitter(shortname):
    """
    Splits a neuprint type name into the groups of its matching pattern.

    For 'TI' names the result is the groups of TI_pattern: (capital letters,
    three digits). For 'multi' names it is the five groups of multiPN_pattern.

    Raises ValueError if the name is not recognised by type_checker or
    cannot be split.
    """
    patterns = {'TI': TI_pattern, 'multi': multiPN_pattern}
    name_type = type_checker(shortname)

    # .get() keeps `m` defined even if type_checker ever returns a type
    # that has no pattern registered here.
    pattern = patterns.get(name_type)
    m = pattern.match(shortname) if pattern is not None else None

    if m:
        return m.groups()
    raise ValueError(shortname + " could not be split.")


In [None]:

def label_maker(shortname):
    """
    Builds the term label for a neuprint type name.

    'TI' names give "adult <neuropil> neuron <number>"; 'multi' names give
    the multiglomerular antennal lobe projection neuron label built from the
    fifth and second groups of the name.
    """
    kind = type_checker(shortname)
    parts = shortname_splitter(shortname)

    if kind == 'multi':
        return ("adult multiglomerular antennal lobe projection neuron type %s %sPN"
                % (parts[4], parts[1]))
    if kind == 'TI':
        return "adult %s neuron %s" % (neuropil_writer(shortname), parts[1])
    raise ValueError("Could not make label for " + shortname)


In [None]:
def definition_maker(shortname):
    """
    Builds the term definition for a neuprint type name.

    'TI' definitions name the group number and the neuropil; 'multi'
    definitions name the group, the neuroblast (looked up in `neuroblasts`)
    and the tract (looked up in `tracts`).
    """
    kind = type_checker(shortname)
    parts = shortname_splitter(shortname)

    if kind == 'TI':
        ti_text = ("Adult neuron belonging to group %s of the terra incognita neurons with substantial synapsing "
                   "in the %s (Scheffer et al., 2020).")
        return ti_text % (parts[1], neuropil_writer(shortname))

    if kind == 'multi':
        multi_text = ("Adult multiglomerular antennal lobe projection neuron belonging to group %s "
                      "(Scheffer et al., 2020). These neurons develop from neuroblast %s and follow "
                      "the %s (Bates et al., 2020; Scheffer et al., 2020).")
        neuroblast_name = neuroblasts['name'][parts[1]]
        tract_name = tracts['name'][parts[3]]
        return multi_text % (parts[4], neuroblast_name, tract_name)

    raise ValueError("Could not make definition for " + shortname)

In [None]:
# Template specification: key = column header, value = ROBOT template string
# for that column. It becomes the first row of the template table.

template_seed = OrderedDict([
    ('ID', 'ID'),
    ('CLASS_TYPE', 'CLASS_TYPE'),
    ('RDF_Type', 'TYPE'),

    # fields for obo ID and namespace
    ("obo_id", "A oboInOwl:id"),
    ("obo_namespace", "A oboInOwl:hasOBONamespace"),

    # label, definition, definition xrefs and creation metadata
    ("label", "A rdfs:label"),
    ("definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("created_by", "A oboInOwl:created_by"),
    ("creation_date", "A oboInOwl:creation_date"),

    # short name synonym, its reference, and comment
    ("synonym", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref"),
    ("comment", "A rdfs:comment"),

    # columns for relationships
    ("synapses", "SC 'has synaptic IO in region' some %"),
    ("inputs", "SC 'receives synaptic input in region' some %"),
    ("parent", "SC %"),
    ("neuroblast", "SC 'develops from' some %"),
    ("tract", "SC 'fasciculates with' some %"),
    ("hemilineage", "SC %"),
])

# One-row DataFrame holding the template specification
template = pd.DataFrame.from_records([template_seed])

#template

In [None]:
# Build one template row per cell type.
# Rows are collected in a list and concatenated onto the header row once;
# growing the DataFrame with pd.concat inside the loop is quadratic.
id_mapping = {} # dictionary of ids for types
template_rows = []

for i in cell_types.index:

    np_type = cell_types.loc[i, 'np_type']

    # parse the name once; type_checker raises ValueError for unrecognised names
    name_type = type_checker(np_type)
    name_parts = shortname_splitter(np_type)

    # new template row: every template column, initially blank
    row_od = OrderedDict((c, "") for c in template.columns)

    # these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = cell_types.loc[i, 'ref']
    row_od["syn_ref"] = cell_types.loc[i, 'ref']
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = cell_types.loc[i, 'date']
    row_od["comment"] = ("Uncharacterized putative cell type (based on clustering analysis) "
                         "from Janelia hemibrain data (Scheffer et al., 2020).")
    row_od['obo_namespace'] = "fly_anatomy.ontology"

    # easy to generate data
    row_od["ID"] = cell_types.loc[i, 'FBbt_id']
    row_od['obo_id'] = cell_types.loc[i, 'FBbt_id']
    row_od["synonym"] = "adult %s neuron" % np_type
    row_od["label"] = label_maker(np_type)
    row_od["definition"] = definition_maker(np_type)

    # conditional columns
    if name_type == 'TI':
        row_od["parent"] = "FBbt:00047095" # adult neuron
        roi_ids = roi_mapping.loc[roi_mapping['ROI'] == name_parts[0], 'FBbt_id']
        row_od["synapses"] = str(list(roi_ids)[0])

    if name_type == 'multi':
        row_od["parent"] = "FBbt:00007441" # adult multiglomerular antennal lobe projection neuron
        row_od["neuroblast"] = neuroblasts['ID'][name_parts[1]]
        row_od["tract"] = tracts['ID'][name_parts[3]]
        al_ids = roi_mapping.loc[roi_mapping['ROI'] == 'AL', 'FBbt_id']
        row_od["inputs"] = str(list(al_ids)[0])
        row_od["Xref_def"] += "|FlyBase:FBrf0246460"

    # the hemilineage is inferred from the neuroblast name in the definition
    if 'Notch OFF' in row_od["definition"]:
        row_od["hemilineage"] = 'FBbt:00049540'
    elif 'Notch ON' in row_od["definition"]:
        row_od["hemilineage"] = 'FBbt:00049539'

    template_rows.append(row_od)

# Always start again from the header row (row 0 of template), so re-running
# this cell rebuilds the table instead of appending duplicate rows.
frames = [template.iloc[[0]]]
if template_rows:
    frames.append(pd.DataFrame.from_records(template_rows))
template = pd.concat(frames, ignore_index=True, sort=False)

count = len(template_rows) # number of cell type rows added
    
    
#template.head()

In [None]:
# Write the finished template table as a tab-separated file.
template.to_csv(
    "./template.tsv",
    sep="\t",
    header=True,
    index=False,
)