In [None]:
"""
Makes a ROBOT template to generate new local neuron cell types
based on the type:ID mapping in 'new_ALLNs.tsv' (does not use 'FBbt_name' in file).
"""
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict
import numpy as np

# Tab-separated table of types mapped to FBbt:2... IDs, labels and refs;
# one row per new cell type.
cell_types = pd.read_csv('./new_ALLNs.tsv', delimiter='\t')


In [None]:
# Glomerulus names (matching Schlegel/Task), indexed by name.
glomeruli = pd.read_csv('./glomerulus_names.tsv', sep='\t', index_col='name')

# Lookup of glomerulus name -> value of the 'FBbt_ID' column.
glomeruli_dict = glomeruli.to_dict()['FBbt_ID']

In [None]:
# Pattern used to split a type name into three captured groups:
#   1. one or more of the letters 'l'/'v', optionally followed by '2'
#   2. the literal 'LN'
#   3. one or more digits, optionally followed by one uppercase letter
LN_pattern = re.compile(r"([lv]+[2]?)(LN)([0-9]+[A-Z]?)")

In [None]:
# Neuroblast lookup table: one row per short lineage code, giving the
# FBbt ID and the neuroblast name used in labels/definitions.
nb_data = np.array([
    ['FBbt:00067348', 'v', 'ALv1'],
    ['FBbt:00050035', 'v2', 'ALv2'],
    ['FBbt:00050038', 'lv', 'ALlv1'],
    ['FBbt:00067346', 'ad', 'ALad1'],
    ['FBbt:00067347', 'l', 'ALl1 (Notch OFF hemilineage)'],
    ['FBbt:00067347', 'l2', 'ALl1 (Notch ON hemilineage)'],
])

# Index by the short code so rows can be looked up as neuroblasts[column][short].
neuroblasts = pd.DataFrame(nb_data, columns=['ID', 'short', 'name']).set_index('short')

In [None]:
# Arborization pattern name -> FBbt ID used for the 'arbor_type' column.
patterns_dict = dict([
    ('broad', 'FBbt:00051500'),
    ('regional', 'FBbt:00049644'),
    ('sparse', 'FBbt:00049647'),
    ('patchy', 'FBbt:00049646'),
])

In [None]:
def glomerulus_lister(glomeruli):
    """
    Takes a list of glomeruli and outputs them as a string in English.

    Examples:
        ['DA1']               -> "the DA1 glomerulus"
        ['VP']                -> "the VP glomeruli"
        ['DA1', 'DL3']        -> "the DA1 and DL3 glomeruli"
        ['DA1', 'DL3', 'VA1'] -> "the DA1, DL3 and VA1 glomeruli"

    Parameters:
        glomeruli: non-empty list (or tuple) of glomerulus names.

    Returns:
        str: the English phrase listing the glomeruli.

    Raises:
        ValueError: if `glomeruli` is empty (there is nothing to list).
    """
    if len(glomeruli) == 0:
        # Previously this fell through every branch and failed with an
        # UnboundLocalError; fail with an explicit message instead.
        raise ValueError("glomerulus_lister needs at least one glomerulus name.")

    if len(glomeruli) == 1:
        # A lone 'VP' entry is worded in the plural; any other single name is singular.
        noun = "glomeruli" if glomeruli[0] == 'VP' else "glomerulus"
        return "the %s %s" % (glomeruli[0], noun)

    # Two or more names: comma-separate all but the last, then join with 'and'.
    leading = ', '.join(str(g) for g in glomeruli[:-1])
    return "the %s and %s glomeruli" % (leading, glomeruli[-1])


In [None]:
def shortname_splitter(shortname):
    """
    Splits a neuron type name into the groups captured by LN_pattern.

    The name is matched from its start; any trailing text after the matched
    part is ignored.

    Parameters:
        shortname (str): type name to split, e.g. 'v2LN30'.

    Returns:
        tuple: the three captured groups, e.g. ('v2', 'LN', '30').

    Raises:
        ValueError: if the name does not start with text matching LN_pattern.
    """
    m = re.match(LN_pattern, shortname)

    if m is None:
        # Message previously lacked the space between the name and the text.
        raise ValueError("%s could not be split." % shortname)

    return m.groups()

In [None]:
def definition_maker(shortname, laterality, pattern, glomeruli):
    """
    Autogenerates term definition based on neuprint type name.

    Parameters:
        shortname (str): type name; split with shortname_splitter to get the
            lineage code (looked up in `neuroblasts`) and the group.
        laterality (str): 'uni' (unilateral) or 'bi' (bilateral).
        pattern: arborization pattern name, inserted into the text as-is.
        glomeruli (list): glomerulus names with strongest innervation;
            an empty list omits that clause.

    Returns:
        str: the full definition text.

    Raises:
        ValueError: if `shortname` cannot be split, or if `laterality` is
            neither 'uni' nor 'bi'.
    """
    # Split the name once; the middle group (the literal 'LN') is not needed.
    lineage_short, _, group = shortname_splitter(shortname)
    lineage = "neuroblast %s" % neuroblasts['name'][lineage_short]

    if laterality == 'uni':
        lat = "It is unilateral"
    elif laterality == 'bi':
        lat = "It is bilateral"
    else:
        # Previously an unexpected value surfaced later as an UnboundLocalError.
        raise ValueError("Unknown laterality %r for %s (expected 'uni' or 'bi')."
                         % (laterality, shortname))

    pat = "and it has a %s arborization pattern" % pattern

    if glomeruli:
        glom = ", with strongest innervation in %s (Schlegel et al., 2021)." % glomerulus_lister(glomeruli)
    else:
        glom = " (Schlegel et al., 2021)."

    return ("Adult local neuron of the antennal lobe that develops from %s and belongs to group %s "
            "(Schlegel et al., 2021). %s %s%s"
            % (lineage, group, lat, pat, glom))


In [None]:
# Template header: maps each column name to its template specification string.
# This becomes the first row of the table, directly below the column headers.
template_seed = OrderedDict([
    # term identity
    ('ID', 'ID'),
    ('CLASS_TYPE', 'CLASS_TYPE'),
    ('RDF_Type', 'TYPE'),
    # label, definition (with its xref) and provenance annotations
    ('label', 'A rdfs:label'),
    ('definition', 'A IAO:0000115'),
    ('Xref_def', '>A oboInOwl:hasDbXref SPLIT=|'),
    ('created_by', 'A oboInOwl:created_by'),
    ('creation_date', 'A oboInOwl:creation_date'),
    # synonym and comment
    ('synonym', 'A oboInOwl:hasExactSynonym'),
    ('comment', 'A rdfs:comment'),
    # relationship columns
    ('glomeruli', "SC 'has synaptic IO in region' some % SPLIT=|"),
    ('inputs_AL', "SC 'receives synaptic input in region' some %"),
    ('outputs_AL', "SC 'sends synaptic output to region' some %"),
    ('parent', 'SC %'),
    ('arbor_type', 'SC %'),
    ('neuroblast', "SC 'develops from' some %"),
    ('hemilineage', 'SC %'),
])

# Single-row DataFrame holding only the specification row.
template = pd.DataFrame.from_records([template_seed])

In [None]:
# Build one template row per cell type, then assemble the whole template in a
# single step. The template is rebuilt from template_seed on every run, so
# re-running this cell does not append duplicate rows.
id_mapping = {} # dictionary of ids for types (not filled in by this cell)
new_rows = [] # one ordered dict per cell type, in cell_types order

for i in cell_types.index:

    # new template row: every template column present, blank by default
    row_od = OrderedDict((c, "") for c in template_seed)

    np_type = cell_types.at[i, 'np_type']

    # split the type name once and reuse the parts below
    lineage_short, _, group = shortname_splitter(np_type)
    nb_name = neuroblasts['name'][lineage_short]

    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["comment"] = "Cell type described based on Janelia hemibrain data (Schlegel et al., 2021)."
    row_od["parent"] = "FBbt:00007390"  # adult antennal lobe local neuron
    row_od["inputs_AL"] = "FBbt:00007401"  # adult antennal lobe
    row_od["outputs_AL"] = "FBbt:00007401"  # adult antennal lobe

    #easy to generate data
    row_od["ID"] = cell_types.at[i, 'FBbt_id']
    row_od["synonym"] = ("adult antennal lobe local neuron type %s of neuroblast %s"
                         % (group, nb_name))
    row_od["label"] = "adult antennal lobe local neuron %s" % np_type
    row_od["Xref_def"] = cell_types.at[i, 'ref']
    row_od["creation_date"] = cell_types.at[i, 'date']
    row_od["neuroblast"] = neuroblasts['ID'][lineage_short]

    # definition: glomeruli are '|'-separated in the input; a missing value means none
    glom_value = cell_types.at[i, 'glomeruli']
    if pd.notnull(glom_value):
        glom_list = str(glom_value).split(sep='|')
    else:
        glom_list = []

    pattern = cell_types.at[i, 'pattern']

    row_od["definition"] = definition_maker(shortname=np_type,
                                            laterality=cell_types.at[i, 'laterality'],
                                            pattern=pattern,
                                            glomeruli=glom_list)

    # conditional columns
    if glom_list:
        # raises KeyError if a name is not in glomerulus_names.tsv
        row_od["glomeruli"] = '|'.join(glomeruli_dict[g] for g in glom_list)

    if pattern in patterns_dict:
        row_od["arbor_type"] = patterns_dict[pattern]

    # hemilineage is read from the neuroblast name itself rather than by
    # searching the generated definition text
    if 'Notch OFF' in nb_name:
        row_od["hemilineage"] = 'FBbt:00049540'
    elif 'Notch ON' in nb_name:
        row_od["hemilineage"] = 'FBbt:00049539'

    new_rows.append(row_od)

# specification row first, then one row per cell type; built in one go instead
# of concatenating a one-row DataFrame per iteration
template = pd.DataFrame([template_seed] + new_rows, columns=list(template_seed))

count = len(new_rows) # number of cell type rows added below the specification row

In [None]:
# Write the finished template (column names as the header line, no index
# column) as a tab-separated file.
template.to_csv("./template.tsv", sep="\t", index=False)