In [None]:
# This is to generate NEW cell types - for updates use update_new_types.ipynb

import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

# First ID handed out by this run: the first new type receives exactly this
# value and each later type the next number up.
# NOTE(review): must therefore be an unused FBbt:2... id - check the last one
# already assigned before running.
start_id = "FBbt:20003734"

# Before running: copy the latest ROI and type mapping files from hemibrain_metadata into this directory

In [None]:
# Load the ROI mapping, strip the left/right suffix - e.g. '(L)' or '(R)' -
# from each ROI name, then keep only ROIs written entirely in capital letters.

# Raw string: in a plain literal '\(' is an invalid escape sequence, which
# recent Python versions warn about. Compiled once instead of once per row.
_side_suffix = re.compile(r'\([LR]+\)')

roi_mapping = pd.read_csv('./hemibrain_1-1_ROI_mapping.tsv', sep='\t')
roi_mapping['ROI'] = roi_mapping['ROI'].map(lambda roi: _side_suffix.sub('', roi))

# na=False makes the mask strictly boolean (same effect as the old `== True`).
# Removing the side suffix turns left/right entries into identical rows, hence
# the de-duplication.
roi_mapping = (
    roi_mapping[roi_mapping['ROI'].str.match('[A-Z]+$', na=False)]
    .drop_duplicates()
    .reset_index(drop=True)
)


In [None]:
# add entry for 'posterior slope' and 'clamp' (not in hemibrain ROIs?)

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported way
# to add rows and appends both entries in a single step.
extra_rois = pd.DataFrame([
    {'ROI': 'PS', 'FBbt_id': 'FBbt:00040072', 'FBbt_name': 'posterior slope'},
    {'ROI': 'CL', 'FBbt_id': 'FBbt:00040047', 'FBbt_name': 'clamp'},
])
roi_mapping = pd.concat([roi_mapping, extra_rois], ignore_index=True)

In [None]:
# Read the type mapping and collect the names of types that have no FBbt ID
# yet, with any '_<lowercase letter>' subtype marker removed and duplicates
# collapsed.

type_mapping = pd.read_csv('./hemibrain_1-1_type_mapping.tsv', sep='\t')
new_types = (
    type_mapping.loc[type_mapping['FBbt_id'].isnull(), 'np_type']
    .map(lambda name: re.sub('_[a-z]', '', name))
    .drop_duplicates()
    .reset_index(drop=True)
)


In [None]:
# Keep only the unmapped types whose name, once its final three digits are
# removed, is exactly one of the ROI abbreviations in roi_mapping.

known_rois = list(roi_mapping['ROI'])
roi_prefixes = new_types.map(lambda name: re.sub('[0-9][0-9][0-9]$', '', name))
new_types = new_types[roi_prefixes.isin(known_rois)]


In [None]:
# Template header: maps each column name to its template specification string.
# This mapping becomes the first row of the template table.
template_seed = OrderedDict([
    # identifier and typing columns
    ("ID", "ID"),
    ("CLASS_TYPE", "CLASS_TYPE"),
    ("RDF_Type", "TYPE"),
    # label, definition and provenance
    ("Name", "A rdfs:label"),
    ("definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref"),
    ("created_by", "AI dc:contributor"),
    ("creation_date", "AT dc:date^^xsd:dateTime"),
    # short-name synonym, its reference, and a free-text comment
    ("Synonym", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref"),
    ("comment", "A rdfs:comment"),
    # 'SC' columns: synaptic region and the parent class
    ("synapses", "SC 'has synaptic IO in' some %"),
    ("adult_neuron", "SC %"),
])

# One-row DataFrame holding the specification; data rows are added later.
template = pd.DataFrame.from_records([template_seed])

#template

In [None]:
def shortname_splitter(shortname):
    """
    Split a neuron short name into its letter and number parts.

    The name must be one or more uppercase letters immediately followed by
    exactly three digits at the end, e.g. 'PS001' -> ('PS', '001').

    Returns a tuple (letters, digits), both strings.
    Raises ValueError if the name does not have that form.
    """
    parsed = re.match("([A-Z]+)([0-9][0-9][0-9]$)", shortname)
    if parsed is None:
        raise ValueError("Invalid neuron name.")
    return parsed.groups()

In [None]:
def label_maker(shortname):
    """
    Build the label for a neuron short name.

    Looks up the FBbt name of the ROI abbreviation in roi_mapping (first
    matching row), removes any 'adult ' from it, and returns
    'adult <region name> neuron <three-digit number>'.

    Raises ValueError for a malformed short name (from shortname_splitter) and
    IndexError if the abbreviation has no row in roi_mapping.
    """
    roi_abbrev, number = shortname_splitter(shortname)
    matching_names = roi_mapping[roi_mapping['ROI'] == roi_abbrev]['FBbt_name']
    region = str(list(matching_names)[0]).replace('adult ', '')
    return "adult %s neuron %s" % (region, number)

In [None]:
def id_advance(start_id, increase):
    """
    Return the id that lies 'increase' steps after 'start_id'.

    'start_id' is a string of the form '<prefix>:<number>', e.g.
    'FBbt:00000000'. The numeric part is incremented by 'increase' and padded
    with leading zeros to at least eight characters; the prefix is unchanged.
    """
    pieces = start_id.split(sep=':')
    prefix = pieces[0]
    advanced = int(pieces[1]) + increase
    return prefix + ':' + str(advanced).zfill(8)

In [None]:
# Build one template row per new cell type and remember which ID each type got.

id_mapping = {}  # type short name -> newly assigned ID
new_rows = []    # collected rows; joined onto the template once, after the loop

# count is the offset from start_id: 0 for the first type, 1 for the next, ...
for count, r in enumerate(new_types):

    # new template row: every template column present, blank unless set below
    row_od = OrderedDict((c, "") for c in template.columns)

    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = "doi:10.1101/2020.04.07.030213"
    row_od["syn_ref"] = "doi:10.1101/2020.04.07.030213"
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2020-07-30T12:00:00Z"
    row_od["definition"] = '.'
    row_od["comment"] = "Poorly-characterized cell type from Janelia hemibrain data (Scheffer et al., 2020)."
    row_od["adult_neuron"] = "FBbt:00047095"

    #easy to generate data
    row_od["ID"] = id_advance(start_id, count)
    row_od["Synonym"] = "adult %s neuron" % r
    row_od["Name"] = label_maker(r)
    # FBbt ID of the region named by the letter part of the short name
    roi = shortname_splitter(r)[0]
    row_od["synapses"] = str(list(roi_mapping[roi_mapping['ROI'] == roi]['FBbt_id'])[0])

    new_rows.append(row_od)
    id_mapping[r] = row_od["ID"] # add to dictionary

# Start again from the specification (first) row so that re-running this cell
# does not stack a second copy of the data rows onto the template.
template = template.iloc[:1]
if new_rows:
    # One concat for all rows: concatenating inside the loop copies the whole
    # table on every iteration.
    template = pd.concat([template, pd.DataFrame.from_records(new_rows)],
                         ignore_index=True, sort=False)
    
#template

In [None]:
# Save the finished template as a tab-separated file: column names on the
# first line, no index column.
template.to_csv("./template.tsv", sep="\t", index=False, header=True)

In [None]:
# Add FBbt IDs to the hemibrain mapping file.
# Step 1: temporary 'shortname' column - np_type with any '_<lowercase letter>'
# removed - so that rows can be looked up in id_mapping.
type_mapping['shortname'] = type_mapping['np_type'].map(
    lambda name: re.sub('_[a-z]', '', name))


In [None]:
# Give every row that still lacks an FBbt_id the ID generated for its
# shortname. Rows whose shortname is not in id_mapping stay null.
#
# A single .loc assignment replaces the per-row chained indexing
# (type_mapping['FBbt_id'][i] = ...), which raises SettingWithCopyWarning and
# does not modify the frame when pandas Copy-on-Write is enabled.
unmapped = type_mapping['FBbt_id'].isnull()
type_mapping.loc[unmapped, 'FBbt_id'] = (
    type_mapping.loc[unmapped, 'shortname'].map(id_mapping))


In [None]:
# The helper column was only needed for the ID lookup; remove it before saving.
type_mapping = type_mapping.drop(columns=['shortname'])


In [None]:
# Overwrite the type mapping file with the updated table (tab-separated, no
# index column).
type_mapping.to_csv('./hemibrain_1-1_type_mapping.tsv', index=False, sep='\t')