In [None]:
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

In [None]:
# Read the TSV with every cell kept as text; missing cells become ''.
new_types = pd.read_csv('./Feng.tsv', sep='\t', dtype='str').fillna('')

# The segment and MDN-response columns hold ', '-separated values:
# turn each cell into a list of its parts.
for list_column in ('Segments', 'response to MDN'):
    new_types[list_column] = new_types[list_column].map(lambda cell: cell.split(', '))

# Preview rows 20-29 of the parsed table.
new_types.iloc[20:30]

In [None]:
# FBbt identifiers looked up when filling the 'soma' column (keyed by
# segment name) and the 'parent' column (keyed by parent class name).
FBbt_dict = {
    'T1': 'FBbt:00111083',
    'T2': 'FBbt:00111084',
    'T3': 'FBbt:00111085',
    'adult ascending neuron': 'FBbt:00048301',
    'adult neuron': 'FBbt:00047095',
}

In [None]:
def label_maker(cell_type, subtype=False, neuromere=None):
    """Build the label for a cell type or for one of its per-neuromere subtypes.

    Args:
        cell_type: Name fragment inserted into the label.
        subtype: When true, build a neuromere-specific label; otherwise
            build the general "... of ventral nerve cord" label.
        neuromere: 'T1', 'T2' or 'T3'; only consulted when ``subtype`` is true.

    Returns:
        The label string, or ``None`` (after printing a message) when
        ``subtype`` is true and ``neuromere`` is not one of the known keys.
    """
    if not subtype:
        return "adult %s neuron of ventral nerve cord" % cell_type

    thoracic_names = {'T1': 'prothoracic', 'T2': 'mesothoracic', 'T3': 'metathoracic'}
    if neuromere not in thoracic_names:
        print("Neuromere must be 'T1', 'T2' or 'T3' if subtype=True")
        return None

    return "adult %s %s neuron" % (thoracic_names[neuromere], cell_type)

In [None]:
def definition_writer(neuromeres, cell_type, response, subtype=False, number='', hetero='', ascending=''):
    """Compose the free-text definition for one cell type.

    The definition is up to three sentences: what/where the neuron is, how it
    responds downstream of moonwalker descending neuron (MDN) activity, and
    how many cells there are per hemineuromere.

    Args:
        neuromeres: List of 1-3 neuromere codes ('T1', 'T2', 'T3').
        cell_type: Cell type name; only used in the text when ``subtype`` is true.
        response: List of response codes ('i', 'w', 's'). For a subtype the
            entry at position (neuromere number - 1) is used; otherwise a
            response sentence is written only if all entries are identical.
        subtype: Whether this is a neuromere-specific subtype.
        number: Cell count as text (optionally prefixed with '~'); '' omits
            the count sentence.
        hetero: '1' to add that the cells are heterogeneous.
        ascending: '1' to describe a non-subtype as an ascending neuron.

    Returns:
        The definition string with no trailing space.

    Raises:
        ValueError: If ``neuromeres`` does not have 1-3 entries.
        KeyError: If a neuromere code is not 'T1', 'T2' or 'T3'.
    """
    neuromere_dict = {'T1':'prothoracic', 'T2':'mesothoracic', 'T3':'metathoracic'}
    pub = " (Feng et al., 2020)"
    response_dict = {'i':'inhibited', 'w':'weakly excited', 's':'strongly excited'}
    mdn_sentence = "It is %s downstream of moonwalker descending neuron activity%s. "

    if len(neuromeres) == 1:
        nm = "with its soma in the %s neuromere" % neuromere_dict[neuromeres[0]]
    elif len(neuromeres) == 2:
        nm = "found in the %s and %s neuromeres" % (neuromere_dict[neuromeres[0]],
                                                    neuromere_dict[neuromeres[1]])
    elif len(neuromeres) == 3:
        nm = "found in the %s, %s and %s neuromeres" % (neuromere_dict[neuromeres[0]],
                                                        neuromere_dict[neuromeres[1]],
                                                        neuromere_dict[neuromeres[2]])
    else:
        raise ValueError("'neuromeres' must be a list of length 1-3")

    if subtype:
        typing = "Adult %s neuron %s%s. " % (cell_type, nm, pub)
        try:
            # Pick the response recorded for this subtype's own neuromere.
            code = response[int(neuromeres[0].lstrip('T')) - 1]
            MDN_response = mdn_sentence % (response_dict[code], pub)
        except (KeyError, IndexError):
            # Unknown/blank code, or no entry for this neuromere: say nothing.
            MDN_response = ""
    else:
        # Only state a response when it is the same everywhere and is a
        # recognised code (a blank cell parses to [''], which is skipped).
        if len(set(response)) == 1 and response[0] in response_dict:
            MDN_response = mdn_sentence % (response_dict[response[0]], pub)
        else:
            MDN_response = ""

        if ascending == '1':
            typing = "Adult ascending neuron %s%s. " % (nm, pub)
        else:
            typing = "Adult neuron %s%s. " % (nm, pub)

    count_sentence = ""
    if len(number) > 0:
        if '~' in number:
            approx = 'are approximately '
        elif number == '1':
            approx = 'is '
        else:
            approx = 'are '
        # Spell out every run of digits (e.g. '2-3' -> 'two-three').
        # re.sub with a callback replaces all matches and is a no-op when
        # the text contains no digits.
        num = re.sub(r'[0-9]+',
                     lambda match: num2words(int(match.group())),
                     number.lstrip('~'))
        if hetero == '1':
            het = ' and they are heterogeneous'
        else:
            het = ''
        count_sentence = ("There %s%s of these cells per hemineuromere%s%s."
                          % (approx, num, het, pub))

    # Always build the definition (even without a count) and actually apply
    # the strip: str.rstrip returns a new string.
    return (typing + MDN_response + count_sentence).rstrip(' ')

In [None]:
# Ordered mapping of column header -> template specification string.
# It becomes the first row of the template table.
template_seed = OrderedDict([
    # identifier and class type
    ('ID', 'ID'),
    ('class_type', 'CLASS_TYPE'),
    # obo ID and namespace
    ('obo_id', 'A oboInOwl:id'),
    ('obo_namespace', 'A oboInOwl:hasOBONamespace'),
    # label, definition and provenance
    ('Name', 'A rdfs:label'),
    ('definition', 'A IAO:0000115'),
    ('Xref_def', '>A oboInOwl:hasDbXref'),
    ('created_by', 'A oboInOwl:created_by'),
    ('creation_date', 'A oboInOwl:creation_date'),
    # synonyms (each followed by its reference column) and comment
    ('MFSynonym', 'A oboInOwl:hasExactSynonym'),
    ('MFsyn_ref', '>A oboInOwl:hasDbXref'),
    ('MTSynonym', 'A oboInOwl:hasNarrowSynonym'),
    ('MTsyn_ref', '>A oboInOwl:hasDbXref'),
    ('comment', 'A rdfs:comment'),
    # relationships
    ('soma', "C 'has soma location' some %"),
    ('parent', 'C %'),
])

# One-row DataFrame holding the specification row; data rows are added later.
template = pd.DataFrame.from_records([template_seed])

#template

In [None]:
count = 0 # number of data rows generated
id_mapping = {} # dictionary of ids for types
new_rows = [] # one OrderedDict per data row, concatenated once after the loop

for i in new_types.index:
    source = new_types.loc[i]
    segments = source['Segments']
    is_subtype = 'FBbt' in source['Sub_ID']

    # new template row: every template column present, initially blank
    row_od = OrderedDict((c, "") for c in template.columns)

    # these are the same in each row
    row_od["Xref_def"] = "FlyBase:FBrf0247391"
    row_od["MFsyn_ref"] = "FlyBase:FBrf0247391"
    row_od["MTsyn_ref"] = "FlyBase:FBrf0247391"
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2021-03-22T12:00:00Z"
    row_od["comment"] = "Nomenclature scheme will be explained in a later publication (Feng et al., 2020)."
    row_od['obo_namespace'] = "fly_anatomy.ontology"

    # definition
    row_od["definition"] = definition_writer(neuromeres=segments,
                                             cell_type=source['Cell Type'],
                                             response=source['response to MDN'],
                                             subtype=is_subtype,
                                             number=source['Cell number'],
                                             hetero=source['hetero'],
                                             ascending=source['ascending'])

    # conditional data: single-segment types get a soma location and,
    # when an MT name is given, a narrow synonym
    if len(segments) == 1:
        row_od["soma"] = FBbt_dict[segments[0]]
        if len(source['MT']) > 0:
            row_od["MTSynonym"] = "adult %s neuron" % source['MT']

    if is_subtype:
        row_od["class_type"] = 'equivalent'
        row_od["Name"] = label_maker(source['Cell Type'], subtype=True, neuromere=segments[0])
        row_od["ID"] = source['Sub_ID']
        row_od["parent"] = source['Main_ID']
        row_od["soma"] = FBbt_dict[segments[0]]
    else:
        row_od["class_type"] = 'subclass'
        row_od["ID"] = source['Main_ID']
        row_od["Name"] = label_maker(source['Cell Type'])
        row_od["MFSynonym"] = "adult MF%s neuron" % source['MF'].zfill(2)
        # The table is loaded with dtype='str', so the flag is the string '1'
        # (comparing against the integer 1 would never match).
        if source['ascending'] == '1':
            row_od["parent"] = FBbt_dict['adult ascending neuron']
        else:
            row_od["parent"] = FBbt_dict['adult neuron']

    # add obo_id == ID
    row_od['obo_id'] = row_od["ID"]

    new_rows.append(row_od)
    count += 1

# Append all data rows in a single concat (growing the frame inside the loop
# copies it on every iteration). Starting from the first row only (the
# template specification row) keeps this cell safe to re-run.
template = pd.concat([template.iloc[[0]],
                      pd.DataFrame(new_rows, columns=template.columns)],
                     ignore_index=True, sort=False)

template

In [None]:
# Write the finished template as a tab-separated file, without the
# DataFrame's row index (`index` is documented as a bool; use False
# rather than relying on None being falsy).
template.to_csv('template.tsv', sep='\t', index=False)