In [None]:
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

# Load the input-neuron spreadsheet into a DataFrame (comma-separated, the
# read_csv default).
raw_data_table = pd.read_csv('./spreadsheets/input_neurons.csv')

In [None]:
# Lookup of short names (not labels) -> FBbt IDs, covering both new and
# existing terms. Built from the 'Keys' and 'Values' columns of ID_name.csv.
ID_table = pd.read_csv('./spreadsheets/ID_name.csv')
lookup = {
    short_name: fbbt_id
    for short_name, fbbt_id in zip(ID_table['Keys'], ID_table['Values'])
}

In [None]:
# Boolean frame shaped like raw_data_table: True where a cell holds a value,
# False where it is missing (NaN).
valuemap = raw_data_table.notna()

In [None]:
# Neuron type abbreviation -> full type name used in generated definitions.
typename = dict(
    LHON='lateral horn output neuron',
    LHLN='lateral horn local neuron',
    LHIN='lateral horn input neuron',
    LHN='lateral horn neuron',
)

# Neurotransmitter name (as written in the spreadsheet) -> adjective used in
# generated definitions.
NTs = dict(
    Acetylcholine='cholinergic',
    GABA='GABAergic',
    Glutamate='glutamatergic',
    Dopamine='dopaminergic',
)

# Neuropil abbreviation (first part of a neuron short name) -> full name.
neuropils = dict(
    AL='antennal lobe',
    AMMC='antennal mechanosensory and motor center',
    AVLP='anterior ventrolateral protocerebrum',
    GNG='gnathal ganglion',
    LO='lobula',
    mAL='medial antennal lobe',
    PLP='posterior lateral protocerebrum',
    VNC='thoracico-abdominal ganglion',
    WED='wedge',
)

# Tract abbreviation -> phrase inserted after "fasciculates with" in definitions.
# NOTE(review): the t3ALT phrase has no leading "the", unlike the other two
# entries - confirm that this is intended.
tracts = dict(
    mALT='the medial antennal lobe tract',
    lALT='the lateral antennal lobe tract',
    t3ALT='transverse antennal lobe tract 3',
)

In [None]:
# Header row of the template: each key is a column header and each value is
# the template specification string for that column (first row of the table).
# Insertion order is the column order of the output.
template_seed = OrderedDict([
    # identity and typing
    ("ID", "ID"),
    ("CLASS_TYPE", "CLASS_TYPE"),
    ("RDF_Type", "TYPE"),
    # label, definition and provenance
    ("Name", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("created_by", "A dc:contributor"),
    ("creation_date", "A dc:date"),
    # short name synonym
    ("Synonym", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref SPLIT=|"),
    ("comment", "A rdfs:comment"),
    # tract, neurotransmitter, neuron type, innervation and part-of assertions
    ("fasciculation", "SC 'fasciculates with' some %"),
    ("neurotransmitter", "SC 'capable of' some % SPLIT=|"),
    ("type", "SC %"),
    ("innervation", "SC 'has postsynaptic terminal in' some %"),
    ("adNS", "SC 'part of' some %"),
])

# One-row DataFrame holding only the specification strings.
template = pd.DataFrame.from_records([template_seed])

template

In [None]:
def groupname_splitter(tractname):
    """
    Split a name made of exactly two uppercase letters followed by one or
    more digits.

    The argument is converted with str() first. Returns a two-item list
    [letters, digits]. Raises ValueError if the whole string does not have
    that shape.
    """
    text = str(tractname)
    if re.fullmatch("[A-Z][A-Z][0-9]+", text) is None:
        raise ValueError("Invalid tract name - must be 2 letters, followed by numbers.")
    letters, digits = text[:2], text[2:]
    return [letters, digits]
    

In [None]:
def shortname_splitter(shortname):
    """
    Parse an input neuron short name of the form neuropil-[tract-]number.

    Returns the match object, which has three named groups: 'neuropil',
    'tract' (an empty string when the name has no tract part) and 'number'.
    Only the start of the string has to match the pattern. Raises ValueError
    when it does not.
    """
    parsed = re.match(
        "(?P<neuropil>[a-zA-Z]+)-(?P<tract>[a-zA-Z0-9]*)-{,1}(?P<number>[A-Z]{2}[0-9]*)",
        shortname)
    if parsed is None:
        raise ValueError("Invalid neuron name.")
    return parsed


In [None]:
def base_definition_generator(shortname,n_type,citation):
    """
    Build the opening sentence of a neuron definition.

    shortname is parsed with shortname_splitter; its neuropil part is
    expanded through the neuropils table, and its tract part (when it is a
    key of the tracts table) adds an "and fasciculates with ..." clause.
    n_type is the neuron type phrase and citation is the text appended just
    before the final full stop.

    Raises ValueError for an unparseable short name and KeyError for a
    neuropil abbreviation that is missing from neuropils.
    """
    name_parts = shortname_splitter(shortname)
    sentence = ("Adult %s that has its dendrites predominantly within the %s "
                %(n_type, neuropils[name_parts.group('neuropil')]))
    tract = name_parts.group('tract')
    if tract in tracts:
        sentence += "and fasciculates with %s "%(tracts[tract])
    return sentence + citation + "."

In [None]:
def name_lister(names):
    """
    Join names into an English list: "a", "a and b", "a, b and c", ...

    Returns False when names is empty, and the single item itself when there
    is exactly one.
    """
    if len(names) < 1:
        return False
    if len(names) == 1:
        return names[0]
    # Everything but the last item is comma-separated; the last is joined with "and".
    return ", ".join(names[:-1]) + " and " + names[-1]

In [None]:
# Build one template row per input neuron, then add them all below the header
# row in a single concat.
#
# Rows are collected in a list instead of being concatenated onto `template`
# inside the loop, and the result is rebuilt from the header row (row 0) only,
# so re-running this cell does not append the same neurons a second time.
blank_row = OrderedDict((column, "") for column in template.columns)
new_rows = []

for i in raw_data_table.index:

    # Every lookup below uses the index label `i`; no separate counter is kept.
    short_name = raw_data_table.at[i, "short"]
    name_parts = shortname_splitter(short_name)  # parse once, reuse below
    tract = name_parts.group('tract')
    neuron_type = raw_data_table.at[i, "type"]
    has_number = valuemap.at[i, "number"]
    has_neurotransmitter = valuemap.at[i, "neurotransmitter"]

    row_od = blank_row.copy()  # new template row with every column blank

    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = raw_data_table.at[i, "xref"]
    row_od["syn_ref"] = raw_data_table.at[i, "xref"]
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2020-05-01T12:00:00Z"
    row_od["adNS"] = lookup["adNS"]

    #easy to generate data
    row_od["ID"] = lookup[short_name]
    row_od["Synonym"] = "adult %s neuron"%(short_name)
    if valuemap.at[i, "longname"]:
        row_od["Name"] = raw_data_table.at[i, "longname"]
    else:
        # NOTE(review): the default label always says "lateral horn input
        # neuron", whatever the type column holds - confirm every row without
        # a longname is an input neuron.
        row_od["Name"] = "adult %s lateral horn input neuron"%(short_name)

    # A tract with no entry in lookup is left blank on purpose (best effort).
    if tract and tract in lookup:
        row_od["fasciculation"] = lookup[tract]
    row_od["type"] = lookup[neuron_type]
    if valuemap.at[i, "comment"]:
        row_od["comment"] = raw_data_table.at[i, "comment"]
    if has_neurotransmitter:
        transmitter_names = raw_data_table.at[i, "neurotransmitter"].split(" | ")
        row_od["neurotransmitter"] = " | ".join(lookup[n] for n in transmitter_names)
        neurotransmitters = name_lister([NTs[n] for n in transmitter_names])
    row_od["innervation"] = lookup[name_parts.group('neuropil')]

    #STUFF FOR DEFINITION

    definition = base_definition_generator(
        short_name, typename[neuron_type], raw_data_table.at[i, "xref_cite"])

    if has_number:
        neuron_count = int(round(raw_data_table.at[i, "number"], 0))
        if neuron_count == 1:
            count_clause = " There is one of these neurons per hemisphere"
            pronoun = "it is"
        else:
            count_clause = (" There are %s of these neurons per hemisphere"
                            %(num2words(neuron_count)))
            pronoun = "they are"
        # The count clause and the neurotransmitter clause are assembled
        # together, so every count value yields one well-formed sentence with
        # a single citation.
        if has_neurotransmitter:
            definition += ("%s and %s %s (Dolan et al., 2019)."
                           %(count_clause, pronoun, neurotransmitters))
        else:
            definition += "%s (Dolan et al., 2019)."%(count_clause)
    elif has_neurotransmitter:
        definition += " It is %s (Dolan et al., 2019)."%(neurotransmitters)

    row_od["Definition"] = definition

    new_rows.append(row_od)

# Header row first, then all neuron rows; columns stay in template order.
template = pd.concat(
    [template.iloc[:1], pd.DataFrame(new_rows, columns=template.columns)],
    ignore_index=True, sort=False)

template

In [None]:
# Write the finished table as tab-separated text: column headers on the first
# line, no index column.
template.to_csv("./template.tsv", sep="\t", index=False)