In [1]:
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

# Turn CSV into a pandas DataFrame

raw_data_table = pd.read_csv('./spreadsheets/neuron_detail.csv', sep=',')

#raw_data_table[:10]

In [2]:
# Dictionary of FBbt IDs and short names (not labels) for new and existing terms to be used
ID_table = pd.read_csv('./spreadsheets/ID_name.csv', sep=',')
lookup = dict(zip(ID_table.Keys,ID_table.Values))
#lookup

In [3]:
# map of which cells have values
valuemap = pd.notna(raw_data_table)
#valuemap[:10]

In [4]:
typename = {'LHON' : 'lateral horn output neuron',
            'LHLN' : 'lateral horn local neuron',
            'LHIN' : 'lateral horn input neuron',
            'LHN' : 'lateral horn neuron'}

NTs = {'Acetylcholine' : 'cholinergic',
      'GABA' : 'GABA-ergic',
      'Glutamate' : 'glutamatergic',
      'Dopamine' : 'dopaminergic'}

In [5]:
# Make a dictionary with key - column header & value = template specification (first row of table).

template_seed = OrderedDict([ ('ID' , 'ID'), ('CLASS_TYPE' , 'CLASS_TYPE'),\
                             ('RDF_Type' , 'TYPE' )])

#label, definition, short synonym:
template_seed.update([("Name" , "A rdfs:label"), ("Definition" , "A IAO:0000115"),\
                      ("Xref_def" , ">A oboInOwl:hasDbXref SPLIT=|"),\
                      ("created_by" , "A oboInOwl:created_by"),\
                      ("creation_date", "A oboInOwl:creation_date")])

#short name synonym
template_seed.update([("Synonym" , "A oboInOwl:hasExactSynonym"),\
                      ("syn_ref" , ">A oboInOwl:hasDbXref SPLIT=|"),\
                      ("Jeanne_Synonym" , "A oboInOwl:hasNarrowSynonym"),\
                      ("syn_ref_JS" , ">A oboInOwl:hasDbXref"),\
                      ("comment" , "A rdfs:comment")])

# Columns for tract superclass:
template_seed.update([("fasciculation" , "SC 'fasciculates with' some %"),\
                      ("neurotransmitter" , "SC 'capable of' some %"),\
                      ("type", "SC %"),\
                      ("Brain", "SC 'part of' some %")])

# Create dataFrame for template
template = pd.DataFrame.from_records([template_seed])

template

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,created_by,creation_date,Synonym,syn_ref,Jeanne_Synonym,syn_ref_JS,comment,fasciculation,neurotransmitter,type,Brain
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref SPLIT=|,A oboInOwl:created_by,A oboInOwl:creation_date,A oboInOwl:hasExactSynonym,>A oboInOwl:hasDbXref SPLIT=|,A oboInOwl:hasNarrowSynonym,>A oboInOwl:hasDbXref,A rdfs:comment,SC 'fasciculates with' some %,SC 'capable of' some %,SC %,SC 'part of' some %


In [6]:
def groupname_splitter(tractname):
    """
    Splits group names - two (uppercase) letters / one or more digits.
    """
    short = str(tractname)
    pattern = re.compile("[A-Z][A-Z][0-9]+")
    if pattern.fullmatch(short):
        return [short[0:2], short[2:]]
    else:
        raise ValueError("Invalid tract name - must be 2 letters, followed by numbers.")
    

In [7]:
def shortname_splitter(shortname):
    """
    Splits neuron names - two (uppercase) letters + one or more digits 
    / one or more (lowercase) letters + one or more digits.
    """
    m = re.match("([A-Z][A-Z][0-9]+)([a-z]+[0-9]+)",shortname)
    if m: return m.groups()
    else:
        raise ValueError("Invalid neuron name.")


In [8]:
def base_definition_generator(shortname,n_type,citation):

    return ("Adult %s neuron that fasciculates with the %s primary neurite tract %s."
            %(n_type,shortname_splitter(shortname)[0], citation))

In [9]:
def name_lister(names):
    L = ""
    if len(names) < 1:
        return False
    elif len(names) == 1:
        return names[0]
    elif len(names) > 1:
        L = names[0]
        if len(names) > 2:
            for i in names[1:-1]:
                L = L + ", " + i
        L = L + " and " + names[-1]
        return L

In [10]:
count = 0 # first row

for i in raw_data_table.index:

    r = raw_data_table.short[count]
    row_od = OrderedDict([]) #new template row as an empty ordered dictionary
    for c in template.columns: #make columns and blank data for new template row
        row_od.update([(c , "")])
    
    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = raw_data_table.xref[i]
    row_od["syn_ref"] = raw_data_table.xref[i]
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2020-05-01T12:00:00Z"
    row_od["Brain"] = lookup["adBrain"]

    #easy to generate data
    row_od["ID"] = lookup[r]
    row_od["Synonym"] = "adult " + r + " neuron"
    row_od["Name"] = "adult %s neuron"%(r)
    if valuemap.Jeanne_synonym[i]:
        row_od["Jeanne_Synonym"] = raw_data_table.Jeanne_synonym[i]
        row_od["syn_ref_JS"] = "FlyBase:FBrf0242477"
        
    row_od["fasciculation"] = lookup[shortname_splitter(r)[0]]
    row_od["type"] = lookup[raw_data_table.type[i]]
    if valuemap.comment[i]:
        row_od["comment"] = raw_data_table.comment[i]
    if valuemap.neurotransmitter[i]:
        row_od["neurotransmitter"] = " | ".join(
            [lookup[n] for n in raw_data_table.neurotransmitter[i].split(" | ")])

    
    #STUFF FOR DEFINITION

    definition = base_definition_generator(
        r,typename[raw_data_table.type[i]],raw_data_table.xref_cite[i])
    
    if valuemap.number[i]:
        neuron_number = num2words(int(round(raw_data_table.number[i],0)))
        if valuemap.neurotransmitter[i]:
            definition += ("There are %s of these neurons (Frechter et al., 2019)."
                           %(neuron_number))
        else:
            definition += "There are %s of these neurons and they "%(neuron_number)
    
    
    if valuemap.neurotransmitter[i]:
        neurotransmitters = name_lister(
            [NTs[n] for n in raw_data_table.neurotransmitter[i].split(" | ")])
        if valuemap.number[i]:
            definition += "are %s (Frechter et al., 2019)."%(neurotransmitters)
        else:
            definition += " These neurons are %s (Frechter et al., 2019)."%(neurotransmitters)

        
    row_od["Definition"] = definition
    
            
    
    #make new row into a DataFrame and add it to template
    new_row = pd.DataFrame.from_records([row_od])
    template = pd.concat([template, new_row], ignore_index=True, sort=False)

    count +=1
    
template

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,created_by,creation_date,Synonym,syn_ref,Jeanne_Synonym,syn_ref_JS,comment,fasciculation,neurotransmitter,type,Brain
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref SPLIT=|,A oboInOwl:created_by,A oboInOwl:creation_date,A oboInOwl:hasExactSynonym,>A oboInOwl:hasDbXref SPLIT=|,A oboInOwl:hasNarrowSynonym,>A oboInOwl:hasDbXref,A rdfs:comment,SC 'fasciculates with' some %,SC 'capable of' some %,SC %,SC 'part of' some %
1,FBbt:00049318,subclass,owl:Class,adult AD1a1 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1a1 neuron,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,L1/NA,FlyBase:FBrf0242477,There are approximately 6 neurons that belong ...,FBbt:00049306,,FBbt:00049286,FBbt:00003624
2,FBbt:00049319,subclass,owl:Class,adult AD1a2 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1a2 neuron,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,,,,FBbt:00049306,GO:0014055,FBbt:00049286,FBbt:00003624
3,FBbt:00049320,subclass,owl:Class,adult AD1a3 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1a3 neuron,FlyBase:FBrf0242628,,,,FBbt:00049306,,FBbt:00049286,FBbt:00003624
4,FBbt:00049321,subclass,owl:Class,adult AD1a4 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1a4 neuron,FlyBase:FBrf0242628,,,,FBbt:00049306,,FBbt:00049286,FBbt:00003624
5,FBbt:00049322,subclass,owl:Class,adult AD1b1 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1b1 neuron,FlyBase:FBrf0242628,,,,FBbt:00049306,,FBbt:00049286,FBbt:00003624
6,FBbt:00049323,subclass,owl:Class,adult AD1b2 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1b2 neuron,FlyBase:FBrf0242628 | FlyBase:FBrf0242477,ML2/ML3,FlyBase:FBrf0242477,,FBbt:00049306,GO:0014055,FBbt:00049286,FBbt:00003624
7,FBbt:00049324,subclass,owl:Class,adult AD1c1 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1c1 neuron,FlyBase:FBrf0242628,,,,FBbt:00049306,,FBbt:00049286,FBbt:00003624
8,FBbt:00049325,subclass,owl:Class,adult AD1d1 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242477,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1d1 neuron,FlyBase:FBrf0242477,,,,FBbt:00049306,GO:0014055,FBbt:00049286,FBbt:00003624
9,FBbt:00049326,subclass,owl:Class,adult AD1f1 neuron,Adult lateral horn output neuron neuron that f...,FlyBase:FBrf0242477,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,adult AD1f1 neuron,FlyBase:FBrf0242477,L1/NA,FlyBase:FBrf0242477,There are approximately 6 neurons that belong ...,FBbt:00049306,,FBbt:00049286,FBbt:00003624


In [11]:
template.to_csv("./template.tsv", sep = "\t", header=True, index=False)