In [1]:
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

# Load the comma-separated group detail spreadsheet into a pandas DataFrame.
# Preview it with raw_data_table[:10] if needed.
raw_data_table = pd.read_csv(
    './spreadsheets/group_detail.csv',
    sep=',',
)

In [2]:
# Read the ID spreadsheet and turn it into a plain dict that maps each entry
# of its "Keys" column (short names, not labels) to the matching entry of its
# "Values" column (FBbt IDs), for both new and existing terms.
ID_table = pd.read_csv('./spreadsheets/ID_name.csv', sep=',')
lookup = {
    short_name: term_id
    for short_name, term_id in zip(ID_table["Keys"], ID_table["Values"])
}

In [3]:
# Two-letter group-name prefixes and the position words they abbreviate.
position_dict = dict(
    AV="anterior ventral",
    PV="posterior ventral",
    AD="anterior dorsal",
    PD="posterior dorsal",
)

In [4]:
# Header row of the template: each pair is (column header, template
# specification string). The specification strings become the first row of
# the table, and the pair order fixes the column order.
template_columns = [
    # identity and typing
    ("ID", "ID"),
    ("CLASS_TYPE", "CLASS_TYPE"),
    ("RDF_Type", "TYPE"),
    # label, definition (with its xref) and authorship metadata
    ("Name", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref"),
    ("created_by", "A oboInOwl:created_by"),
    ("creation_date", "A oboInOwl:creation_date"),
    # short name synonym and its reference
    ("Synonym", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref"),
    # column for tract superclass
    ("PNT", "SC %"),
]
template_seed = OrderedDict(template_columns)

# One-row DataFrame holding only the specification row; data rows are added later.
template = pd.DataFrame.from_records([template_seed])

template

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,created_by,creation_date,Synonym,syn_ref,PNT
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref,A oboInOwl:created_by,A oboInOwl:creation_date,A oboInOwl:hasExactSynonym,>A oboInOwl:hasDbXref,SC %


In [5]:
def groupname_splitter(shortname):
    """
    Break a group name into its two-letter prefix and its number.

    The argument is converted with str() and must consist of exactly two
    uppercase ASCII letters followed by one or more ASCII digits.

    Returns a two-item list of strings: [letters, digits].
    Raises ValueError when the name does not have that shape.
    """
    text = str(shortname)
    if re.fullmatch("[A-Z][A-Z][0-9]+", text) is None:
        raise ValueError("Invalid group name - must be 2 letters, followed by numbers.")
    prefix, number = text[:2], text[2:]
    return [prefix, number]
    

In [6]:
def definition_generator(shortname, dictionary):
    """
    Write the base definition sentence for a primary neurite tract.

    shortname is split by groupname_splitter into a letter prefix and a
    number. The prefix is looked up in dictionary to get the brain area
    phrase, and the number is passed to num2words with to='ordinal'. The
    first word of the brain area phrase is used as the perspective.

    Returns the definition as a single string.
    Raises ValueError for a malformed shortname (from groupname_splitter)
    and KeyError when the prefix is not a key of dictionary.
    """
    prefix, number = groupname_splitter(shortname)
    brain_area = dictionary[prefix]
    ordinal_position = num2words(int(number), to='ordinal')
    perspective = brain_area.split()[0]

    sentence = ("Primary neurite tract of the %s brain in the %s clockwise position "
                "(%s perspective), from ventrolateral to dorsal to ventromedial, of tracts "
                "that enter the lateral horn (Frechter et al., 2019).")
    return sentence % (brain_area, ordinal_position, perspective)
    

In [7]:
def label_maker(shortname, dictionary):
    """
    Build the label for a tract from its short group name.

    The letter prefix of shortname is replaced by its entry in dictionary
    and the number is kept as written, giving
    "<dictionary entry> primary neurite tract <number>".
    """
    prefix, number = groupname_splitter(shortname)
    expanded_prefix = dictionary[prefix]
    return "%s primary neurite tract %s" % (expanded_prefix, number)

In [8]:
# Build one template row per group in raw_data_table and attach them all to
# the specification (header) row with a single concat.
#
# Differences from a row-by-row append:
#   * `short` and `main_type` are read together for each row, so they can
#     never come from different rows (no separate counter vs. index label).
#   * rows are collected in a list and concatenated once, not once per row.
#   * the result is rebuilt from the first row of `template` only, so
#     re-running this cell does not pile up duplicate rows.

template_columns = list(template.columns)

# Values that are identical in every generated row.
shared_values = {
    "CLASS_TYPE": "subclass",
    "RDF_Type": "owl:Class",
    "Xref_def": "FlyBase:FBrf0242628",
    "syn_ref": "FlyBase:FBrf0242628",
    "created_by": "http://orcid.org/0000-0002-1373-1705",
    "creation_date": "2020-05-01T12:00:00Z",
}

# Extra sentence appended to the definition, chosen by the row's main_type.
# Any other main_type (including a missing value) adds nothing.
main_type_sentences = {
    "LHLN": (" Lateral horn neurons that enter the neuropil via this "
             "tract are predominantly local neurons (Frechter et al., 2019)."),
    "LHON": (" Lateral horn neurons that enter the neuropil via this "
             "tract are predominantly output neurons (Frechter et al., 2019)."),
}

new_rows = []
for short_name, main_type in zip(raw_data_table["short"], raw_data_table["main_type"]):
    # Start from a blank cell in every template column, then fill in.
    row = dict.fromkeys(template_columns, "")
    row.update(shared_values)

    # Values derived directly from the short name.
    row["ID"] = lookup[short_name]
    row["Synonym"] = short_name + " primary neurite tract"
    row["Name"] = label_maker(short_name, position_dict)
    row["PNT"] = lookup["PNT"]

    # Base definition plus the optional main_type sentence.
    row["Definition"] = (definition_generator(short_name, position_dict)
                         + main_type_sentences.get(main_type, ""))

    new_rows.append(row)

# Keep only the specification row, then add every data row in one step.
header_row = template.iloc[[0]]
data_rows = pd.DataFrame.from_records(new_rows, columns=template_columns)
template = pd.concat([header_row, data_rows], ignore_index=True, sort=False)

template

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,created_by,creation_date,Synonym,syn_ref,PNT
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref,A oboInOwl:created_by,A oboInOwl:creation_date,A oboInOwl:hasExactSynonym,>A oboInOwl:hasDbXref,SC %
1,FBbt:00049287,subclass,owl:Class,posterior ventral primary neurite tract 1,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV1 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
2,FBbt:00049288,subclass,owl:Class,posterior ventral primary neurite tract 2,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV2 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
3,FBbt:00049289,subclass,owl:Class,posterior ventral primary neurite tract 3,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV3 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
4,FBbt:00049290,subclass,owl:Class,posterior ventral primary neurite tract 4,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV4 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
5,FBbt:00049291,subclass,owl:Class,posterior ventral primary neurite tract 5,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV5 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
6,FBbt:00049292,subclass,owl:Class,posterior ventral primary neurite tract 6,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV6 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
7,FBbt:00049293,subclass,owl:Class,posterior ventral primary neurite tract 7,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV7 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
8,FBbt:00049294,subclass,owl:Class,posterior ventral primary neurite tract 8,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV8 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287
9,FBbt:00049295,subclass,owl:Class,posterior ventral primary neurite tract 9,Primary neurite tract of the posterior ventral...,FlyBase:FBrf0242628,http://orcid.org/0000-0002-1373-1705,2020-05-01T12:00:00Z,PV9 primary neurite tract,FlyBase:FBrf0242628,FBbt:00048287


In [9]:
# Save the template as a tab-separated file: column names on the first line,
# no DataFrame row index.
template.to_csv("./template.tsv", sep="\t", index=False)