In [None]:
import pandas as pd
import csv
import re
from num2words import num2words
from collections import OrderedDict

# Load the group detail spreadsheet into a pandas DataFrame; the later cells
# read their per-row data from this frame.

raw_data_table = pd.read_csv(
    filepath_or_buffer='./spreadsheets/group_detail.csv',
    sep=',',
)

# To inspect the loaded table, evaluate `raw_data_table` as the last line of a cell.

In [None]:
# Dictionary of FBbt IDs and short names (not labels) for new and existing terms to be used.
# The spreadsheet is read as a table with a "Keys" column and a "Values" column;
# each "Keys" entry is mapped to the "Values" entry on the same row.
ID_table = pd.read_csv('./spreadsheets/ID_name.csv', sep=',')

# Both columns must come from ID_table. Reading the values from any other
# (undefined) table raises NameError when the notebook is run on a fresh kernel.
lookup = dict(zip(ID_table["Keys"], ID_table["Values"]))

In [None]:
# Make a dictionary with key = column header & value = template specification
# (first row of table). An OrderedDict keeps the columns in the order they are
# added here.

# Identifier and typing columns
template_seed = OrderedDict([
    ('ID', 'ID'),
    ('CLASS_TYPE', 'CLASS_TYPE'),
    ('RDF_Type', 'TYPE'),
])

# Label, definition (with its reference) and authorship annotations
template_seed.update([
    ("Name", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref"),
    ("created_by", "A oboInOwl:created_by"),
    ("creation_date", "A oboInOwl:creation_date"),
])

# Short name synonyms. The synonym-type property is spelled `hasSynonymType`
# (lower-case "h"); a capitalised "HasSynonymType" names a different,
# non-standard property.
template_seed.update([
    ("Synonym1", "A oboInOwl:hasExactSynonym"),
    ("syn_type", ">A oboInOwl:hasSynonymType"),
    ("Synonym2", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref"),
])

# Columns for neuron superclass and soma location:
template_seed.update([("Neuron", "SC %"), ("Soma", "SC %")])

# Create DataFrame for template.
# from_records takes a list of dicts - one for each row. We only have one row.
template = pd.DataFrame.from_records([template_seed])

template

In [None]:
def groupname_splitter(shortname):
    """
    Splits group names - two letters / any number of digits.

    The input is converted to a string first. The whole name must consist of
    exactly two letters (upper or lower case) followed by one or more digits.

    Returns a tuple (letters, digits), both as strings,
    e.g. "AV1" -> ("AV", "1").

    Raises ValueError if the name does not have that shape.
    """
    short = str(shortname)
    # fullmatch rejects trailing characters after the digits, so the second
    # element of the returned tuple is always purely numeric.
    if re.fullmatch(r"[A-Za-z]{2}[0-9]+", short):
        return (short[:2], short[2:])
    else:
        raise ValueError("Invalid group name - must be 2 letters, followed by numbers.")
    

In [None]:
def location_text_generator(shortname):
    """
    Build the definition text describing where a group's cell bodies lie.

    The two-letter prefix returned by groupname_splitter(shortname) is looked
    up in position_dict (AV, PV, AD or PD) and inserted into a fixed sentence.

    Returns the definition text as a single string.

    Raises KeyError if the prefix is not one of the four known positions;
    any error raised by groupname_splitter propagates unchanged.
    """
    location = groupname_splitter(shortname)
    position_dict = {"AV" : "anterior ventral",
                     "PV" : "posterior ventral",
                     "AD" : "anterior dorsal",
                     "PD" : "posterior dorsal"}
    # Keys are upper case, so normalise the prefix before the lookup.
    position = position_dict[location[0].upper()]
    # Adjacent string literals are joined by the parser. Unlike a backslash
    # continuation inside one literal, this does not embed the source
    # indentation in the generated text.
    # TODO(review): the sentence previously went on with "and its cell body
    # fiber ..." but the remainder of that clause is missing from the notebook;
    # restore it here once the intended wording is known.
    return ("Neuron belonging to a group that shares a primary neurite tract entering the "
            "lateral horn. Neurons of this class have their cell bodies located in the "
            + position + " part of the brain.")

In [None]:
def label_maker(shortname):
    """
    Build the label for a descending neuron from its short name.

    The third character of the short name selects the region phrase
    (e.g. "a" -> "of the anterior dorsal brain"). The label is
    "descending neuron <region phrase> <short name>".

    Raises ValueError if the short name has fewer than three characters or
    its third character is not a recognised region letter.
    """
    letter_regions = {"a" : "of the anterior dorsal brain", "b" : "of the anterior ventral brain",
                      "c" : "of the pars intercerebralis", "d" : "of the anterior brain",
                      "g" : "of the gnathal ganglion", "p" : "of the posterior brain",
                      "x" : "outside of the brain"}
    # Work on the argument itself (as a string); there is no module-level
    # `short` to fall back on.
    short = str(shortname)
    if len(short) < 3 or short[2] not in letter_regions:
        raise ValueError("Invalid short name %r - third character must be one of: %s"
                         % (short, ", ".join(sorted(letter_regions))))
    region = letter_regions[short[2]]
    return "descending neuron %s %s" % (region, short)

In [None]:
# Build one template row for every row of raw_data_table, then append all of
# them to `template` with a single concat (concatenating inside the loop copies
# the whole frame on every iteration).

# Column names known so far. Rows may introduce extra columns (relationship
# targets, eq_to*, Comment); once introduced, later rows carry them too,
# blank unless that row sets them.
known_columns = list(template.columns)
new_rows = []

# Only numeric/boolean columns can be compared with 0: comparing the whole
# frame fails on text columns such as `short`. The mask is the same for every
# row, so compute it once.
numeric_table = raw_data_table.select_dtypes(include=["number", "bool"])
is_positive = numeric_table > 0

for count in range(len(raw_data_table)):  # count = positional row number

    r = raw_data_table["short"].iloc[count]
    # new template row: every known column, blank by default
    row_od = OrderedDict((c, "") for c in known_columns)

    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = "FlyBase:FBrf0242628"
    row_od["syn_ref"] = "FlyBase:FBrf0242628"
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2020-05-01T12:00:00Z"

    #easy to generate data
    row_od["ID"] = lookup[r]
    row_od["Synonym1"] = r + " tract"
    row_od["Synonym2"] = r
    row_od["Name"] = label_maker(r)
    # NOTE(review): "Comment" and the eq_to*/relationship columns added below
    # get no template string in the header row built from template_seed -
    # confirm how they are meant to be specified.
    if r in sim_comment_od:
        row_od["Comment"] = sim_comment_od[r]

    #is_a relationship: the parent is looked up from the first three
    #characters of the short name; the column is named after the parent ID
    is_a = lookup[r[0:3]]
    row_od[is_a] = is_a

    #may be equivalent to
    if r in sim_eqto_od:
        for count_eq, eq_term in enumerate(sim_eqto_od[r], start=1):
            row_od["eq_to" + str(count_eq)] = eq_term
            row_od["eq_to_ref" + str(count_eq)] = "FlyBase:FBrf0239335"

    #FOR SYNAPSING AND FASCICULATION
    #get column names where value > 0 in this row
    names = is_positive.columns[is_positive.iloc[count].to_numpy(dtype=bool)]
    #convert these to IDs via lookup, skipping columns that have no entry
    FBrfs = [lookup[n] for n in names if n in lookup]
    #make these into columns in row_od
    for f in FBrfs:
        row_od[f] = f

    #STUFF FOR DEFINITION
    #DN group
    r_group = raw_data_table["Group"].iloc[count]
    group_text = group_text_generator(r_group)
    #number_cells
    num_cells = raw_data_table["max_cells"].iloc[count]
    number_cells_text = cell_text_generator(num_cells)
    #crossing and descending side
    cross = raw_data_table["crossing_midline"].iloc[count]
    side = raw_data_table["Descending_side"].iloc[count]
    side_text = side_text_generator(cross, side)

    #using x > 0 names generated for relationships
    synapsing_names = [nice_names[n] for n in names
                       if (n in nice_names) and (n in process_neuropil_names)]

    synapses_in = name_lister(synapsing_names)
    # The comparison with False is kept exactly as written, because
    # name_lister is defined elsewhere and its other return values are
    # unknown here.
    if synapses_in != False:
        synapsing_text = " It has neurites in the " + synapses_in + "."
    else:
        synapsing_text = ""

    tract_names = [nice_names[n] for n in names
                   if (n in nice_names) and (n in pathway_tract_names)]

    in_tracts = name_lister(tract_names)
    if in_tracts != False:
        tract_text = " It fasciculates with the " + in_tracts + " in the thoracico-abdominal ganglion."
    else:
        tract_text = ""

    row_od["Definition"] = group_text + side_text + synapsing_text + tract_text + number_cells_text

    new_rows.append(row_od)
    # row_od started from known_columns and appended any new keys after them,
    # so its key order is the updated column list.
    known_columns = list(row_od)

#add all the new rows to the template in one go
if new_rows:
    template = pd.concat([template, pd.DataFrame(new_rows, columns=known_columns)],
                         ignore_index=True, sort=False)

template

In [None]:
# Write the finished template as a tab-separated file: header row kept, index dropped.
template.to_csv(
    path_or_buf="./template.tsv",
    sep="\t",
    index=False,
    header=True,
)