In [321]:
import pandas as pd
from num2words import num2words
from collections import OrderedDict

# Load the reformatted DN spreadsheet (comma-separated) into a pandas DataFrame.

raw_data_table = pd.read_csv(filepath_or_buffer='./spreadsheets/DN-reformatted.csv', sep=',')

# Uncomment the next line to display the table.
#raw_data_table

In [322]:
# Key -> identifier mapping for terms that already exist, read from the
# 'Keys' and 'Values' columns of fbrf-table.csv.
# NOTE(review): later cells treat these values as FBbt IDs - confirm.
fbrf_table = pd.read_csv(filepath_or_buffer='./spreadsheets/fbrf-table.csv', sep=',')
lookup = {key: value for key, value in zip(fbrf_table.Keys, fbrf_table.Values)}
#lookup

In [323]:
# Improved (human-readable) names for existing terms from raw_data, read from
# the 'Keys' and 'Values' columns of nice-names-table.csv.
nice_name_table = pd.read_csv(filepath_or_buffer='./spreadsheets/nice-names-table.csv', sep=',')
nice_names = {key: value for key, value in zip(nice_name_table.Keys, nice_name_table.Values)}
#nice_names

In [324]:
# Numeric part of the first new ID; row i of raw_data_table gets start + i.
start = 47572

# One new ID per row of raw_data_table. The number is zero-padded to 8 digits
# (rather than prefixed with a fixed "000") so the ID keeps its width even if
# start + i grows past 99999.
# NOTE: assumes raw_data_table has an integer index (the read_csv default).
DN_fbrfs = ["FBbt:%08d" % (start + i) for i in raw_data_table.index]

# short name -> new ID
DN_dict = dict(zip(raw_data_table.short, DN_fbrfs))
#DN_dict

In [325]:
#list of fbbts for DN_groups
DN_group_names = set(raw_data_table.Group)

# Iterate the group names in sorted order: a set of strings has no stable
# order between kernel sessions, and this list decides the order of the
# group columns in the template.
DN_group_fbbts = [lookup[group_name] for group_name in sorted(DN_group_names)]

#DN_group_names
#DN_group_fbbts

In [326]:
#list of names for processes (neuropils), one per line of neuropils.txt
# The with-block closes the file handle as soon as the lines are read.
with open('./spreadsheets/neuropils.txt', 'r') as neuropil_file:
    process_neuropil_names = neuropil_file.read().splitlines()
#process_neuropil_names

In [327]:
# Identifier from `lookup` for every neuropil name, in file order.
processes = [lookup[neuropil_name] for neuropil_name in process_neuropil_names]

#processes[0:5]

In [328]:
#list of names for pathways (tracts), one per line of tracts.csv
# The with-block closes the file handle as soon as the lines are read.
with open('./spreadsheets/tracts.csv', 'r') as tract_file:
    pathway_tract_names = tract_file.read().splitlines()
#pathway_tract_names[0:5]

In [329]:
# Identifier from `lookup` for every tract name, in file order.
tracts = [lookup[tract_name] for tract_name in pathway_tract_names]

#tracts[0:5]

In [330]:
def name_lister(names):
    """Join a sequence of strings into an English list.

    Returns False for an empty sequence, the single item itself for a
    one-item sequence, and otherwise the items separated by ", " with
    " and " before the last one (e.g. "a, b and c").
    """
    if len(names) < 1:
        return False
    if len(names) == 1:
        return names[0]
    # Everything except the last item is comma-separated; the last is joined with "and".
    leading_items = ", ".join(names[:-1])
    return leading_items + " and " + names[-1]
        

In [331]:
#similar neurons - comment#######################
# Builds sim_comment_od: DN short name -> comment sentence listing the neurons
# recorded as similar to it in similar-neurons.tsv.
similar_neurons_df = pd.read_csv('./spreadsheets/similar-neurons.tsv', sep='\t')
# Blank out missing values first, otherwise the string conversion turns them into 'nan'.
similar_neurons_df = similar_neurons_df.fillna("").astype(str)

sim_comment_od = OrderedDict()

for dn in raw_data_table.short:
    #table with a row for each similar neuron for one dn
    sim_rows = similar_neurons_df[similar_neurons_df.Name == dn]
    if sim_rows.empty:
        continue  # nothing recorded as similar to this neuron

    #build up a statement for each row and add to list
    sim_name_list = list()
    for sim_name, fbbt, citation, fbrf in zip(sim_rows.sim_name, sim_rows.fbbt,
                                              sim_rows.Citation, sim_rows.fbrf):
        sim_text = sim_name
        if len(fbbt) > 0:
            sim_text = sim_text + " (" + fbbt + ")"
        sim_text = sim_text + ", described by " + citation
        if len(fbrf) > 0:
            sim_text = sim_text + " (" + fbrf + ")"
        sim_name_list.append(sim_text)

    #put together list items with ',' and 'and' in a sentence
    sim_comment_od[dn] = "Namiki et al., 2018 (FBrf0239335), identify this as being morphologically similar to "\
    + name_lister(sim_name_list) + "."

#sim_comment_od


In [332]:
# template_seed maps each column header to its template specification string;
# it becomes the first (and, at this point, only) row of the template table.

# Fixed columns: identifiers and types, then label, description, xref,
# comment and provenance annotations.
# (The "Synonym" / "Xref_syn" columns were removed because the short name is
# now part of the label.)
template_seed = OrderedDict([
    ('ID', 'ID'),
    ('CLASS_TYPE', 'CLASS_TYPE'),
    ('RDF_Type', 'TYPE'),
    ("Name", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref"),
    ("Comment", "A rdfs:comment"),
    ("created_by", "A oboInOwl:created_by"),
    ("creation_date", "A oboInOwl:creation_date"),
])

# One column per DN group
for group_id in DN_group_fbbts:
    template_seed[group_id] = "C %"

# One column per process (neuropil)
for process_id in processes:
    template_seed[process_id] = "C 'has synaptic terminal in' some %"

# One column per tract
for tract_id in tracts:
    template_seed[tract_id] = "C 'fasciculates with' some %"


# from_records takes a list of dicts, one per row; there is a single row here.
template = pd.DataFrame.from_records([template_seed])

template[0:3]

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,Comment,created_by,creation_date,FBbt:00047515,...,FBbt:00047523,FBbt:00047524,FBbt:00047525,FBbt:00047533,FBbt:00047544,FBbt:00047535,FBbt:00047534,FBbt:00047543,FBbt:00047541,FBbt:00047542
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref,A rdfs:comment,A oboInOwl:created_by,A oboInOwl:creation_date,C %,...,C 'has synaptic terminal in' some %,C 'has synaptic terminal in' some %,C 'has synaptic terminal in' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %


In [333]:
def group_text_generator(group):
    """Return the opening definition sentence for a descending-neuron group.

    `group` is one of 'DNa', 'DNb', 'DNc', 'DNd', 'DNg', 'DNp' or 'DNx'.
    For any other value a prompt is printed and None is returned.
    """
    group_sentences = {
        'DNa': "Descending neuron belonging to the DNa group, having a cell body on the anterior dorsal surface of the brain.",
        'DNb': "Descending neuron belonging to the DNb group, having a cell body on the anterior ventral surface of the brain.",
        'DNc': "Descending neuron belonging to the DNc group, having a cell body in the pars intercerebralis.",
        'DNd': "Descending neuron belonging to the DNd group, having a cell body just lateral to the antennal lobe, on the anterior surface of the brain.",
        'DNg': "Descending neuron belonging to the DNg group, having a cell body in the gnathal ganglion.",
        'DNp': "Descending neuron belonging to the DNp group, having a cell body on the posterior surface of the brain.",
        'DNx': "Descending neuron belonging to the DNx group, having a cell body outside of the brain.",
    }
    if group in group_sentences:
        return group_sentences[group]
    # Unknown group: report it and fall through, returning None.
    print("Please enter a valid group")


In [334]:
def cell_text_generator(cells):
    """Return the definition sentence describing how many cells there are per hemisphere.

    cells > 1  -> sentence giving the cluster size in words (via num2words).
    cells == 1 -> sentence for a single cell.
    Anything else (0, negative, NaN) -> "" so the caller can still concatenate
    the result into the definition instead of failing on None.
    """
    if cells > 1:
        return " There is a cluster of up to %s of these cells in each hemisphere."%(num2words(cells))
    if cells == 1:
        return " There is one of these cells per hemisphere."
    # No usable count: contribute no sentence.
    return ""
    

In [335]:
def side_text_generator(cross,side):
    """Return the sentence about midline crossing and descending side.

    Both codes are looked up in the same table: "N"/"Y" give the midline
    clause and "I"/"C" give the ipsilateral/contralateral clause. The two
    clauses are concatenated in the order (cross, side). An unknown code
    raises KeyError.
    """
    clause_for_code = {
        "N": " This neuron does not cross the midline",
        "Y": " This neuron crosses the midline",
        "I": " and descends on the ipsilateral side of the cervical connective.",
        "C": " and descends on the contralateral side of the cervical connective.",
    }
    crossing_clause = clause_for_code[cross]
    side_clause = clause_for_code[side]
    return crossing_clause + side_clause
    

In [336]:
def label_maker(short):
    """Build the label for a neuron from its short name.

    The third character of `short` (e.g. the "a" in "DNa01") selects the
    region phrase; the label is "descending neuron <region phrase> <short>".
    An unrecognised letter raises KeyError.
    """
    letter_regions = {
        "a": "of the anterior dorsal brain",
        "b": "of the anterior ventral brain",
        "c": "of the pars intercerebralis",
        "d": "of the anterior brain",
        "g": "of the gnathal ganglion",
        "p": "of the posterior brain",
        "x": "outside of the brain",
    }
    group_letter = short[2]
    return "descending neuron %s %s"%(letter_regions[group_letter],short)

In [337]:
# Build one template row per neuron in raw_data_table, then append all of them
# to the header row of `template` with a single concat.

# Numeric columns only, flagged True where the value is > 0. Computed once up
# front instead of on every iteration; text columns are left out because they
# cannot be compared with 0.
positive = raw_data_table.select_dtypes(include=["number", "bool"]) > 0

# Sets make the repeated membership tests inside the loop cheap.
neuropil_columns = set(process_neuropil_names)
tract_columns = set(pathway_tract_names)

# Columns each new row starts with (blank). Extended whenever a row introduces
# an ID that has no column yet, so that later rows get a blank cell for it.
known_columns = list(template.columns)
seen_columns = set(known_columns)
new_rows = list()

for pos in range(len(raw_data_table)): #0 = DNa01

    r = raw_data_table.short.iloc[pos]
    #new template row as an ordered dictionary with a blank value for every column
    row_od = OrderedDict((c, "") for c in known_columns)

    #these are the same in each row
    row_od["CLASS_TYPE"] = "subclass"
    row_od["RDF_Type"] = "owl:Class"
    row_od["Xref_def"] = "FlyBase:FBrf0239335"
    #row_od["Xref_syn"] = "FlyBase:FBrf0239335"#removed as short name is now part of the label
    row_od["created_by"] = "CP"
    row_od["creation_date"] = "2018-08-23T12:00:00Z"

    #easy to generate data
    row_od["ID"] = DN_dict[r]
    #row_od["Synonym"] = r #removed as this is now part of the label
    row_od["Name"] = label_maker(r)
    #comment about similar neurons
    if r in sim_comment_od:
        row_od["Comment"] = sim_comment_od[r]

    #is_a relationship: the first three characters of the short name are looked up as the group
    is_a = lookup[r[0:3]]
    row_od[is_a] = is_a

    #FOR SYNAPSING AND FASCICULATION
    #get column names where value > 0
    names = [c for c, is_positive in zip(positive.columns, positive.iloc[pos].values) if is_positive]
    #convert these to IDs via lookup and fill the matching columns in row_od
    for n in names:
        if n in lookup:
            row_od[lookup[n]] = lookup[n]

    #STUFF FOR DEFINITION
    #DN group
    group_text = group_text_generator(raw_data_table.Group.iloc[pos])
    #number_cells
    number_cells_text = cell_text_generator(raw_data_table.max_cells.iloc[pos])
    #crossing and descending side
    cross = raw_data_table.crossing_midline.iloc[pos]
    side = raw_data_table.Descending_side.iloc[pos]
    side_text = side_text_generator(cross,side)

    #using x > 0 names generated for relationships
    synapsing_names = [nice_names[n] for n in names
                       if (n in nice_names) and (n in neuropil_columns)]
    if synapsing_names:
        synapsing_text = " It has neurites in the " + name_lister(synapsing_names) + "."
    else:
        synapsing_text = ""

    tract_names = [nice_names[n] for n in names
                   if (n in nice_names) and (n in tract_columns)]
    if tract_names:
        tract_text = " It fasciculates with the " + name_lister(tract_names) + " in the thoracico-abdominal ganglion."
    else:
        tract_text = ""

    row_od["Definition"] = group_text + side_text + synapsing_text + tract_text + number_cells_text

    new_rows.append(row_od)
    #remember any columns this row introduced
    for c in row_od:
        if c not in seen_columns:
            seen_columns.add(c)
            known_columns.append(c)

# Append all rows at once. Only the first row of `template` (the template
# specification row) is kept, so re-running this cell rebuilds the data rows
# instead of appending duplicates.
template = pd.concat([template.iloc[:1], pd.DataFrame(new_rows, columns=known_columns)],
                     ignore_index=True, sort=False)

template

Unnamed: 0,ID,CLASS_TYPE,RDF_Type,Name,Definition,Xref_def,Comment,created_by,creation_date,FBbt:00047515,...,FBbt:00047523,FBbt:00047524,FBbt:00047525,FBbt:00047533,FBbt:00047544,FBbt:00047535,FBbt:00047534,FBbt:00047543,FBbt:00047541,FBbt:00047542
0,ID,CLASS_TYPE,TYPE,A rdfs:label,A IAO:0000115,>A oboInOwl:hasDbXref,A rdfs:comment,A oboInOwl:created_by,A oboInOwl:creation_date,C %,...,C 'has synaptic terminal in' some %,C 'has synaptic terminal in' some %,C 'has synaptic terminal in' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %,C 'fasciculates with' some %
1,FBbt:00047572,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,"Namiki et al., 2018 (FBrf0239335), identify th...",CP,2018-08-23T12:00:00Z,,...,,,,,,,FBbt:00047534,,,
2,FBbt:00047573,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,,FBbt:00047534,,,
3,FBbt:00047574,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,
4,FBbt:00047575,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,
5,FBbt:00047576,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,"Namiki et al., 2018 (FBrf0239335), identify th...",CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,
6,FBbt:00047577,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,,FBbt:00047534,,,
7,FBbt:00047578,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,
8,FBbt:00047579,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,
9,FBbt:00047580,subclass,owl:Class,descending neuron of the anterior dorsal brain...,"Descending neuron belonging to the DNa group, ...",FlyBase:FBrf0239335,,CP,2018-08-23T12:00:00Z,,...,,,,,,FBbt:00047535,,,,


In [338]:
# Write the finished template as a tab-separated file, with the column headers and without the index.
template.to_csv(path_or_buf="./template.tsv", sep="\t", index=False, header=True)