In [None]:
import pandas as pd
from collections import OrderedDict

# Neuron annotations: every column is kept as a string and NA detection is
# switched off, so blank cells arrive as '' rather than NaN.
raw_data_table = pd.read_csv(
    './new_SEZ_cells.tsv',
    sep='\t',
    dtype='str',
    na_filter=False,
)

# Lookup tables, all indexed by their 'short_name' column so that codes used
# in raw_data_table can be looked up directly.
neuropil_table, soma_table, category_table = (
    pd.read_csv(lookup_path, sep='\t', index_col='short_name')
    for lookup_path in (
        './neuropil_detail.tsv',
        './soma_locations.tsv',
        './neuron_categories.tsv',
    )
)

In [None]:
def _split_regions(cell):
    """Turn a '|'-separated cell into a list of region codes.

    An empty string becomes an empty list. A value that is already a list is
    returned unchanged, so re-running this cell is harmless (the original
    raised AttributeError on the second run because lists have no .split).
    """
    if isinstance(cell, list):
        return cell
    return cell.split('|') if cell else []


# The three arborization columns hold '|'-separated neuropil short names.
for region_column in ('dend', 'axon', 'dend_axon'):
    raw_data_table[region_column] = raw_data_table[region_column].apply(_split_regions)

In [None]:
# Generate FBbt IDs for all new SEZ neurons.
# Each row gets the number `start + <row index label>`; the mapping is keyed by
# the neuron's 'type' value (a repeated type keeps the ID of its last row).
start = 51851

# Zero-pad the number to 8 digits instead of gluing a fixed "000" prefix onto
# it: the result is identical while the number has 5 digits (as it does for
# this start value), and the ID width stays constant if it ever does not.
new_IDs = {
    neuron_type: "FBbt:%08d" % (start + i)
    for i, neuron_type in zip(raw_data_table.index, raw_data_table['type'])
}

In [None]:
def name_lister(names):
    """Join names into an English list such as "a, b and c".

    Returns False for an empty sequence, the single element itself when there
    is exactly one, and otherwise a string in which all but the last name are
    separated by ", " and the last one is attached with " and ".
    """
    count = len(names)
    if count == 0:
        return False
    if count == 1:
        return names[0]

    # Two or more names: everything before the last is comma-separated.
    *leading, final = names
    return ", ".join(leading) + " and " + final
        

In [None]:
def definition_maker(cat_code, category, polarity, soma, dend, axon, dend_axon, laterality):
    """Compose the free-text definition for one neuron type.

    The result is up to three parts concatenated: a soma sentence, an
    arborization sentence and a laterality sentence, each ending with the
    "(Sterne et al., 2021)" citation.

    Parameters
    ----------
    cat_code : str
        Category code. 'i' and 'l' enable the synapse-distribution clause
        (only when all arborization is in `dend_axon`); 'e' adds a sentence
        about projecting outside the central nervous system; 'd_i', 'd_c',
        's' and 'e' make the laterality sentence start with "In the brain it".
    category : str
        Human-readable category name used in the first sentence.
    polarity : str
        Word describing the synapse distribution (inserted verbatim).
    soma : str
        Human-readable soma location used in the first sentence.
    dend, axon, dend_axon : list of str, or any falsy value
        Human-readable region names with dendritic, axonal, or mixed
        arborization. A falsy value (e.g. [] or False) means "none".
    laterality : str
        One of 'i', 'i_m', 'b_i', 'b_c', 'b_e', 'b'.

    Returns
    -------
    str
        The assembled definition. If no arborization regions are given at
        all, the arborization sentence is left out (the original code failed
        with UnboundLocalError/TypeError in that case).

    Raises
    ------
    ValueError
        If `laterality` is not one of the recognised codes.
    """
    first_sentence = "Adult %s with its soma in the %s (Sterne et al., 2021)." % (category, soma)

    # One clause per kind of arborization that is present, in fixed order.
    clauses = []
    if dend:
        clauses.append("dendritic arborization in the %s" % name_lister(dend))
    if axon:
        clauses.append("axonal arborization in the %s" % name_lister(axon))
    if dend_axon:
        clauses.append("both dendritic and axonal arborization in the %s" % name_lister(dend_axon))

    if clauses:
        # Clauses are joined like an English list: "a, b and c".
        if len(clauses) > 1:
            joined = ", ".join(clauses[:-1]) + " and " + clauses[-1]
        else:
            joined = clauses[0]
        arborization = " It has " + joined

        # Polarity is only mentioned for 'i'/'l' neurons whose arborization is
        # entirely mixed; dend_axon is necessarily non-empty here.
        if cat_code in ['i', 'l'] and not (dend or axon):
            if len(dend_axon) > 1:
                arborization += ", with %s synapse distribution within these regions" % polarity
            else:
                arborization += ", with %s synapse distribution within this region" % polarity

        arborization += " (Sterne et al., 2021)."
    else:
        # No regions recorded: skip the sentence rather than crash or emit a
        # dangling citation.
        arborization = ""

    if cat_code == 'e':
        arborization += " It also projects outside of the central nervous system (Sterne et al., 2021)."

    laterality_phrases = {
        'i': " is ipsilateral",
        'i_m': " is mainly ipsilateral, but arborizes close to the midline",
        'b_i': " is bilateral, but most of its innervation is in the ipsilateral hemisphere",
        'b_c': " is bilateral, with most of its innervation in the contralateral hemisphere",
        'b_e': " is bilateral, with similar innervation in each hemisphere",
        'b': " is bilateral",
    }
    try:
        lat_text = laterality_phrases[laterality]
    except (KeyError, TypeError):
        # TypeError covers unhashable values, which the original if-chain also
        # reported as an unrecognised laterality.
        raise ValueError("laterality type not recognised") from None

    if cat_code == 'd_i':
        lat_text = " In the brain it" + lat_text + ", and it descends on the ipsilateral side"
    elif cat_code == 'd_c':
        lat_text = " In the brain it" + lat_text + ", and it descends on the contralateral side"
    elif cat_code in ['s', 'e']:
        lat_text = " In the brain it" + lat_text
    else:
        lat_text = " It" + lat_text
    lat_text += " (Sterne et al., 2021)."

    return first_sentence + arborization + lat_text
    

In [None]:
# Header of the output template: each key is a column name and each value is
# the template specification string for that column (this becomes the first
# data row of the table; presumably a ROBOT template - confirm with the
# downstream tooling).
template_seed = OrderedDict([
    ("ID", "ID"),
    # label, description, creator, date
    ("Name", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("Xref_def", ">A oboInOwl:hasDbXref"),
    ("created_by", "A dc:contributor"),
    ("creation_date", "A dc:date"),
    # soma, input, output, parent class, laterality
    ("Soma", "SC 'has soma location' some %"),
    ("Input", "SC 'receives synaptic input in region' some % SPLIT=|"),
    ("Output", "SC 'sends synaptic output to region' some % SPLIT=|"),
    ("Category", "SC %"),
    ("Laterality", "SC 'has characteristic' some %"),
])

# from_records takes a list of dicts, one per row; at this point the frame
# contains only the specification row.
template = pd.DataFrame.from_records([template_seed])

In [None]:
# Build one template row per row of raw_data_table and append them all to the
# template's specification row.
#
# Rows are collected in a list and concatenated once (instead of calling
# pd.concat inside the loop), and they are appended to the first row of
# `template` only, so re-running this cell does not duplicate rows.

# Polarity code -> word used in the definition text (same for every row).
polarity_dict = {'p' : 'polarized', 'b' : 'biased', 'm' : 'mixed'}


def _region_ids(region_codes):
    """Return the '|'-joined FBbt IDs for the given neuropil short names.

    Empty-string codes are skipped; an empty input gives ''.
    """
    return '|'.join(neuropil_table['FBbt_id'][n] for n in region_codes if len(n) > 0)


def _nice_labels(region_codes):
    """Return the readable labels for the given neuropil short names, or False if there are none."""
    if not region_codes:
        return False
    return [neuropil_table["nice_label"][n] for n in region_codes]


new_rows = []
for i in raw_data_table.index:
    neuron = raw_data_table.loc[i]
    neuron_class = neuron['class']

    # new template row: every template column present, blank by default
    row_od = OrderedDict((c, "") for c in template.columns)

    # these are the same in each row
    row_od["Xref_def"] = "FlyBase:FBrf0251280"
    row_od["created_by"] = "http://orcid.org/0000-0002-1373-1705"
    row_od["creation_date"] = "2021-10-22T12:00:00Z"

    # easy to generate data
    row_od["ID"] = new_IDs[neuron['type']]
    row_od["Name"] = "adult %s subesophageal neuron" % neuron['type']

    # is_a relationship
    row_od["Category"] = category_table["FBbt_id"][neuron_class]

    # inputs/outputs: regions with mixed arborization count for both
    row_od["Input"] = _region_ids(neuron['dend'] + neuron['dend_axon'])
    row_od["Output"] = _region_ids(neuron['axon'] + neuron['dend_axon'])

    # soma location
    row_od["Soma"] = soma_table["FBbt_id"][neuron["soma"]]

    # laterality: 'i' is only asserted for classes other than d_i/d_c/s/e;
    # any of the bilateral codes maps to the same term; other codes
    # (e.g. 'i_m') leave the column blank.
    if neuron['laterality'] == 'i' and neuron_class not in ['d_i', 'd_c', 's', 'e']:
        row_od["Laterality"] = "PATO:0000634"
    elif neuron['laterality'] in ['b', 'b_e', 'b_i', 'b_c']:
        row_od["Laterality"] = "PATO:0000618"

    # definition text
    # argument order: cat_code, category, polarity, soma, dend, axon, dend_axon, laterality
    row_od["Definition"] = definition_maker(
        neuron_class,
        category_table["nice_label"][neuron_class],
        polarity_dict[neuron["polarity"]],
        soma_table["nice_label"][neuron["soma"]],
        _nice_labels(neuron["dend"]),
        _nice_labels(neuron["axon"]),
        _nice_labels(neuron["dend_axon"]),
        neuron["laterality"],
    )

    new_rows.append(row_od)

# Row 0 of `template` is the specification row; everything after it (if this
# cell was run before) is discarded and rebuilt.
template = pd.concat(
    [template.iloc[:1], pd.DataFrame(new_rows, columns=template.columns)],
    ignore_index=True,
    sort=False,
)

In [None]:
# Write the finished template as a tab-separated file: column names on the
# first line, no index column.
template.to_csv(
    "./template.tsv",
    sep="\t",
    header=True,
    index=False,
)