In [None]:
import pandas as pd
import re
import csv
from collections import OrderedDict

# Read the tab-separated cell type table into a pandas DataFrame and keep
# only the rows that have a value in the FBbt_ID column.
raw_data_table = (
    pd.read_csv('./Baker_cell_types.tsv', sep='\t')
    .dropna(subset=['FBbt_ID'])
)

#raw_data_table.head()

In [None]:
# Only auditory system neurons found in EM are used; they are selected as the
# rows whose FBbt_ID contains "FBbt:0".
has_fbbt_id = raw_data_table["FBbt_ID"].str.contains("FBbt:0")
raw_data_table = raw_data_table[has_fbbt_id]


In [None]:
# Load the ROI mapping, remove left/right markers, keep only upper-case regions.

roi_mapping = pd.read_csv('./hemibrain_1-1_ROI_mapping.tsv', sep='\t')

# Parenthesised side markers such as "(L)" or "(R)". A raw string is required:
# "\(" is not a valid escape sequence in an ordinary string literal.
# Compiled once instead of once per row.
side_marker = re.compile(r'\([LR]+\)')
roi_mapping['ROI'] = roi_mapping['ROI'].map(lambda roi: side_marker.sub('', roi))

# Keep only ROI names made up solely of the letters A-Z; na=False treats a
# missing match result as "no match".
roi_mapping = roi_mapping[roi_mapping['ROI'].str.match(r'[A-Z]+$', na=False)]

# Left and right rows become identical once the side marker is gone, so
# collapse them before indexing the table by ROI name.
roi_mapping = roi_mapping.drop_duplicates().set_index('ROI')

#roi_mapping.head()

In [None]:
def name_lister(names):
    """Join names into an English-style list.

    One name is returned unchanged, two names become "a and b", and longer
    lists become "a, b and c" (no comma before "and").

    Args:
        names: sequence of strings supporting len(), indexing and slicing.

    Returns:
        The joined string, the single element itself when there is exactly
        one name, or False when `names` is empty.
    """
    if len(names) < 1:
        # Nothing to list; False is kept as the historical "empty" result.
        return False
    if len(names) == 1:
        return names[0]
    # Everything except the last name is comma-separated; the last name is
    # attached with "and".
    return ", ".join(names[:-1]) + " and " + names[-1]

In [None]:

# Header row of the output table: each column name is paired with the template
# string stored in the first row (these look like ROBOT template strings —
# TODO confirm). Column order is significant and is preserved.
template_seed = OrderedDict([
    # Annotations
    ('ID', 'ID'),
    ("Label", "A rdfs:label"),
    ("Definition", "A IAO:0000115"),
    ("def_ref", ">A oboInOwl:hasDbXref"),
    ("Comment", "A rdfs:comment"),
    ("created_by", "A dc:contributor"),
    ("creation_date", "A dc:date"),
    ("Synonym", "A oboInOwl:hasExactSynonym"),
    ("syn_ref", ">A oboInOwl:hasDbXref"),
    # Relationships
    ("Parent", "C %"),
    ("Terminals", "C 'has synaptic IO in region' some % SPLIT=|"),
    ("Soma", "C 'has soma location' some %"),
    ("Function", "C 'capable of part of' some %"),
    ("Laterality", "C 'has characteristic' some %"),
    ("Neurotransmitter", "C 'capable of' some %"),
])

# One-row DataFrame holding the header; data rows are appended to it later.
template = pd.DataFrame.from_records([template_seed])


In [None]:
# Build one template row per cell type in raw_data_table and append all of
# them, in a single concat, to the header row held in `template`.

# Values that are the same in each row.
baker_reference = "doi:10.1101/2020.10.08.332148"
constant_fields = {
    "def_ref": baker_reference,
    "syn_ref": baker_reference,
    "created_by": "http://orcid.org/0000-0002-1373-1705",
    "creation_date": "2022-02-03T12:00:00Z",
    "Parent": "FBbt:00005106",  # neuron
    "Soma": "FBbt:00003625",  # adult brain CBR
    "Function": "GO:0007605",
}

# 'neurotransmitter' code -> (term for the Neurotransmitter column,
#                             adjective used in the definition)
neurotransmitter_lookup = {
    'D': ('GO:0061527', 'dopaminergic'),
    'G': ('GO:0061534', 'GABAergic'),
    'C': ('GO:0014055', 'cholinergic'),
}

# 'laterality' code -> (term for the Laterality column,
#                       ending of the innervation sentence)
laterality_lookup = {
    'U': ('PATO:0000618', " and it does not cross the midline (Baker et al., 2022)."),
    'B': ('PATO:0000634', " and it crosses the midline (Baker et al., 2022)."),
}

# 'Song_type' code -> ending of the first definition sentence
song_type_lookup = {
    'S': ("that preferentially responds to male sine, "
          "rather than pulse, courtship song (Baker et al., 2022)."),
    'P': ("that preferentially responds to male pulse, "
          "rather than sine, courtship song (Baker et al., 2022)."),
    'SP': ("that responds similarly to male sine "
           "and pulse courtship song (Baker et al., 2022)."),
}


def _split_regions(cell):
    """Split a comma-separated cell into ROI names; [] for a missing cell.

    Surrounding whitespace is stripped and blank entries are skipped, so
    "A, B" and "A,B" give the same names.
    """
    if pd.isna(cell):
        return []
    return [name.strip() for name in str(cell).split(',') if name.strip()]


def _region_text(region_names):
    """Return the FBbt names of the ROIs (with 'adult ' removed) as one English list."""
    labels = [roi_mapping.loc[name, "FBbt_name"].replace('adult ', '')
              for name in region_names]
    return name_lister(labels)


column_names = list(template.columns)
new_rows = []

for _, record in raw_data_table.iterrows():
    cell_name = record['Baker_name']

    # New template row: every column present and blank, then the shared values.
    row_od = OrderedDict.fromkeys(column_names, "")
    row_od.update(constant_fields)

    # Easy to generate data.
    row_od["ID"] = record['FBbt_ID']
    row_od["Synonym"] = cell_name

    # Neurotransmitter: None when the code is missing or not in the lookup.
    transmitter = neurotransmitter_lookup.get(record['neurotransmitter'])

    # Comment
    row_od["Comment"] = "Cell type identified at light level and in EM data (Baker et al., 2022)."
    if transmitter is not None:
        row_od["Comment"] += " Neurotransmitter predicted from EM data (Baker et al., 2022)."
        row_od["Neurotransmitter"] = transmitter[0]

    # Laterality: None when the code is missing or not in the lookup.
    laterality = laterality_lookup.get(record['laterality'])
    if laterality is not None:
        row_od["Laterality"] = laterality[0]

    # Synapse locations. A missing 'other_regions' cell simply means there
    # are no secondary regions; main regions are required.
    main_region_names = _split_regions(record["main_regions"])
    if not main_region_names:
        raise ValueError("No main regions for auditory neuron %s!" % cell_name)
    other_region_names = _split_regions(record["other_regions"])
    all_region_names = main_region_names + other_region_names

    # Fail with the neuron name attached rather than a bare lookup error.
    unknown_regions = [name for name in all_region_names
                       if name not in roi_mapping.index]
    if unknown_regions:
        raise KeyError("Unknown ROI(s) %s for auditory neuron %s"
                       % (", ".join(unknown_regions), cell_name))

    row_od["Terminals"] = '|'.join(roi_mapping.loc[name, "FBbt_id"]
                                   for name in all_region_names)

    # Definition, sentence 1: song preference.
    song_type = record["Song_type"]
    if song_type not in song_type_lookup:
        raise ValueError("No song type for auditory neuron %s!" % cell_name)
    firstsentence = "Auditory system neuron of the adult brain " + song_type_lookup[song_type]

    # Definition, sentence 2: innervation and midline crossing.
    main_region_text = _region_text(main_region_names)
    if other_region_names:
        secondsentence = (" Its main innervation is in the %s, it also innervates the %s"
                          % (main_region_text, _region_text(other_region_names)))
    else:
        secondsentence = " Its main innervation is in the %s" % main_region_text
    if laterality is not None:
        secondsentence += laterality[1]
    else:
        secondsentence += " (Baker et al., 2022)."

    # Definition, sentence 3: neurotransmitter, only when one is recorded.
    if transmitter is not None:
        thirdsentence = " It is %s (Baker et al., 2022)." % transmitter[1]
    else:
        thirdsentence = ""

    row_od["Definition"] = firstsentence + secondsentence + thirdsentence
    row_od["Label"] = "adult %s neuron %s" % (main_region_text, cell_name)

    new_rows.append(row_od)

# Append all rows at once. Only the first (header) row of `template` is
# reused, so re-running this cell does not stack duplicate data rows.
if new_rows:
    template = pd.concat(
        [template.iloc[:1], pd.DataFrame(new_rows, columns=column_names)],
        ignore_index=True, sort=False)

# Number of data rows generated.
count = len(new_rows)



In [None]:
# Write the finished template (header row plus data rows) as a tab-separated
# file, without the DataFrame index column.
template.to_csv(
    "template.tsv",
    index=False,
    sep='\t',
)