Notebook to make a ROBOT template for the new MANC cell types

In [None]:
import pandas as pd
import re
import ast

In [None]:
# FBbt ids for the new types; rows are indexed by the 'type' column
new_cell_FBbt_ids = pd.read_csv(
    'new_cell_FBbt_ids.tsv',
    sep='\t',
    index_col='type',
)

In [None]:
# open type detail files (rows are indexed by the 'type' column)
# NOTE(review): pandas reads the dtype key 'defaultdict' as a column name, not as a
# fallback dtype for the remaining columns - confirm whether a collections.defaultdict
# was intended here.
typing_info = pd.read_csv('typing_info.tsv', sep='\t', index_col='type', 
                          dtype={'defaultdict': 'object', 'count': 'int', 'type': 'str'})

# every column except the integer 'count' is stored as the text of a Python literal;
# parse each cell back into an object (presumably sets - the row-building loop takes
# len() of them, iterates them and unpacks single-element values)
set_cols = typing_info.columns.drop('count')
typing_info[set_cols] = typing_info[set_cols].map(ast.literal_eval)


In [None]:
# Lookup tables from controlled-vocabulary terms to ontology IDs / labels.
# All of them are tab-separated and indexed by their 'term' column.
term_map_kwargs = {'sep': '\t', 'index_col': 'term'}

class_FBbt_map = pd.read_csv('class_FBbt_map.tsv', **term_map_kwargs)
# subclass details are read as plain strings, with empty cells kept as ''
subclass_detail = pd.read_csv('subclass_detail.tsv', na_filter=False, dtype=str, **term_map_kwargs)
hemilineage_notch_FBbt_map = pd.read_csv('hemilineage_notch_FBbt_map.tsv', **term_map_kwargs)
hemilineage_nb_FBbt_map = pd.read_csv('hemilineage_nb_FBbt_map.tsv', **term_map_kwargs)
birthtime_FBbt_map = pd.read_csv('birthtime_FBbt_map.tsv', **term_map_kwargs)
nerve_FBbt_map = pd.read_csv('nerve_FBbt_map.tsv', **term_map_kwargs)
neuromere_FBbt_map = pd.read_csv('neuromere_FBbt_map.tsv', **term_map_kwargs)

region_FBbt_map = pd.read_csv('region_FBbt_map.tsv', **term_map_kwargs)
# give the 'multi' term a readable label for use in definitions
region_FBbt_map.loc['multi', 'FBbt_name'] = 'multiple regions'

tract_FBbt_map = pd.read_csv('tract_FBbt_map.tsv', **term_map_kwargs)
nt_go_map = pd.read_csv('nt_go_map.tsv', **term_map_kwargs)

In [None]:
# Two-letter type-name prefixes and the cell class each one stands for
# (used as a fallback when a type has no single cell_class value).
short_types = {
    'AN': 'ascending neuron',
    'DN': 'descending neuron',
    'EA': 'efferent ascending neuron',
    'EN': 'efferent neuron',
    'MN': 'motor neuron',
    'SN': 'sensory neuron',
    'SA': 'sensory ascending neuron',
    'IN': 'intrinsic neuron',
}

In [None]:
def cv_lookup(term, mapping):
    """Return the ID stored for `term` in a term-indexed mapping table.

    Parameters
    ----------
    term : label looked up in the index of `mapping`
    mapping : pandas.DataFrame indexed by term, with an 'FBbt_id' column

    Returns
    -------
    str or None
        The value of the 'FBbt_id' column for `term`, or None when the term
        is not in the table or the stored value is not a string (e.g. an
        empty cell read as NaN, or several rows sharing the same term).
    """
    try:
        FBbt_id = mapping.loc[term, 'FBbt_id']
    except (KeyError, TypeError):
        # unknown term (or a value that cannot be used as a label): no ID.
        # Only lookup failures are handled, so interrupts and unrelated
        # errors are no longer silently swallowed.
        return None
    return FBbt_id if isinstance(FBbt_id, str) else None

In [None]:
def label_lookup(term, mapping):
    """Return the label stored for `term`, without a leading 'adult '.

    Parameters
    ----------
    term : label looked up in the index of `mapping`
    mapping : pandas.DataFrame indexed by term, with an 'FBbt_name' column

    Returns
    -------
    str or None
        The value of the 'FBbt_name' column for `term` with the prefix
        'adult ' removed, or None when the term is not in the table or the
        stored value is not a string (e.g. an empty cell read as NaN).
    """
    try:
        FBbt_label = mapping.loc[term, 'FBbt_name']
    except (KeyError, TypeError):
        # unknown term (or a value that cannot be used as a label): no label.
        # Only lookup failures are handled, so interrupts and unrelated
        # errors are no longer silently swallowed.
        return None
    if isinstance(FBbt_label, str):
        return FBbt_label.removeprefix('adult ')
    return None

In [None]:
def name_lister(names):
    """Join names into an alphabetically sorted, human-readable list.

    Parameters
    ----------
    names : iterable of str
        Names to list. The input is not modified (a sorted copy is used).

    Returns
    -------
    str or False
        'a' for one name, 'a and b' for two, 'a, b and c' for more.
        False when `names` is empty (kept for backward compatibility).
    """
    ordered = sorted(names)  # sorted copy, so the caller's list keeps its order
    if not ordered:
        return False
    if len(ordered) == 1:
        return ordered[0]
    # comma-separate everything but the last name, which is joined with 'and'
    return ", ".join(ordered[:-1]) + " and " + ordered[-1]

In [None]:
from collections import OrderedDict

# template header: (column name, ROBOT template string) pairs, in output column order
_template_columns = [
    ('ID', 'ID'),
    ('TYPE', 'TYPE'),
    ('Label', 'LABEL'),
    # annotations
    ("obo_id", "A oboInOwl:id"),
    ("obo_namespace", "A oboInOwl:hasOBONamespace"),
    ('Definition', 'A IAO:0000115'),
    ('Def_xrefs', '>A oboInOwl:hasDbXref SPLIT=|'),
    ('Comment', 'A rdfs:comment'),
    ('Creator', 'AI dc:contributor'),
    ('Date', 'AT dc:date^^xsd:dateTime'),
    # subclass axioms
    ('Soma', 'SC RO:0002100 some %'),
    ('Parents', 'SC % SPLIT=|'),
    ('Lineage', 'SC RO:0002202 some %'),
    ('Laterality', 'SC RO:0000053 some %'),
    ('Projection_bundles', 'SC RO:0002101 some % SPLIT=|'),
    ('Neurotransmitter', 'SC RO:0002215 some %'),
    ('Presynapses', 'SC RO:0013003 some % SPLIT=|'),
    ('Postsynapses', 'SC RO:0013002 some % SPLIT=|'),
    ('Sens_dend', 'SC RO:0013007 some % SPLIT=|'),
]
template_head = OrderedDict(_template_columns)

# the template starts as a single row holding the ROBOT template strings
template = pd.DataFrame([template_head])
#template

In [None]:
# Build template one row at a time for each new class.
# Each row is collected as a dict and all rows are joined to the template in a
# single concat after the loop (concatenating inside the loop copies the whole
# frame on every iteration).
refs = ' (Takemura et al., 2023; Marin et al., 2024).'
new_rows = []
for i in new_cell_FBbt_ids.index: # index is the type name (same as index for typing_info)

    # create a dictionary to hold information for this class
    # (setting all as "" now to avoid awkward NaNs later)
    row = OrderedDict((c, "") for c in template.columns)

    # populate row dictionary

    # annotation axioms
    row['ID'] = new_cell_FBbt_ids['FBbt_id'][i]
    row['obo_id'] = new_cell_FBbt_ids['FBbt_id'][i]
    row['obo_namespace'] = 'fly_anatomy.ontology'
    row['TYPE'] = 'owl:Class'
    row['Label'] = f'adult {i} neuron'
    # xrefs for Marin et al. (2024), Takemura et al. (2023), Cheong et al. (2024)
    row['Def_xrefs'] = 'doi:10.1101/2023.06.05.543407|doi:10.1101/2023.06.05.543757|doi:10.7554/eLife.96084.1'
    row['Comment'] = ("Uncharacterized putative cell type from Marin et al. (2024), based on "
                      "MANC v1.2.1 data (Takemura et al., 2023) from NeuPrint.")

    row['Creator'] = "https://orcid.org/0000-0002-1373-1705"
    # two terms carry a later date than the rest
    if row['ID'] in ['FBbt:20004627', 'FBbt:20004148']:
        row['Date'] = "2024-07-24T12:00:00Z"
    else:
        row['Date'] = "2024-05-10T12:00:00Z"

    # establish lists for collecting multiple elements
    Parents_list = []
    projection_bundles = []
    definition_components = []

    # process each column of the data to add logical axioms and build definition
    # (the `[x] = value` unpacking raises ValueError unless `value` holds exactly
    # one element, which is how single-valued annotations are told apart below)

    # class
    try:
        [cell_class] = typing_info.loc[i, 'cell_class']
    except ValueError:
        # no single class recorded: fall back on the two-letter prefix of the type name
        cell_class = short_types.get(i[0:2], 'neuron')
    Parents_list.append(cv_lookup(cell_class, class_FBbt_map))
    if 'neuron' not in cell_class:
        cell_class += ' neuron'

    # info from subclass
    type_update = ''
    to_append = ''

    # subclass code embedded in the type name (characters 3-4); not every code
    # has an entry in subclass_detail, so a missing key is skipped
    name_subclass = i[2:4]
    try:
        if subclass_detail.loc[name_subclass, 'parent']:
            Parents_list.append(subclass_detail.loc[name_subclass, 'parent'])
        type_update += subclass_detail.loc[name_subclass, 'type_update']
        to_append += subclass_detail.loc[name_subclass, 'append']
    except KeyError:
        pass

    # subclass annotated in the typing data (only used when there is exactly one);
    # a subclass missing from subclass_detail still raises KeyError here
    try:
        [subclass] = typing_info.loc[i, 'subclass']
        if subclass_detail.loc[subclass, 'parent']:
            Parents_list.append(subclass_detail.loc[subclass, 'parent'])
        row['Laterality'] = subclass_detail.loc[subclass, 'laterality']
        type_update += ' ' + subclass_detail.loc[subclass, 'type_update']
        to_append += ' ' + subclass_detail.loc[subclass, 'append']
    except ValueError:
        pass

    # assemble the opening sentence, collapse runs of spaces left by empty parts
    # and avoid a doubled 'sensory'
    first_sentence = ' '.join(['Adult', type_update, cell_class, to_append]).strip(' ')
    first_sentence = re.sub('[ ]+', ' ', first_sentence)
    first_sentence = re.sub('sensory sensory', 'sensory', first_sentence)

    definition_components.append(first_sentence + refs)

    # birth time and hemilineage

    try:
        [birthtime] = typing_info.loc[i, 'birthtime']
        Parents_list.append(cv_lookup(birthtime, birthtime_FBbt_map))
        birth_lineage = f"It is a {birthtime} neuron"
        try:
            [hemilineage] = typing_info.loc[i, 'hemilineage']
            Parents_list.append(cv_lookup(hemilineage, hemilineage_notch_FBbt_map))
            row['Lineage'] = cv_lookup(hemilineage, hemilineage_nb_FBbt_map)
            birth_lineage += f" of the {hemilineage} hemilineage"
        except ValueError:
            pass
        definition_components.append(birth_lineage + refs)
    except ValueError:
        # no single birth time: the hemilineage (if any) gets its own sentence
        try:
            [hemilineage] = typing_info.loc[i, 'hemilineage']
            Parents_list.append(cv_lookup(hemilineage, hemilineage_notch_FBbt_map))
            row['Lineage'] = cv_lookup(hemilineage, hemilineage_nb_FBbt_map)
            definition_components.append(f"It belongs to the {hemilineage} hemilineage" + refs)
        except ValueError:
            pass

    # nerves and tracts
    nerves = ''
    try:
        [entry_nerve] = typing_info.loc[i, 'common_entryNerve']
        projection_bundles.append(cv_lookup(entry_nerve, nerve_FBbt_map))
        nerves += f'It enters the VNC via the {label_lookup(entry_nerve, nerve_FBbt_map)}'
        exit_nerve_join = ' and exits via the'
    except ValueError:
        exit_nerve_join = 'It exits the VNC via the'
    exit_terms = typing_info.loc[i, 'common_exitNerve']
    if len(exit_terms) > 0:
        # IDs are sorted so the output does not depend on iteration order
        # (unmapped terms are dropped here rather than at the final join)
        exit_ids = [cv_lookup(term, nerve_FBbt_map) for term in exit_terms]
        projection_bundles.extend(sorted(b for b in exit_ids if b))
        # NOTE(review): unlike the region names below, labels missing from the
        # map (None) are not filtered out before being listed - confirm every
        # nerve term has a label.
        exitnerves = list(set([label_lookup(term, nerve_FBbt_map) for term in exit_terms]))
        nerves += f"{exit_nerve_join} {name_lister(exitnerves)}"
    if nerves:
        definition_components.append(nerves + refs)

    tract_terms = typing_info.loc[i, 'common_longTract']
    if len(tract_terms) > 0:
        tract_ids = [cv_lookup(term, tract_FBbt_map) for term in tract_terms]
        projection_bundles.extend(sorted(b for b in tract_ids if b))
        tracts = list(set([label_lookup(term, tract_FBbt_map) for term in tract_terms]))
        definition_components.append(f'Within the VNC it fasciculates with the {name_lister(tracts)}' + refs)

    # regions
    synapses = ''
    origin_terms = typing_info.loc[i, 'common_origin']
    if len(origin_terms) > 0:
        # unique, non-empty region IDs in sorted (reproducible) order
        origin_ids = sorted({r for r in (cv_lookup(term, region_FBbt_map) for term in origin_terms) if r})
        # sensory neurons record their input regions in the 'Sens_dend' column
        # instead of 'Postsynapses'
        if ('sensory' in cell_class):
            row['Sens_dend'] = '|'.join(origin_ids)
        else:
            row['Postsynapses'] = '|'.join(origin_ids)
        origin_names = list(set([label_lookup(term, region_FBbt_map) for term in origin_terms]))
        if any(origin_names):
            synapses += f"It receives input in the {name_lister([n for n in origin_names if n])}"
            synapse_join = " and"
        else:
            synapse_join = "It"
    else:
        synapse_join = "It"

    target_terms = typing_info.loc[i, 'common_target']
    if len(target_terms) > 0:
        target_ids = sorted({r for r in (cv_lookup(term, region_FBbt_map) for term in target_terms) if r})
        row['Presynapses'] = '|'.join(target_ids)
        target_names = list(set([label_lookup(term, region_FBbt_map) for term in target_terms]))
        if any(target_names):
            synapses += f"{synapse_join} sends output to the {name_lister([n for n in target_names if n])}"
    if synapses:
        # 'multiple regions' reads better without the article
        synapses = re.sub('the multiple', 'multiple', synapses)
        definition_components.append(synapses + refs)

    # nt
    try:
        [neurotransmitter] = typing_info.loc[i, 'celltypePredictedNt']
        row['Neurotransmitter'] = cv_lookup(neurotransmitter, nt_go_map)
        definition_components.append(f"Its predicted neurotransmitter is {neurotransmitter} (Eckstein et al., 2024).")
    except ValueError:
        pass

    # cell number and somas
    cell_soma = ''
    cell_count = typing_info.loc[i, 'count']
    if cell_count == 1:
        cell_soma += "There is approximately one of these cells per organism"
        soma_mod = ' with its soma in'
    elif cell_count > 1:
        cell_soma += f"There are approximately {str(cell_count)} of these cells per organism"
        soma_mod = ' and their somas are found in'
    else:
        raise ValueError("Cell count must be >= 1")

    try:
        [somaNeuromere] = typing_info.loc[i, 'somaNeuromere']
        row['Soma'] = cv_lookup(somaNeuromere, neuromere_FBbt_map)
        cell_soma += f"{soma_mod} the {label_lookup(somaNeuromere, neuromere_FBbt_map)}"
    except ValueError:
        # several neuromeres: described in the definition only, no 'Soma' axiom
        if len(typing_info.loc[i, 'somaNeuromere'])>1:
            neuromeres = list(set([
                label_lookup(term, neuromere_FBbt_map) for term in typing_info.loc[i, 'somaNeuromere']]))
            cell_soma += f"{soma_mod} the {name_lister(neuromeres)}"
        else:
            pass
    if cell_soma:
        definition_components.append(cell_soma + refs)

    # add multi-part entries (drop any Nones from ID lists)
    row['Parents'] = '|'.join([p for p in Parents_list if isinstance(p, str)])
    row['Projection_bundles'] = '|'.join([b for b in projection_bundles if isinstance(b, str)])
    row['Definition'] = ' '.join(definition_components)

    new_rows.append(row)

# turn the collected rows into a dataframe and join to template
if new_rows:
    template = pd.concat([template, pd.DataFrame(new_rows, columns=template.columns)],
                         ignore_index=True)

In [None]:
# add rows to specify TYPE for other classes and OPs
# this is a workaround for a robot bug - https://github.com/ontodev/robot/issues/1105

# function to get curies
def extract_uris(text):
    """Return every FBbt/GO/PATO/RO CURIE found in `text`, in order of appearance.

    `text` is converted with str() first, so non-string cell values are accepted.
    Duplicates are kept; an empty list is returned when nothing matches.
    """
    curie_pattern = r'\b(?:FBbt|GO|PATO|RO):\d+\b'
    return [hit.group(0) for hit in re.finditer(curie_pattern, str(text))]

In [None]:
# Collect every CURIE mentioned anywhere in the template except the ID column.
# NOTE(review): the 'obo_id' column repeats the IDs of the new terms, so those
# IDs are collected here as well - confirm that is intended.
found_curies = set()
for column in template.columns:
    if column != 'ID':
        for cell in template[column]:
            found_curies.update(extract_uris(cell))

# sorted, so the rows added for these CURIEs come out in the same order on every run
curies = sorted(found_curies)

#curies

In [None]:
# make a row for each: only ID and TYPE are filled in, every other column stays ""
# (setting all as "" to avoid awkward NaNs later)
type_rows = []
for x in curies:
    row = OrderedDict((c, "") for c in template.columns)
    row['ID'] = x
    # RO terms are relations; everything else collected is a class
    row['TYPE'] = 'owl:ObjectProperty' if x.startswith('RO:') else 'owl:Class'
    type_rows.append(row)

# turn into a dataframe and join to template in one concat (concatenating inside
# the loop copies the whole frame on every iteration)
if type_rows:
    template = pd.concat([template, pd.DataFrame(type_rows, columns=template.columns)],
                         ignore_index=True)

#template.head()

In [None]:
# write the finished template as a tab-separated file, without the row index
template.to_csv('template.tsv', sep='\t', index=False)