# Loading NeuroArch Database with FlyCircuit Dataset v1.2

This tutorial provides code to load the FlyCircuit Dataset v1.2 into a NeuroArch database. Requirements before running the notebook:
- Install [NeuroArch](https://github.com/fruitflybrain/neuroarch), [OrientDB Community Version](https://www.orientdb.org/download), and [pyorient](https://github.com/fruitflybrain/pyorient/). The [NeuroNLP Docker image](https://hub.docker.com/r/fruitflybrain/neuronlp) and [FlyBrainLab Docker image](https://hub.docker.com/r/fruitflybrain/fbl) both come with these software requirements preinstalled.
- Have about 6 GB free disk space.
- Download this [data file](https://drive.google.com/file/d/1oI1wu4FZfXbrTxmfCwSGuONQLBk4eQDg/view?usp=sharing) and uncompress the two enclosed files to the current folder.
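- Download the [neuron skeletons](https://drive.google.com/file/d/1UGRspq60xh8SgK_DSFbMYmF7ML0BFCPE/view?usp=sharing) and uncompress them so that the SWC files are in a folder named `swc` inside the current folder (the notebook reads `swc/<neuron name>.swc`).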

A backup of the database created by this notebook can be downloaded [here](https://drive.google.com/file/d/1JXtWt-2X66Mb5I271YRUiMuQx3I2b43s/view?usp=sharing). To restore it in OrientDB, run
```
/path/to/orientdb/bin/console.sh "create database plocal:../databases/flycircuit admin admin; restore database /path/to/flycircuit1.2_na_v1.0_backup.zip"
```

In [None]:
import os
import urllib.request, json

import numpy as np
import pandas as pd
from tqdm import tqdm

import neuroarch.na as na


## Define All Neuropils

In [None]:
# Names of every neuropil to load, in load order.
# Most names come as a lowercase/uppercase pair: the mesh-loading loop fetches
# the '_l' mesh file for the lowercase name and the '_r' mesh file for the
# uppercase one.  EB, FB and PB appear only in uppercase and use an unsuffixed
# mesh file.
neuropils = [
    'al', 'AL', 'ammc', 'AMMC', 'cal', 'CAL', 'ccp', 'CCP',
    'cmp', 'CMP', 'cvlp', 'CVLP', 'dlp', 'DLP', 'dmp', 'DMP',
    'EB', 'FB',
    'fspp', 'FSPP', 'idfp', 'IDFP', 'idlp', 'IDLP', 'lat', 'LAT',
    'lh', 'LH', 'lob', 'LOB', 'lop', 'LOP', 'mb', 'MB',
    'med', 'MED', 'nod', 'NOD', 'og', 'OG', 'optu', 'OPTU',
    'pan', 'PAN',
    'PB',
    'sdfp', 'SDFP', 'sog', 'SOG', 'spp', 'SPP', 'vlp', 'VLP',
    'vmp', 'VMP',
]

## Loading NeuroArch Database

### Create and connect to database. mode 'o' overwrites the entire database.

In [None]:
# Connect to the 'flycircuit' database on the local server.
# WARNING: mode 'o' overwrites the entire database.
flycircuit = na.NeuroArch(
    'localhost',
    'flycircuit',
    mode='o',
)

### Create a species and datasource under the species.

In [None]:
# Register the species first, then create the FlyCircuit data source under it.
species = flycircuit.add_species(
    'Drosophila melanogaster',
    stage='adult',
    sex='female',
    synonyms=['fruit fly', 'common fruit fly', 'vinegar fly'],
)
version = '1.2'
datasource = flycircuit.add_DataSource(
    'FlyCircuit',
    version=version,
    url='http://flycircuit.tw',
    species=species,
)
# Make it the default data source of this connection
# (presumably so the add_* calls below are recorded under it -- confirm in NeuroArch).
flycircuit.default_DataSource = datasource

### Load all neuropils and the mesh defining their boundaries.

In [None]:
# Download the boundary mesh of every neuropil and add the neuropil to the database.
# The mesh file is named after the lowercase neuropil name plus a suffix:
#   name starting with a non-uppercase letter   -> '_l'
#   uppercase name that has a lowercase twin    -> '_r'
#   uppercase name without a lowercase twin     -> no suffix
for neuropil in neuropils:
    if not neuropil[0].isupper():
        suffix = '_l'
    elif neuropil.lower() in neuropils:
        suffix = '_r'
    else:
        suffix = ''
    url = "https://raw.githubusercontent.com/fruitflybrain/ffbo.lib/hemibrain/mesh/flycircuit/{}{}.json".format(neuropil.lower(), suffix)
    with urllib.request.urlopen(url) as response:
        data = json.loads(response.read().decode())
    # Only the faces and vertices of the downloaded JSON are stored.
    mesh = {'type': 'mesh', 'faces': data['faces'], 'vertices': data['vertices']}
    flycircuit.add_Neuropil(neuropil, morphology=mesh)

### Load all neurons.

In [None]:
# Folder in which the neuron loop looks for '<neuron name>.swc' skeleton files.
swc_dir = 'swc'
# Semicolon-separated table with one row per neuron.
neuron_list = pd.read_csv('FlyCircuit1.2_Neurons.csv', sep=';')

In [None]:
def load_swc(file_name):
    """
    Read an SWC file and return its rows as a pandas DataFrame.

    The file is parsed as space-separated values without a header row;
    anything after a '#' is treated as a comment.  The resulting columns are
    named 'sample', 'identifier', 'x', 'y', 'z', 'r' and 'parent', in that
    order.
    """
    column_names = ['sample', 'identifier', 'x', 'y', 'z', 'r', 'parent']
    reader_options = dict(
        sep=' ',
        header=None,
        comment='#',
        index_col=False,
        names=column_names,
        skipinitialspace=True,
    )
    return pd.read_csv(file_name, **reader_options)

In [None]:
uname_dict = {}
# Optional metadata columns copied into a neuron's `info` when they hold a real string value.
fields = ['Author', 'Driver', 'Putative birth time', 'Lineage']


def _parse_counts(spec):
    """Turn a 'name:integer,name:integer' string into a {name: int} dict."""
    counts = {}
    for entry in spec.split(','):
        parts = entry.split(':')
        counts[parts[0]] = int(parts[1])
    return counts


# Add every neuron that has an SWC skeleton file to the database.
for i, row in tqdm(neuron_list.iterrows()):
    name = row['name']
    uname = name
    swc_path = '{}/{}.swc'.format(swc_dir, name)
    if not os.path.exists(swc_path):
        # Neurons without a skeleton file are reported and skipped.
        print('{} does not have a swc file'.format(name))
        continue

    dendrite_spec = row['dendrite']
    axon_spec = row['axon']
    total_spec = row['total']
    neuropil = row['neuropil']

    # Keep only the metadata fields that are non-empty, known strings.
    info = {}
    for field in fields:
        value = row[field]
        if value not in ['', 'unknown'] and isinstance(value, str):
            info[field] = value

    dendrites = _parse_counts(dendrite_spec)
    axons = _parse_counts(axon_spec)

    dendrites_unknown = len(dendrites) == 1 and 'unknown' in dendrites
    axons_unknown = len(axons) == 1 and 'unknown' in axons
    if dendrites_unknown or axons_unknown:
        # When the 'neuropil' column is not 'unclear' and the 'total' column
        # has exactly one entry, that entry stands in for both dendrites and
        # axons (both names then refer to the same dict).
        use_totals = False
        if neuropil != 'unclear':
            totals = _parse_counts(total_spec)
            use_totals = len(totals) == 1
        if use_totals:
            dendrites = axons = totals
        else:
            # Otherwise any side that mentions 'unknown' is emptied.
            if 'unknown' in dendrites:
                dendrites = {}
            if 'unknown' in axons:
                axons = {}

    arborization = [{'type': 'neuropil', 'dendrites': dendrites, 'axons': axons}]

    transmitter = row['Putative neurotransmitter']
    neurotransmitters = [transmitter] if transmitter != 'unknown' else None

    # Build the morphology record from the skeleton: coordinates rounded to
    # 2 decimals, radii to 5.
    skeleton = load_swc(swc_path)
    morphology = {axis: [round(v, 2) for v in skeleton[axis].tolist()]
                  for axis in ('x', 'y', 'z')}
    morphology['r'] = [round(v, 5) for v in skeleton['r'].tolist()]
    morphology['parent'] = skeleton['parent'].tolist()
    # The identifier column of the file is not used: the first sample is
    # given identifier 1 and every other sample 0.
    morphology['identifier'] = [1] + [0] * (len(skeleton['x']) - 1)
    morphology['sample'] = skeleton['sample'].tolist()
    morphology['type'] = 'swc'

    flycircuit.add_Neuron(
        uname,
        name,
        referenceId=name,
        info=info or None,  # pass None instead of an empty dict
        morphology=morphology,
        arborization=arborization,
        neurotransmitters=neurotransmitters,
    )

### Load all inferred synapses

In [None]:
# Space-separated table of inferred synapses; the loading loop reads its
# 'pre-neuron', 'post-neuron' and 'N' columns.
synapse_list = pd.read_csv(
    'FlyCircuit1.2_Synapse.csv',
    sep=' ',
)

In [None]:
# Add an inferred synapse for every row of the synapse table whose pre- and
# post-synaptic neurons both appear in the 'name' column of the neuron table.
#
# The set of known names is built once, so each membership test is O(1)
# instead of scanning the whole 'name' column twice per synapse row.
# Missing (NaN) names are dropped so that a missing neuron name in the
# synapse table can never match.
known_neurons = set(neuron_list['name'].dropna())

for i, row in tqdm(synapse_list.iterrows()):
    pre_neuron = row['pre-neuron']
    post_neuron = row['post-neuron']
    N = int(row['N'])
    if pre_neuron in known_neurons and post_neuron in known_neurons:
        flycircuit.add_InferredSynapse(pre_neuron, post_neuron, N = N)
    