# Loading NeuroArch Database with Medulla 7 Column Dataset

This tutorial provides code to load the Medulla 7 Column dataset into a NeuroArch database. Requirements before running the notebook:
- Install [NeuroArch](https://github.com/fruitflybrain/neuroarch), [OrientDB Community Version](https://www.orientdb.org/download) version 3.1.x, and [pyorient](https://github.com/fruitflybrain/pyorient). The [NeuroNLP Docker image](https://hub.docker.com/r/fruitflybrain/neuronlp) and the [FlyBrainLab Docker image](https://hub.docker.com/r/fruitflybrain/fbl) both come with these software requirements preinstalled.
- Download the [Neuprint database dump for the Medulla 7 Column dataset](https://github.com/connectome-neuprint/neuPrint/raw/master/fib25_neo4j_inputs.zip).
- Download the neuron skeletons from the [ConnectomeHackathon2015 repository](https://github.com/janelia-flyem/ConnectomeHackathon2015), rename the `skeletons` folder to `swc`, and move it into the same directory as this notebook.
- Download the [GSE116969 transcriptome cell expression data](https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE116969&format=file&file=GSE116969%5FdataTable7b%2Egenes%5Fx%5Fcells%5Fp%5Fexpression%2Emodeled%5Fgenes%2Etxt%2Egz) and uncompress it to the same folder.
- Have 3GB free disk space (for Neuprint dump and NeuroArch database).

A backup of the database created by this notebook can be downloaded [here](https://drive.google.com/file/d/1lX_nGmqfQ7YouO4TTGw7Q3fRaAZn1T1S/view?usp=sharing). To restore it in OrientDB, run
```
/path/to/orientdb/bin/console.sh "create database plocal:../databases/medulla admin admin; restore database /path/to/medulla7column_fib25_na_v2.0.1_backup.zip"
```

In [None]:
import glob
import os
import subprocess
import csv
from collections import Counter

import numpy as np
import pandas as pd
from tqdm import tqdm
import h5py

import neuroarch.models as models
import neuroarch.na as na

In [None]:
from requests import HTTPError
from neuprint import Client
# Client for the 'medulla7column' dataset on the neuPrint examples server.
# NOTE(review): the token is an empty string here -- presumably a personal
# neuPrint access token has to be filled in before this cell works; confirm.
c = Client('neuprint-examples.janelia.org', dataset='medulla7column', token = '')

In [None]:
# Neuron table of the neuPrint dump (expects the dump unpacked into ./fib25).
df = pd.read_csv('fib25/Neuprint_Neurons_fib25.csv')

In [None]:
# Fetch the SWC skeleton of every body in the neuron table from the neuPrint
# server and store it as swc_new/<bodyId>.swc.
# The target folder is created up front: open(..., 'w') does not create
# missing directories and would otherwise fail on the first skeleton.
os.makedirs('swc_new', exist_ok=True)
for i in tqdm(df['bodyId:long']):
    try:
        s = c.fetch_skeleton(i, format='swc')
    except HTTPError:
        # bodies whose skeleton request fails are skipped
        continue
    with open('swc_new/{}.swc'.format(i), 'w') as f:
        f.write(s)

## Extract Neuron and Synapse Attributes

In [None]:
def process(chunk):
    """Turn rows of the neuPrint neuron table into plain attribute lists.

    Parameters
    ----------
    chunk : pandas.DataFrame
        Rows carrying the neuPrint column headers ('bodyId:long',
        'pre:int', 'post:int', 'status:string', 'statusLabel:string',
        'cropped:boolean', 'instance:string', 'type:string').

    Returns
    -------
    list of list
        One list per row, in the column order given above. The cropped
        flag is converted to 0/1; a missing (NaN) flag is passed through
        unchanged.
    """
    records = []
    for _, entry in chunk.iterrows():
        cropped = entry['cropped:boolean']
        if not np.isnan(cropped):
            cropped = int(cropped)
        records.append([
            entry['bodyId:long'],
            entry['pre:int'],
            entry['post:int'],
            entry['status:string'],
            entry['statusLabel:string'],
            cropped,
            entry['instance:string'],
            entry['type:string'],
        ])
    return records

In [None]:
chunksize = 100000

# Export the selected neuron attributes to neurons.csv.
# newline='' is what the csv module requires for files handed to csv.writer;
# without it every row is followed by an empty line on platforms that
# translate '\n' on write (e.g. Windows).
with open('neurons.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['bodyID','pre','post','status','statusLabel','cropped','instance','type'])
    df = pd.read_csv('fib25/Neuprint_Neurons_fib25.csv')
    neurons = process(df)
    writer.writerows(neurons)

In [None]:
# Collect, for every synapse that belongs to a synapse set between two listed
# neurons, the triple [synapse id, first id field, second id field] -- once for
# 'pre' synapse sets and once for 'post' synapse sets.
neurons = pd.read_csv('neurons.csv')
# body IDs of every neuron listed in neurons.csv
traced_neuron_id = neurons['bodyID'].to_numpy()

chunksize = 1000000
# Result buffers, preallocated with 10 million rows each and trimmed to the
# filled part after the loop.
pre_syn = np.empty((int(1e7),3), np.int64)
post_syn = np.empty((int(1e7),3), np.int64)

pre_count = 0
post_count = 0
count = 0
for chunk in pd.read_csv('fib25/Neuprint_SynapseSet_to_Synapses_fib25.csv', chunksize=chunksize):
    ids = chunk[':START_ID']
    # ':START_ID' is split on '_' into <int>_<int>_<'pre'|'post'>.
    # First pass: [position of the row inside the chunk, first int, second int].
    # NOTE(review): if a chunk contains no 'pre' (or no 'post') rows the array
    # below is 1-D and the [:,1] indexing further down would raise -- confirm
    # this cannot happen with the dump.
    pre_site = np.array([[n, int(i.split('_')[0]), int(i.split('_')[1])] \
                         for n,i in enumerate(ids) if i.split('_')[2] == 'pre'])
    post_site = np.array([[n, int(i.split('_')[0]), int(i.split('_')[1])] \
                          for n,i in enumerate(ids) if i.split('_')[2] == 'post'])
    # keep the chunk positions of rows whose two ids are both listed neurons
    pre_site_known = pre_site[np.logical_and(
                              np.isin(pre_site[:,1], traced_neuron_id),
                              np.isin(pre_site[:,2], traced_neuron_id)),0]
    post_site_known = post_site[np.logical_and(
                                np.isin(post_site[:,1], traced_neuron_id),
                                np.isin(post_site[:,2], traced_neuron_id)),0]
    # Second pass over the kept rows only: replace the chunk position by the
    # synapse id stored in ':END_ID(Syn-ID)'.
    retrieved_pre_site = chunk.iloc[pre_site_known]
    pre_site = np.array([[row[':END_ID(Syn-ID)'], int(row[':START_ID'].split('_')[0]), int(row[':START_ID'].split('_')[1])] \
                         for i, row in retrieved_pre_site.iterrows()])
    retrieved_post_site = chunk.iloc[post_site_known]
    post_site = np.array([[row[':END_ID(Syn-ID)'], int(row[':START_ID'].split('_')[0]), int(row[':START_ID'].split('_')[1])] \
                         for i, row in retrieved_post_site.iterrows()])
    # append to the buffers; an empty result has nothing to copy
    if pre_site.size:
        pre_syn[pre_count:pre_count+pre_site.shape[0], :] = pre_site
        pre_count += pre_site.shape[0]
    if post_site.size:
        post_syn[post_count:post_count+post_site.shape[0], :] = post_site
        post_count += post_site.shape[0]
    # progress report; count advances by the nominal chunk size
    count += chunksize
    print(count, pre_count, post_count)

# drop the unused tail of the buffers
pre_syn = pre_syn[:pre_count,:]
post_syn = post_syn[:post_count,:]

# order both tables by synapse id (column 0)
ind = np.argsort(pre_syn[:,0])
pre_syn_sorted = pre_syn[ind, :]
ind = np.argsort(post_syn[:,0])
post_syn_sorted = post_syn[ind, :]


In [None]:
# Extract the synapse (pre-site) -> synapse (post-site) connections.
# Selecting by the postsynaptic site alone is sufficient to get all synapses,
# because one presynaptic site can have multiple postsynaptic sites.
post_syn_index = post_syn_sorted[:,0].copy()

df = pd.read_csv('fib25/Neuprint_Synapse_Connections_fib25.csv')
post_ids = df[':END_ID(Syn-ID)']
# positions of the rows whose post-site id was retained
is_used = post_ids.isin(post_syn_index).to_numpy()
used = np.where(is_used)[0]
selected = df.iloc[used]
connections = selected.to_numpy()
# sort the selected rows by their second column
ind = np.argsort(connections[:,1])
connections = connections[ind, :]


In [None]:
# extract synapse details
# For every retained synapse id, read [id, type (0 = pre, 1 = post),
# confidence * 1e6, x, y, z] from the synapse table into synapse_array.
# with h5py.File('syn_pre_post_sorted_by_synapse_id.h5', 'r') as f:
#     pre_syn_sorted = f['pre_syn_sorted'][:]
#     post_syn_sorted = f['post_syn_sorted'][:]
chunksize = 100000

# sorted ids of all retained synapses (pre-site ids de-duplicated first)
pre_syn_index = list(set(pre_syn_sorted[:,0].copy()))
pre_syn_index.extend(list(post_syn_sorted[:,0].copy()))
syn_index = np.array(sorted(pre_syn_index))
del pre_syn_index#, pre_syn_sorted, post_syn_sorted

synapse_array = np.empty((len(syn_index), 6), np.int64)

synapse_count = 0
count = 0

for chunk in pd.read_csv('fib25/Neuprint_Synapses_fib25.csv', chunksize=chunksize):
    ids = chunk[':ID(Syn-ID)']

    # Narrow syn_index to the id range spanned by the first and last row of
    # this chunk. Slicing handles every boundary case (an out-of-range start
    # simply yields an empty array), so no special-casing is needed and
    # pre_index is always an ndarray.
    start_id = ids.iloc[0]
    stop_id = ids.iloc[-1]
    pre_start = np.searchsorted(syn_index, start_id, side='left')
    pre_end = np.searchsorted(syn_index, stop_id, side='right')
    pre_index = syn_index[pre_start:pre_end]

    pre_used_synapse = chunk.loc[ids.isin(pre_index)]
    # Size the block by the rows actually found, not by len(pre_index): if an
    # id in the range is absent from the chunk (or repeated in syn_index) the
    # two differ, and sizing by pre_index would copy uninitialised rows into
    # synapse_array.
    n_found = len(pre_used_synapse)
    li = np.empty((n_found, 6), np.int64)
    for i, (_, row) in enumerate(pre_used_synapse.iterrows()):
        # The location is stored as '{x:…, y:…, z:…}'; quoting the keys makes
        # it a dict literal.
        # NOTE(review): eval() executes whatever is in the CSV -- acceptable
        # only because the dump is a trusted download.
        location = eval(row['location:point{srid:9157}'].replace('x', "'x'").replace('y', "'y'").replace('z', "'z'"))
        # NOTE(review): int() truncates confidence*1e6 -- check whether
        # rounding is wanted for values that are not exactly representable.
        li[i,:] = [row[':ID(Syn-ID)'], # synapse id
                   0 if row['type:string'] == 'pre' else 1, # synapse type
                   int(row['confidence:float']*1000000), # confidence
                   location['x'], location['y'], location['z']]
    synapse_array[synapse_count:synapse_count+n_found,:] = li
    synapse_count += n_found
    count += chunksize
    print(count, len(pre_used_synapse))
synapse_array = synapse_array[:synapse_count,:]


In [None]:
# reorder synapses
# Group every retained (pre synapse, post synapse) pair by its
# (presynaptic neuron, postsynaptic neuron) pair and write one CSV row per
# neuron pair to synapses.csv.

synapse_connections = connections

ids = synapse_array[:,0]
syn_id_dict = {j: i for i, j in enumerate(ids)} # map syn id to row of synapse_array
ids = post_syn_sorted[:,0]
post_syn_id_dict = {j: i for i, j in enumerate(ids)} # map syn id to post_syn_sorted

synapse_dict = {}
wrong_synapse = 0
for i, pair in tqdm(enumerate(synapse_connections)):
    pre_syn_id = pair[0]
    post_syn_id = pair[1]
    post_id = post_syn_id_dict[post_syn_id]
    post_info = synapse_array[syn_id_dict[post_syn_id]]
    # for a 'post' synapse set the first id field is the postsynaptic neuron
    post_neuron_id, pre_neuron_id = post_syn_sorted[post_id, 1:]
    pre_info = synapse_array[syn_id_dict[pre_syn_id]]

    pre_dict = synapse_dict.setdefault(pre_neuron_id, {})
    if post_neuron_id not in pre_dict:
        pre_dict[post_neuron_id] = {'pre_synapse_ids': [],
                                    'post_synapse_ids': [],
                                    'pre_confidence': [],
                                    'post_confidence': [],
                                    'pre_x': [],
                                    'pre_y': [],
                                    'pre_z': [],
                                    'post_x': [],
                                    'post_y': [],
                                    'post_z': [],
                                    }
    info_dict = pre_dict[post_neuron_id]

    # Store plain Python ints. The lists are written with str() below, and
    # with NumPy >= 2 a list of NumPy scalars prints as
    # '[np.int64(1), ...]' instead of '[1, ...]', which would leak NumPy
    # reprs into the CSV.
    pre_confidence, pre_x, pre_y, pre_z = pre_info[2:6].tolist()
    post_confidence, post_x, post_y, post_z = post_info[2:6].tolist()
    info_dict['pre_synapse_ids'].append(int(pre_syn_id))
    info_dict['post_synapse_ids'].append(int(post_syn_id))
    info_dict['pre_confidence'].append(pre_confidence)
    info_dict['post_confidence'].append(post_confidence)
    info_dict['pre_x'].append(pre_x)
    info_dict['pre_y'].append(pre_y)
    info_dict['pre_z'].append(pre_z)
    info_dict['post_x'].append(post_x)
    info_dict['post_y'].append(post_y)
    info_dict['post_z'].append(post_z)

# newline='' is required by the csv module for files passed to csv.writer
with open('synapses.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['pre_id','post_id','N','pre_confidence','post_confidence',\
                     'pre_x','pre_y','pre_z','post_x','post_y','post_z'])
    for pre, k in tqdm(synapse_dict.items()):
        for post, v in k.items():
            # the per-synapse lists are serialised as Python list literals
            writer.writerow([pre, post, len(v['pre_x']), str(v['pre_confidence']), \
                             str(v['post_confidence']), str(v['pre_x']), str(v['pre_y']), str(v['pre_z']), \
                             str(v['post_x']), str(v['post_y']), str(v['post_z'])])

## Load Data to NeuroArch

In [None]:
# Handle to the NeuroArch database named 'medulla', tagged as version "2.0.1".
# NOTE(review): mode 'o' presumably overwrites an existing database of that
# name -- confirm in the NeuroArch documentation before running against data
# you want to keep. The maintainer fields are left blank here.
medulla = na.NeuroArch('medulla', mode = 'o', version = "2.0.1",
                       maintainer_name = "", maintainer_email = "")

In [None]:
# Species record that the data sources below are attached to.
species = medulla.add_Species('Drosophila melanogaster', stage = 'adult',
                                sex = 'female',
                                synonyms = ['fruit fly', 'common fruit fly', 'vinegar fly'])

In [None]:
# Two data sources are registered:
#   * 'Medulla7column' (version 'fib25') -- the connectome, set as the
#     database's default data source;
#   * 'GSE116969' -- the transcriptome study, referenced later as the source of
#     the neurotransmitter annotations.
version = 'fib25'
datasource = medulla.add_DataSource('Medulla7column', version = version,
                                      url = 'https://www.janelia.org/project-team/flyem/research/previous-connectomes-analyzed/seven-column-connectome-fib-sem',
                                      description = 'data obtained from https://github.com/connectome-neuprint/neuPrint/blob/922a107df827a2fedd671438595603c4d15eafa7/fib25_neo4j_inputs.zip; neuron skeleton from https://github.com/janelia-flyem/ConnectomeHackathon2015',
                                      species = species)
# NOTE(review): presumably add_* calls that do not name a data source use this
# default -- confirm in NeuroArch.
medulla.default_DataSource = datasource
transcriptome_datasource = medulla.add_DataSource('GSE116969', version = '1.0',
                                                  url = 'https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE116969',
                                                  description = 'Fred P Davis, Aljoscha Nern, Serge Picard, Michael B Reiser, Gerald M Rubin, Sean R Eddy, Gilbert L Henry, A genetic, genomic, and computational resource for exploring neural circuit function. eLife 2020;9:e50901. DOI: 10.7554/eLife.50901',
                                                  species = species)

In [None]:
# Register the left medulla neuropil, then its strata M1 ... M10 as subregions
# of that neuropil.
medulla.add_Neuropil('MED(L)', synonyms = ['left medulla'])

for i in range(1, 11):
    stratum_name = 'MED-M{}(L)'.format(i)
    stratum_synonyms = ['left medulla M{} stratum'.format(i),
                        'left medulla stratum M{}'.format(i)]
    medulla.add_Subregion(stratum_name,
                          synonyms = stratum_synonyms,
                          neuropil = 'MED(L)')

In [None]:
# Tab-separated GSE116969 expression table; the first column becomes the index.
# The loading loop indexes it as nt_df[<cell type>][<gene>].
nt_df = pd.read_csv('GSE116969_dataTable7b.genes_x_cells_p_expression.modeled_genes.txt', sep = '\t', index_col = 0)

In [None]:
# Load every body of neurons.csv into NeuroArch, either as a Neuron (it has a
# usable name / cell type and a skeleton) or as a NeuronFragment. Bodies with
# no skeleton and no name (or named 'out-…') are only recorded in to_combine.
neuron_list = pd.read_csv('neurons.csv')
new_swc_dir = 'swc_new'
swc_dir = 'swc'
uname_dict = {}   # label -> how many unique names were derived from it so far
columns = {}      # column letter / 'home' -> Circuit object
new_morph = []    # bodies whose skeleton came from new_swc_dir
old_morph = []    # bodies whose skeleton came from swc_dir
to_combine = []   # bodies skipped here (no skeleton, unnamed or 'out-…')

added_neurons = []
added_fragments = []

# instance labels that are shared by several bodies and therefore numbered
_shared_instance_labels = ['Tm23/24', 'Tm23/24-F', 'Dm8-out', 'TmY16?']


def _numbered(label):
    """Return '<label>-<k>', k being the running count of uses of label."""
    uname_dict[label] = uname_dict.get(label, 0) + 1
    return '{}-{}'.format(label, uname_dict[label])


for i, row in tqdm(neuron_list.iterrows()):
    bodyID = row['bodyID']
    cell_type = row['type']
    name = row['instance']
    segment = False
    # Created for every row: the tracing status is written into it for
    # fragments as well. Creating it only for non-segments (as before) raised
    # NameError when the first stored body was a fragment and otherwise reused
    # one dict object across rows.
    info = {}

    # ---- derive a unique name and a cell type -------------------------------
    if not isinstance(name, str):
        # no instance label (NaN): fall back to the type, else it is a segment
        if isinstance(cell_type, str):
            name = '{}-{}'.format(cell_type, bodyID)
        else:
            segment = True
    else:
        if ' home' in name:
            name = name.replace(' home', '-home')
        if not isinstance(cell_type, str):
            # no type given: infer it from the instance label
            prefix = name.split('-')[0]
            if prefix == 'tan':
                cell_type = 'tangential'
                name = _numbered(name)
            elif prefix.startswith('out'):
                cell_type = 'output'
            elif name in _shared_instance_labels:
                cell_type = name
                name = _numbered(name)
            elif prefix.startswith('glia'):
                cell_type = 'glia'
                name = _numbered('glia')
            elif name.startswith('sdk'):
                cell_type = 'sdk'
                name = _numbered('sdk')
            else:
                cell_type = prefix
                if '-' not in name or 'like' in name:
                    name = _numbered(name)
        else:
            # type given: only make the label unique where it is not already
            if name in _shared_instance_labels or cell_type == 'cell' \
                    or '-' not in name or 'like' in name:
                name = _numbered(name)

    if not segment:
        # ---- column circuit: last name component, skipping 'ant'/'post' -----
        column = name.split('-')[-1]
        if column in ['ant', 'post']:
            column = name.split('-')[-2]
        circuit = None
        if (len(column) == 1 and column.isalpha()) or column == 'home':
            circuit = columns.get(column, None)
            if circuit is None:
                circuit = medulla.add_Circuit('Column {}'.format(column), 'Column', neuropil = 'MED(L)')
                columns[column] = circuit

        # ---- neurotransmitters from the expression table (threshold 0.9) ----
        neurotransmitter = []
        if cell_type in nt_df.columns or cell_type == 'R8':
            # NOTE(review): the R1-R6 mapping is only reached when the cell
            # type itself is a column of nt_df -- confirm that is intended.
            if cell_type in ['R{}'.format(i) for i in range(1,7)]:
                gene_expression_type = 'R1_6'
            elif cell_type == 'R8':
                gene_expression_type = 'R8_Rh5'
            else:
                gene_expression_type = cell_type

            expression = nt_df[gene_expression_type]
            for gene, transmitter in [('Hdc', 'histamine'),
                                      ('Gad1', 'GABA'),
                                      ('VAChT', 'acetylcholine'),
                                      ('VGlut', 'glutamate')]:
                if expression[gene] > 0.9:
                    neurotransmitter.append(transmitter)
            # dopamine requires all four genes above threshold
            if all(expression[gene] > 0.9 for gene in ['ple', 'Ddc', 'Vmat', 'DAT']):
                neurotransmitter.append('dopamine')

    # ---- skeleton: freshly fetched one first, then the older one ------------
    new_swc_file = '{}/{}.swc'.format(new_swc_dir, bodyID)
    old_swc_file = '{}/{}.swc'.format(swc_dir, bodyID)
    if os.path.exists(new_swc_file):
        morphology = {'type': 'swc', 'filename': new_swc_file, 'scale': 0.001*10}
        new_morph.append(bodyID)
    elif os.path.exists(old_swc_file):
        morphology = {'type': 'swc', 'filename': old_swc_file, 'scale': 0.001*10}
        old_morph.append(bodyID)
    else:
        morphology = None
        # no name, not traced, no morph -- or an 'out-…' body without morph:
        # nothing is stored for it here
        if segment or name.lower().startswith('out-'):
            to_combine.append(bodyID)
            continue
        # named but without skeleton: store as a fragment
        segment = True

    if isinstance(row['statusLabel'], str):
        info['Tracing Status'] = row['statusLabel']
    else:
        info['Tracing Status'] = 'Untraced'

    if not segment:
        medulla.add_Neuron(name, # uname
                           cell_type, # name
                           referenceId = str(bodyID), #referenceId
                           info = info if len(info) else None,
                           morphology = morphology,
                           neurotransmitters = neurotransmitter if len(neurotransmitter) else None,
                           neurotransmitters_datasources = [transcriptome_datasource]*len(neurotransmitter) if len(neurotransmitter) else None,
                           circuit = circuit)
        added_neurons.append(bodyID)
    else:
        if cell_type == 'unknown' or not isinstance(cell_type, str):
            cell_type = 'segment'
            name = 'segment_{}'.format(bodyID)
        else:
            name = 'segment_{}'.format(name)
            # keep the label only if it already carries the body id
            if str(bodyID) not in name:
                name = 'segment_{}'.format(bodyID)
        medulla.add_NeuronFragment(name,
                                   cell_type,
                                   referenceId = str(bodyID),
                                   info = info if len(info) else None,
                                   morphology = morphology)
        added_fragments.append(bodyID)

In [None]:
# Pull every stored neuron / fragment once, seed the NeuroArch cache with it
# so there is no need for database access later, and index the objects by
# their integer reference id.
neurons = medulla.sql_query('select from NeuronAndFragment').node_objs
for neuron in neurons:
    medulla.set('NeuronAndFragment', neuron.uname, neuron, medulla.default_DataSource)
neuron_ref_to_obj = {}
for neuron in neurons:
    neuron_ref_to_obj[int(neuron.referenceId)] = neuron

In [None]:
# One row per (pre neuron, post neuron) pair; the per-synapse columns hold
# Python list literals stored as strings.
synapse_df = pd.read_csv('synapses.csv')

In [None]:
# Add one Synapse per neuron pair whose two partners are both in the database.
# Pairs with exactly one stored partner are accumulated in tmp, keyed by the
# stored neuron and by the side ('pre' / 'post') of the missing partner.
tmp = {}
morph_dict = {}


def _empty_site_record():
    """Fresh accumulator for the synapses shared with unknown partners."""
    return {'pre_x': [], 'pre_y': [], 'pre_z': [], 'post_x': [], 'post_y': [], 'post_z': [],
            'pre_confidence': [], 'post_confidence': [],
            'N': 0, 'NHP': 0}


def _append_sites(record, row, pre_conf, post_conf, NHP):
    """Append the positions (scaled by 0.01), confidences and counts of row."""
    # NOTE(review): eval() runs the CSV cell content as code; fine only for
    # the locally generated synapses.csv.
    for key in ('pre_x', 'pre_y', 'pre_z', 'post_x', 'post_y', 'post_z'):
        record[key].append(np.array(eval(row[key]))*0.01)
    record['pre_confidence'].append(pre_conf)
    record['post_confidence'].append(post_conf)
    record['N'] += row['N']
    record['NHP'] += NHP


for i, row in tqdm(synapse_df.iterrows()):
    pre = int(row['pre_id'])
    post = int(row['post_id'])
    # -1 marks a partner that was not stored in the database
    if pre not in neuron_ref_to_obj:
        pre = -1
    if post not in neuron_ref_to_obj:
        post = -1
    if pre == -1 and post == -1:
        continue

    # confidences were stored multiplied by 1e6
    pre_conf = np.array(eval(row['pre_confidence']))/1e6
    post_conf = np.array(eval(row['post_confidence']))/1e6
    # number of synapses with both sites at confidence >= 0.7
    NHP = np.sum(np.logical_and(post_conf>=0.7, pre_conf>=0.7))

    if pre == -1:
        per_neuron = tmp.setdefault(post, {})
        if 'pre' not in per_neuron:
            per_neuron['pre'] = _empty_site_record()
        _append_sites(per_neuron['pre'], row, pre_conf, post_conf, NHP)

    elif post == -1:
        per_neuron = tmp.setdefault(pre, {})
        if 'post' not in per_neuron:
            per_neuron['post'] = _empty_site_record()
        _append_sites(per_neuron['post'], row, pre_conf, post_conf, NHP)

    else:
        pre_neuron = neuron_ref_to_obj[pre]
        post_neuron = neuron_ref_to_obj[post]
        # swc-style record: all pre sites first, then all post sites; each
        # post site points at its pre site through 'parent'
        content = {'type': 'swc'}
        content['x'] = (np.array(eval(row['pre_x'])+eval(row['post_x']))/1000.*10).tolist()
        content['y'] = (np.array(eval(row['pre_y'])+eval(row['post_y']))/1000.*10).tolist()
        content['z'] = (np.array(eval(row['pre_z'])+eval(row['post_z']))/1000.*10).tolist()
        n_total = len(content['x'])
        n_half = n_total//2
        content['r'] = [0]*n_total
        content['parent'] = [-1]*n_half + list(range(1, n_half+1))
        content['identifier'] = [7]*n_half + [8]*n_half
        content['sample'] = list(range(1, n_total+1))
        content['confidence'] = pre_conf.tolist() + post_conf.tolist()

        synapse = medulla.add_Synapse(pre_neuron, post_neuron, N = row['N'], NHP = NHP)
        morph_dict[synapse._id] = {'morphology': content}
        


In [None]:
# NOTE(review): presumably writes the edges queued by the add_* calls above to
# the database -- confirm in the NeuroArch documentation.
medulla.flush_edges()

In [None]:
# Separate 'notional' data source (version '2.0'), attached to the first
# Species record found in the database; it is used for the combined
# untraced-segment fragments created afterwards.
version = '2.0'
species = medulla.sql_query('select from Species').node_objs[0]
notional_datasource = medulla.add_DataSource('notional', version = version,
                                         species = species)


In [None]:
def _combined_site_morphology(sites):
    """Build the swc-style synapse record from an accumulator stored in tmp."""
    content = {'type': 'swc'}
    content['x'] = np.concatenate(sites['pre_x'] + sites['post_x']).tolist()
    content['y'] = np.concatenate(sites['pre_y'] + sites['post_y']).tolist()
    content['z'] = np.concatenate(sites['pre_z'] + sites['post_z']).tolist()
    n_total = len(content['x'])
    n_half = n_total//2
    content['r'] = [0]*n_total
    content['parent'] = [-1]*n_half + list(range(1, n_half+1))
    content['identifier'] = [7]*n_half + [8]*n_half
    content['sample'] = list(range(1, n_total+1))
    content['confidence'] = np.concatenate(sites['pre_confidence'] + sites['post_confidence']).tolist()
    return content


# For every stored neuron that has synapses with partners missing from the
# database, create one notional fragment per side that stands in for all of
# those partners, and connect it with a single combined synapse.
for n, v in tqdm(tmp.items()):
    if 'pre' in v:
        # unknown presynaptic partners of neuron n
        post_neuron = neuron_ref_to_obj[n]
        cell_type = 'combined_untraced_segments'
        name = 'segments_presynaptic_to_{}'.format(post_neuron.uname)
        pre_neuron = medulla.add_NeuronFragment(
                                     name,
                                     cell_type,
                                     data_source = notional_datasource)
        content = _combined_site_morphology(v['pre'])
        synapse = medulla.add_Synapse(pre_neuron, post_neuron, N = v['pre']['N'], NHP = v['pre']['NHP'])
        morph_dict[synapse._id] = {'morphology': content}
    if 'post' in v:
        # unknown postsynaptic partners of neuron n
        pre_neuron = neuron_ref_to_obj[n]
        cell_type = 'combined_untraced_segments'
        name = 'segments_postsynaptic_to_{}'.format(pre_neuron.uname)
        post_neuron = medulla.add_NeuronFragment(
                                     name,
                                     cell_type,
                                     data_source = notional_datasource)
        content = _combined_site_morphology(v['post'])
        synapse = medulla.add_Synapse(pre_neuron, post_neuron, N = v['post']['N'], NHP = v['post']['NHP'])
        morph_dict[synapse._id] = {'morphology': content}

In [None]:
# NOTE(review): presumably writes the edges queued for the combined fragments
# and their synapses to the database -- confirm in the NeuroArch documentation.
medulla.flush_edges()

In [None]:
# Attach the collected synapse morphology to each synapse record, keyed by
# the record id under which it was stored in morph_dict.
for rid, data in tqdm(morph_dict.items()):
    if 'morphology' not in data:
        continue
    medulla.add_morphology(rid, data['morphology'])

## Figure out Arborization Data from Loaded Synapse Positions

In [None]:
def strata_arborization(z):
    """Count how many z values fall into each bin of the fixed boundary list.

    Parameters
    ----------
    z : array_like
        z coordinates to classify.

    Returns
    -------
    collections.Counter
        Maps the bin index returned by ``np.digitize`` (0 for values below
        the first boundary, 9 for values at or above the last one) to the
        number of values in that bin.
    """
    boundaries = np.array([20, 28, 35, 38, 42, 44, 55, 58, 70])
    bin_indices = np.digitize(z, boundaries)
    return Counter(bin_indices)

In [None]:
# Re-query all neurons and fragments, so the list also contains the fragments
# added after the earlier query.
neurons = medulla.sql_query('select from NeuronAndFragment').node_objs

In [None]:
# Per-neuron tallies of synapses per bin index: 'axons' counts the synapses a
# neuron sends, 'dendrites' the ones it receives.
neuron_dict = {
    neuron.uname: {'axons': Counter(), 'dendrites': Counter(), 'obj': neuron}
    for neuron in neurons
}

for neuron in tqdm(neurons):
    query = """select expand(out('SendsTo')) from {}""".format(neuron._id)
    outgoing_synapses = medulla.sql_query(query).node_objs
    for synapse in outgoing_synapses:
        morphology_nodes = [n for n in synapse.out('HasData') if isinstance(n, models.MorphologyData)]
        morphology = morphology_nodes[0]
        arborization = [{'type': 'neuropil',
                         'synapses': {'MED(L)': len(morphology.x)}}]
        # only the first half of the z list is binned
        half = len(morphology.z)//2
        s = strata_arborization(morphology.z[:half])
        subregion_counts = {'MED-M{}(L)'.format(k+1): v for k, v in s.items()}
        arborization.append({'type': 'subregion',
                             'synapses': subregion_counts})
        neuron_dict[neuron.uname]['axons'] += s
        try:
            neuron_dict[synapse.out('SendsTo')[0].uname]['dendrites'] += s
        except KeyError:
            # target not among the queried neurons: nothing to tally
            pass
        medulla.add_synapse_arborization(synapse, arborization)


In [None]:
# Store the accumulated synapse tallies as arborization data, for Neuron
# objects only (fragments are skipped).
for i, neuron in tqdm(enumerate(neurons)):
    if not isinstance(neuron, models.Neuron):
        continue
    arborization = [
        {'type': 'neuropil',
         'dendrites': {'MED(L)': sum(neuron_dict[neuron.uname]['dendrites'].values())},
         'axons': {'MED(L)': sum(neuron_dict[neuron.uname]['axons'].values())}},
        {'type': 'subregion',
         'dendrites': {'MED-M{}(L)'.format(k+1): v for k, v in neuron_dict[neuron.uname]['dendrites'].items()},
         'axons': {'MED-M{}(L)'.format(k+1): v for k, v in neuron_dict[neuron.uname]['axons'].items()}},
    ]
    medulla.add_neuron_arborization(neuron, arborization)
    
    