This tutorial provides code to load the MANC dataset v1.0 into a NeuroArch database. Requirements before running the notebook:
- Installed [NeuroArch](https://github.com/fruitflybrain/neuroarch), [OrientDB Community Version](https://www.orientdb.org/download) version 3.1.x, and [pyorient](https://github.com/fruitflybrain/pyorient). The [NeuroNLP Docker image](https://hub.docker.com/r/fruitflybrain/neuronlp) and the [FlyBrainLab Docker image](https://hub.docker.com/r/fruitflybrain/fbl) both come with these software requirements already installed.
- Installed [PyMeshLab](https://pypi.org/project/pymeshlab/).
- Installed [neuprint-python](https://github.com/connectome-neuprint/neuprint-python).
- Download the [Neuprint database dump for the MANC dataset v1.0](https://storage.googleapis.com/flyem-manc-exports/v1.0/neuprint_manc_v1.0/neuprint_manc_v1.0_csv.tar.gz).
- Have the [token](https://connectome-neuprint.github.io/neuprint-python/docs/client.html#neuprint.client.Client) for Neuprint HTTP access ready.
- Have more than 30 GB free disk space (for Neuprint dump and NeuroArch database).

A backup of the database created by this notebook can be downloaded [here](https://drive.google.com/file/d/15MgSmFMFl_vUtS32rVpb0E7HKpJAQe8v/view?usp=drive_link). To restore it in OrientDB, run
```
/path/to/orientdb/bin/console.sh "create database plocal:../databases/manc admin admin; restore database /path/to/manc1.0_na_v1.0.0_backup.zip"
```

In [None]:
import glob
import os
import subprocess
import csv
import json
import warnings
from requests import HTTPError

import numpy as np
import pandas as pd
# from neuprint import Client
from tqdm import tqdm
import h5py
import pymeshlab as ml

In [None]:
# Define the regions of interest (ROIs): every ROI name listed in the Neuprint
# dump becomes an entry of the 'VNC' system, keyed by the ROI name itself.
all_rois = {}
with open("neuprint_manc_v1.0_csv/all_ROIs.txt") as f:
    for line in f:
        roi_name = line.strip()
        if not roi_name:
            # Skip blank lines (e.g. a trailing empty line) so that no
            # empty-named ROI is registered.
            continue
        all_rois[roi_name] = {'System': 'VNC', 'Neuropil': roi_name}

In [None]:
# Neuron attribute columns copied verbatim from the Neuprint dump, in output
# order, between the 'cropped' flag and the 'size' column.  'rootSide:string'
# appears twice on purpose: the header written for neurons_all.csv also holds
# two 'rootSide' columns, and the row layout has to match that header.
_NEURON_ATTRIBUTE_COLUMNS = (
    'instance:string',
    'type:string',
    'systematicType:string',
    'hemilineage:string',
    'somaSide:string',
    'rootSide:string',
    'class:string',
    'subclass:string',
    'entryNerve:string',
    'exitNerve:string',
    'rootSide:string',
    'somaNeuromere:string',
    'somaLocation:point{srid:9157}',
    'predictedNt:string',
    'predictedNtProb:float',
    'modality:string',
    'transmission:string',
)


def process(chunk):
    """Convert one chunk of the Neuprint neuron dump into output rows.

    Only bodies whose upstream + downstream count is non-zero are kept.

    Parameters
    ----------
    chunk : pandas.DataFrame
        Rows of the Neuprint neuron CSV, with the original typed column
        names (e.g. 'bodyId:long', 'roiInfo:string').

    Returns
    -------
    list of list
        One 25-element row per kept body: bodyId, upstream, downstream,
        status, statusLabel, cropped, the attribute columns listed in
        _NEURON_ATTRIBUTE_COLUMNS, size, and the neuropil summary string
        'roi:upstream:downstream;...'.

    Raises
    ------
    KeyError
        If 'roiInfo:string' names an ROI (other than "None") that is not
        present in the module-level ``all_rois`` table.
    """
    has_synapses = (chunk['upstream:int'] + chunk['downstream:int'] != 0).to_numpy()
    used = chunk[has_synapses]

    neurons = []
    for _, row in used.iterrows():
        roi_info = json.loads(row['roiInfo:string'])
        # The "None" key is skipped: it has no entry in all_rois.
        neuropil_list = ';'.join(
            '{}:{}:{}'.format(all_rois[roi]['Neuropil'],
                              counts.get('upstream', 0),
                              counts.get('downstream', 0))
            for roi, counts in roi_info.items()
            if roi != "None")

        # A missing 'cropped' flag (NaN) is passed through untouched;
        # otherwise the boolean is stored as 0/1.
        cropped = row['cropped:boolean']
        if not np.isnan(cropped):
            cropped = int(cropped)

        record = [row['bodyId:long'],
                  row['upstream:int'],
                  row['downstream:int'],
                  row['status:string'],
                  row['statusLabel:string'],
                  cropped]
        record.extend(row[column] for column in _NEURON_ATTRIBUTE_COLUMNS)
        record.append(row['size:long'])
        record.append(neuropil_list)
        neurons.append(record)
    return neurons

In [None]:
# Convert the Neuprint neuron dump into neurons_all.csv.  The dump is read in
# chunks so that it never has to be held in memory as a whole.
chunksize = 100000

# newline='' is what the csv module asks for on files handed to csv.writer;
# without it, platforms that translate line endings write an extra blank line
# after every row.
with open('neurons_all.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # The header repeats 'rootSide' because process() emits that column twice.
    writer.writerow(['bodyID','pre','post','status','statusLabel','cropped',
                     'instance','type', 'systematicType', 'hemilineage', 'somaSide', 'rootSide',
                     'class','subclass', 'entryNerve', 'exitNerve', 'rootSide', 'somaNeuromere',
                     'somaLocation', 'predictedNt', 'predictedNtProb', 'modality', 'transmission',
                     'size','neuropils'])
    for chunk in tqdm(pd.read_csv('neuprint_manc_v1.0_csv/Neuprint_Neurons_manc_v1.csv', chunksize=chunksize)):
        neurons = process(chunk)
        writer.writerows(neurons)

In [None]:
# Split the SynapseSet -> Synapse relationships into presynaptic and
# postsynaptic sites.  Every row of pre_syn / post_syn holds
# [synapse id, first body id, second body id], where the two body ids are the
# first two '_'-separated fields of the synapse-set id and the third field
# ('pre' or 'post') selects the table.
neurons = pd.read_csv('neurons_all.csv')

traced_neuron_id = neurons['bodyID'].to_numpy()

chunksize = 1000000
# Preallocated output buffers; they are trimmed to the filled length below.
pre_syn = np.empty((int(2e8),3), np.int64)
post_syn = np.empty((int(2e8),3), np.int64)

pre_count = 0
post_count = 0
count = 0
for chunk in pd.read_csv('neuprint_manc_v1.0_csv//Neuprint_SynapseSet_to_Synapses_manc_v1.csv', chunksize=chunksize):
    # Single pass over the chunk: parse each synapse-set id once and route the
    # row to the matching table.
    pre_rows = []
    post_rows = []
    for set_id, syn_id in zip(chunk[':START_ID(SynSet-ID)'], chunk[':END_ID(Syn-ID)']):
        fields = set_id.split('_')
        kind = fields[2]
        if kind == 'pre':
            pre_rows.append([syn_id, int(fields[0]), int(fields[1])])
        elif kind == 'post':
            post_rows.append([syn_id, int(fields[0]), int(fields[1])])
    # A chunk may contain no rows of one kind; the guards keep that from
    # touching the buffers.
    if pre_rows:
        pre_site = np.array(pre_rows)
        pre_syn[pre_count:pre_count+pre_site.shape[0], :] = pre_site
        pre_count += pre_site.shape[0]
    if post_rows:
        post_site = np.array(post_rows)
        post_syn[post_count:post_count+post_site.shape[0], :] = post_site
        post_count += post_site.shape[0]
    # Count the rows actually read, so the last (shorter) chunk is reported
    # correctly.
    count += len(chunk)
    print(count, pre_count, post_count)

pre_syn = pre_syn[:pre_count,:]
post_syn = post_syn[:post_count,:]

# Order both tables by synapse id.
ind = np.argsort(pre_syn[:,0])
pre_syn_sorted = pre_syn[ind, :]
ind = np.argsort(post_syn[:,0])
post_syn_sorted = post_syn[ind, :]
# Optional checkpoint of the sorted tables:
# with h5py.File('syn_pre_post_sorted_by_synapse_id.h5', 'w') as f:
#     f['pre_syn_sorted'] = pre_syn_sorted
#     f['post_syn_sorted'] = post_syn_sorted

In [None]:
# Extract the synapse (pre-site) to synapse (post-site) connections.
# Selecting on the postsynaptic site alone reaches every synapse, because one
# presynaptic site can have multiple postsynaptic sites.
# Optional reload of the checkpointed table:
# with h5py.File('syn_pre_post_sorted_by_synapse_id.h5', 'r') as f:
#     post_syn_sorted = f['post_syn_sorted'][:]
post_syn_index = post_syn_sorted[:,0].copy()

df = pd.read_csv('neuprint_manc_v1.0_csv/Neuprint_Synapse_Connections_manc_v1.csv')
post_ids = df[':END_ID(Syn-ID)']
# Row positions whose postsynaptic synapse id is one of the known post sites.
is_known_post = post_ids.isin(post_syn_index).to_numpy()
used = np.flatnonzero(is_known_post)
connections = df.iloc[used].to_numpy()
# Order the connections by the second column.
ind = np.argsort(connections[:,1])
connections = connections[ind, :]
# Optional checkpoint:
# with h5py.File('synapse_connections.h5', 'w') as f:
#     f['synapse_connecions'] = connections

In [None]:
# Extract the details of every synapse site referenced by pre_syn_sorted /
# post_syn_sorted: id, type, confidence, location and ROI membership.
# Optional reload of the checkpointed tables:
# with h5py.File('syn_pre_post_sorted_by_synapse_id.h5', 'r') as f:
#     pre_syn_sorted = f['pre_syn_sorted'][:]
#     post_syn_sorted = f['post_syn_sorted'][:]
chunksize = 100000

# Sorted ids of all used sites: the presynaptic ids are de-duplicated, the
# postsynaptic ids are taken as they are.
syn_index = np.sort(np.concatenate((np.unique(pre_syn_sorted[:,0]),
                                    post_syn_sorted[:,0])))

# Columns of synapse_array: synapse id, type (0 = pre, 1 = post),
# confidence * 1e6, x, y, z.
synapse_array = np.empty((len(syn_index), 6), np.int64)
synapse_innervate = np.empty((len(syn_index), 61), bool) # number of columns that are neuropil:bool

synapse_count = 0
count = 0

for chunk in pd.read_csv('neuprint_manc_v1.0_csv/Neuprint_Synapses_manc_v1.csv', chunksize=chunksize):
    ids = chunk[':ID(Syn-ID)']

    # Ids of syn_index lying between the first and last id of this chunk.
    start_id = ids.iloc[0]
    stop_id = ids.iloc[-1]
    pre_start = np.searchsorted(syn_index, start_id, side='left')
    pre_end = np.searchsorted(syn_index, stop_id, side='right')
    # Slicing clamps out-of-range bounds, so this is an empty array (not a
    # list) when the chunk lies past the end of syn_index.
    pre_index = syn_index[pre_start:pre_end]
    pre_used_synapse = chunk.loc[ids.isin(pre_index)]
    li = np.empty((pre_index.size, 6), np.int64)
    li1 = np.empty((pre_index.size, 61), bool) # number of columns that are neuropil:bool
    for i, (_, row) in enumerate(pre_used_synapse.iterrows()):
        # NOTE(review): eval() runs on text taken from the CSV dump; the keys
        # are quoted first so that the location text evaluates to a dict.
        # Only use this on a trusted dump.
        location = eval(row['location:point{srid:9157}'].replace('x', "'x'").replace('y', "'y'").replace('z', "'z'"))
        li[i,:] = [row[':ID(Syn-ID)'], # synapse id
                     0 if row['type:string'] == 'pre' else 1, #synapse type
                     int(row['confidence:float']*1000000), #confidence
                     location['x'], location['y'], location['z']]
        # Columns from index 9 onward are the ROI columns; a non-NaN value
        # marks the ROI as containing this site.
        li1[i,:] = ~np.isnan(np.asarray(row.values[9:], np.double))
    synapse_array[synapse_count:synapse_count+pre_index.shape[0],:] = li
    synapse_innervate[synapse_count:synapse_count+pre_index.shape[0],:] = li1
    synapse_count += pre_index.shape[0]
    # Count the rows actually read, so the last (shorter) chunk is reported
    # correctly.
    count += len(chunk)
    print(count, len(pre_used_synapse))
synapse_array = synapse_array[:synapse_count,:]
synapse_innervate = synapse_innervate[:synapse_count,:]

# Optional checkpoint:
# with h5py.File('syn_used_details.h5', 'w') as f:
#     f['synapse_array'] = synapse_array
#     f['synapse_innervate'] = synapse_innervate

In [None]:
# Regroup the synapse connections by (presynaptic neuron, postsynaptic neuron).
#
# Optional reload of the checkpointed tables:
# with h5py.File('syn_used_details.h5', 'r') as f:
#     synapse_array = f['synapse_array'][:]
#     synapse_innervate = f['synapse_innervate']
# with h5py.File('synapse_connections.h5', 'r') as f:
#     synapse_connections = f['synapse_connecions'][:]
# with h5py.File('syn_pre_post_sorted_by_synapse_id.h5', 'r') as f:
#     pre_syn_sorted = f['pre_syn_sorted'][:]
#     post_syn_sorted = f['post_syn_sorted'][:]

synapse_connections = connections

# synapse id -> row of synapse_array / synapse_innervate
ids = synapse_array[:,0]
syn_id_dict = {syn_id: row_idx for row_idx, syn_id in enumerate(ids)}
# synapse id -> row of post_syn_sorted
ids = post_syn_sorted[:,0]
post_syn_id_dict = {syn_id: row_idx for row_idx, syn_id in enumerate(ids)}

synapse_dict = {}
wrong_synapse = 0
for i, pair in tqdm(enumerate(synapse_connections)):
    pre_syn_id, post_syn_id = pair[0], pair[1]

    # The postsynaptic site identifies both neurons of the connection.
    post_id = post_syn_id_dict[post_syn_id]
    post_row = syn_id_dict[post_syn_id]
    post_info = synapse_array[post_row]
    post_info1 = synapse_innervate[post_row]
    post_neuron_id, pre_neuron_id = post_syn_sorted[post_id, 1:]

    pre_info = synapse_array[syn_id_dict[pre_syn_id]]

    pre_dict = synapse_dict.get(pre_neuron_id)
    if pre_dict is None:
        pre_dict = {}
        synapse_dict[pre_neuron_id] = pre_dict

    info_dict = pre_dict.get(post_neuron_id)
    if info_dict is None:
        info_dict = {'pre_synapse_ids': [],
                     'post_synapse_ids': [],
                     'pre_confidence': [],
                     'post_confidence': [],
                     'pre_x': [],
                     'pre_y': [],
                     'pre_z': [],
                     'post_x': [],
                     'post_y': [],
                     'post_z': [],
                     'regions': np.zeros(61, np.int32)}
        pre_dict[post_neuron_id] = info_dict

    # Columns 2..5 of synapse_array rows are confidence, x, y, z.
    for key, value in (('pre_synapse_ids', pre_syn_id),
                       ('post_synapse_ids', post_syn_id),
                       ('pre_confidence', pre_info[2]),
                       ('post_confidence', post_info[2]),
                       ('pre_x', pre_info[3]),
                       ('pre_y', pre_info[4]),
                       ('pre_z', pre_info[5]),
                       ('post_x', post_info[3]),
                       ('post_y', post_info[4]),
                       ('post_z', post_info[5])):
        info_dict[key].append(value)
    # Per-ROI synapse counts, taken from the postsynaptic site's ROI flags.
    info_dict['regions'] += post_info1


In [None]:
# Write one row per (presynaptic neuron, postsynaptic neuron) pair to
# synapses_all.csv.

# Only the header of the synapse dump is needed to recover the ROI names;
# nrows=0 reads it without leaving an open chunk reader behind.
header = pd.read_csv('neuprint_manc_v1.0_csv/Neuprint_Synapses_manc_v1.csv', nrows=0)
labels = [column.split(':')[0] for column in header.columns.to_list()]
regions = labels[9:]


def _plain_ints(values):
    """Return *values* as a list of built-in ints.

    str() of a list uses the repr of its items, and NumPy >= 2 renders scalar
    reprs as 'np.int64(5)'; converting first keeps the CSV cells in the plain
    '[5, 6]' form.
    """
    return [int(value) for value in values]


# newline='' is what the csv module asks for on files handed to csv.writer.
with open('synapses_all.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['pre_id','post_id','N','pre_confidence','post_confidence',\
                     'pre_x','pre_y','pre_z','post_x','post_y','post_z',\
                     'neuropils'])
    for pre, partners in tqdm(synapse_dict.items()):
        for post, v in partners.items():
            # ROI name -> synapse count, for the ROIs with a non-zero count.
            reg = {regions[i]: v['regions'][i] for i in np.nonzero(v['regions'])[0]}
            neuropil_list = ';'.join(
                '{}:{}'.format(all_rois[roi_name]['Neuropil'], n_synapses)
                for roi_name, n_synapses in reg.items())

            writer.writerow([pre, post, len(v['pre_x']),
                             str(_plain_ints(v['pre_confidence'])),
                             str(_plain_ints(v['post_confidence'])),
                             str(_plain_ints(v['pre_x'])),
                             str(_plain_ints(v['pre_y'])),
                             str(_plain_ints(v['pre_z'])),
                             str(_plain_ints(v['post_x'])),
                             str(_plain_ints(v['post_y'])),
                             str(_plain_ints(v['post_z'])),
                             neuropil_list])

## Loading NeuroArch Database

In [None]:
import neuroarch.na as na

In [None]:
# Open the 'manc' database through NeuroArch on port 2424.
# NOTE(review): mode 'o' presumably overwrites an existing database of the
# same name — confirm against the NeuroArch documentation before running.
# maintainer_name / maintainer_email are left empty here.
vnc = na.NeuroArch('manc', port = 2424, mode = 'o', version = "1.0.0",
                   maintainer_name = "", maintainer_email = "")

In [None]:
# Register the species of the dataset: adult male Drosophila melanogaster,
# together with its common-name synonyms.
species = vnc.add_Species('Drosophila melanogaster', stage = 'adult',
                            sex = 'male',
                            synonyms = ['fruit fly', 'common fruit fly', 'vinegar fly'])

In [None]:
# Register the 'manc' data source (version 1.0) for the species and make it
# the default data source of this NeuroArch session.
version = '1.0'
datasource = vnc.add_DataSource('manc', version = version,
                                      url = 'https://www.janelia.org/project-team/flyem/manc-connectome',
                                      species = species)
vnc.default_DataSource = datasource

In [None]:
# Add the 'VNC' subsystem; every ROI in all_rois names it as its 'System'.
vnc.add_Subsystem('VNC', synonyms = ['ventral nerve cord'])

In [None]:
# MeshLab filter script applied to the ROI meshes: quadric edge collapse
# decimation with a target of 60000 faces.
filter_script = """<!DOCTYPE FilterScript>
<FilterScript>
 <filter name="Simplification: Quadric Edge Collapse Decimation">
  <Param type="RichInt" value="60000" name="TargetFaceNum"/>
  <Param type="RichFloat" value="0.2" name="TargetPerc"/>
  <Param type="RichFloat" value="1" name="QualityThr"/>
  <Param type="RichBool" value="true" name="PreserveBoundary"/>
  <Param type="RichFloat" value="1" name="BoundaryWeight"/>
  <Param type="RichBool" value="true" name="OptimalPlacement"/>
  <Param type="RichBool" value="true" name="PreserveNormal"/>
 </filter>
</FilterScript>"""

with open('filter_file_tmp.mlx', 'w') as f:
    f.write(filter_script)

In [None]:
# Load the mesh of every ROI, simplify it with the filter script written to
# filter_file_tmp.mlx, and store it as a neuropil of the ROI's system.
for k, v in all_rois.items():
    neuropil = v['Neuropil']
    # Entries without a neuropil, or with a list of neuropils, have no mesh
    # of their own.
    if neuropil is None or isinstance(neuropil, list):
        continue

    ms = ml.MeshSet()
    ms.load_new_mesh("corrected_roi/{}.obj".format(k))
    ms.load_filter_script('filter_file_tmp.mlx')
    ms.apply_filter_script()
    current_mesh = ms.current_mesh()

    # The mesh is stored as flat vertex and face lists.
    mesh_morphology = {'type': 'mesh',
                       "vertices": (current_mesh.vertex_matrix()).flatten().tolist(),
                       "faces": current_mesh.face_matrix().flatten().tolist()}
    vnc.add_Neuropil(k, morphology = mesh_morphology, subsystem = v['System'])

#### Load Neurons

In [None]:
# Read back the neuron table that was written to neurons_all.csv.
neuron_list = pd.read_csv('neurons_all.csv')

In [None]:
# Body ids for which an SWC morphology file exists.  The id is the file name
# up to its first '.'; os.path.basename is used instead of splitting on '/'
# so that paths returned with a different separator are handled as well.
all_swc = [int(os.path.basename(n).split('.')[0]) for n in glob.glob('swc/*.swc')]
# Boolean mask over neuron_list: True where the body has an SWC file.
has_swc = neuron_list['bodyID'].isin(all_swc)

In [None]:
# Load the chosen bodies into NeuroArch.  A body with an SWC file is stored as
# a Neuron; a body without one is stored as a NeuronFragment named
# 'segment_<bodyID>'.
swc_dir = 'swc'
uname_dict = {}            # base name -> number of neurons given that name so far
segment_ids = set()        # not filled by this cell
added = 0                  # number of rows processed
unadded = []               # not filled by this cell
to_combine = []            # not filled by this cell
loaded_to_segment = []     # body ids stored as fragments for lack of an SWC file

neurotransmitter_translation = {'acetylcholine': 'acetylcholine',
                                'gaba': 'GABA',
                                'glutamate': 'glutamate'}
# The soma location text is evaluated with eval() below and uses the bare
# names x, y and z as keys; these definitions turn them into string keys.
x = 'x'
y = 'y'
z = 'z'

# Bodies that are traced (and not labelled 'Leaves'), or that have an instance name.
choose = ((neuron_list['status']=='Traced') & (neuron_list['statusLabel']!='Leaves')) | (~neuron_list['instance'].isna())


def _location_tag(row):
    """Return the first available location tag of *row*, or None.

    Tried in order: soma neuromere, entry nerve (text before the first '_'),
    exit nerve (text before the first ' ' and then before the first '_').
    """
    if isinstance(row['somaNeuromere'], str):
        return row['somaNeuromere']
    if isinstance(row['entryNerve'], str):
        return row['entryNerve'].split('_')[0]
    if isinstance(row['exitNerve'], str):
        return row['exitNerve'].split(' ')[0].split('_')[0]
    return None


def _name_and_type(row, bodyID):
    """Return (name, cell_type, synonym) for one row of neuron_list.

    The instance name is used when present (and not 'TBD').  Otherwise the
    name is built as '<type>_<location tag>' plus, when a soma side is known,
    '_<first letter of the soma side>'.  synonym is the 'type' value when it
    differs from the systematic type that supplied the name, else None.
    """
    instance = row['instance']
    systematic_type = row['systematicType']
    plain_type = row['type']

    if isinstance(instance, str) and instance != 'TBD':
        if isinstance(systematic_type, str):
            return instance, systematic_type, None
        if isinstance(plain_type, str):
            return instance, plain_type, None
        return instance, 'unknown', None

    synonym = None
    if isinstance(systematic_type, str) or isinstance(plain_type, str):
        if isinstance(systematic_type, str):
            cell_type = systematic_type
            if isinstance(plain_type, str) and plain_type != systematic_type:
                synonym = plain_type
        else:
            cell_type = plain_type
        tag = _location_tag(row)
        # NOTE(review): when no location tag is available the name stays None
        # and is formatted as the text 'None' below (e.g. 'None_L', 'None_1')
        # — confirm whether a fallback name is wanted.
        name = None if tag is None else '{}_{}'.format(cell_type, tag)
    else:
        cell_type = 'unknown'
        name = 'unknown_{}'.format(bodyID)
    if isinstance(row['somaSide'], str):
        name = '{}_{}'.format(name, row['somaSide'][0])
    return name, cell_type, synonym


def _neuropil_arborization(c_neuropils):
    """Parse a 'roi:a:b;roi:a:b' summary into the arborization list.

    Returns an empty list when *c_neuropils* is not a string.  Only counts
    greater than zero are kept.
    NOTE(review): process() writes the fields as roi:upstream:downstream, so
    'axons' receives the upstream count and 'dendrites' the downstream count
    — confirm that this mapping is intended.
    """
    if not isinstance(c_neuropils, str):
        return []
    entries = [entry.split(':') for entry in c_neuropils.split(';')]
    dendrites = {e[0]: int(e[2]) for e in entries if int(e[2]) > 0}
    axons = {e[0]: int(e[1]) for e in entries if int(e[1]) > 0}
    return [{'dendrites': dendrites, 'axons': axons, 'type': 'neuropil'}]


def _load_morphology(path):
    """Load the SWC file at *path* with coordinates and radii scaled by 0.008.

    NOTE(review): the 0.008 factor presumably converts voxel units to
    micrometres — confirm.  A missing file raises FileNotFoundError.
    """
    swc = na.load_swc(path)
    return {'x': (swc['x']*0.008).tolist(),
            'y': (swc['y']*0.008).tolist(),
            'z': (swc['z']*0.008).tolist(),
            'r': (swc['r']*0.008).tolist(),
            'parent': swc['parent'].tolist(),
            'identifier': [0]*(len(swc['x'])),
            'sample': swc['sample'].tolist(),
            'type': 'swc'}


for i, row in tqdm(neuron_list[choose].iterrows()):
    bodyID = row['bodyID']
    name, cell_type, synonym = _name_and_type(row, bodyID)

    info = {}
    if synonym is not None:
        info['synonym'] = synonym
    neurotransmitter = None

    # Make the name unique by appending a running count per base name.
    if cell_type != 'unknown':
        uname_dict[name] = uname_dict.get(name, 0) + 1
        name = '{}_{}'.format(name, uname_dict[name])

    added += 1
    arborization = _neuropil_arborization(row['neuropils'])

    try:
        morphology = _load_morphology('{}/{}.swc'.format(swc_dir, bodyID))
    except FileNotFoundError:
        # No morphology available: store the body as a fragment instead.
        morphology = None
        segment = True
        loaded_to_segment.append(bodyID)
    else:
        segment = False

    if isinstance(row['statusLabel'], str):
        info['MANC Trace Status'] = row['statusLabel']
    else:
        info['MANC Trace Status'] = 'Untraced'

    if segment:
        cell_type = 'segment'
        name = 'segment_{}'.format(bodyID)
        vnc.add_NeuronFragment(name,
                               cell_type,
                               referenceId = str(bodyID),
                               info = info,
                               morphology = morphology,
                               arborization = arborization)
        continue

    if isinstance(row['predictedNt'], str):
        if row['predictedNt'] != 'unknown':
            neurotransmitter = [neurotransmitter_translation[row['predictedNt']]]
            info['predicted_neurotransmitter_probability'] = str(row['predictedNtProb'])
    if isinstance(row['somaLocation'], str):
        # NOTE(review): eval() on text read from neurons_all.csv; it relies on
        # the module-level x, y, z names defined above.  Trusted input only.
        soma_loc = {k:v*0.008 for k,v in eval(row["somaLocation"]).items()}
        info['soma_location'] = "{},{},{}".format(soma_loc['x'],soma_loc['y'],soma_loc['z'])
    for attribute in ('hemilineage', 'class', 'subclass', 'modality'):
        if isinstance(row[attribute], str):
            info[attribute] = row[attribute]

    vnc.add_Neuron(name, # uname
                   cell_type, # name
                   referenceId = str(bodyID), #referenceId
                   info = info,
                   morphology = morphology,
                   arborization = arborization,
                   neurotransmitters = neurotransmitter)

In [None]:
# Store the bodies that were not chosen as neurons, but that do have an SWC
# file, as NeuronFragments named 'segment_<bodyID>'.
not_chosen = neuron_list[~choose]
n_added = 0
for i, row in tqdm(not_chosen.iterrows()):
    bodyID = row['bodyID']
    swc_path = '{}/{}.swc'.format(swc_dir, bodyID)

    # Bodies without a morphology file are left out.
    if not os.path.exists(swc_path):
        continue

    cell_type = 'segment'
    name = 'segment_{}'.format(bodyID)
    added += 1

    # Parse the 'roi:a:b;...' summary; only counts above zero are kept.
    c_neuropils = row['neuropils']
    arborization = []
    if isinstance(c_neuropils, str):
        fields = [item.split(':') for item in c_neuropils.split(';')]
        dendrites = {}
        for parts in fields:
            if int(parts[2]) > 0:
                dendrites[parts[0]] = int(parts[2])
        axons = {}
        for parts in fields:
            if int(parts[1]) > 0:
                axons[parts[0]] = int(parts[1])
        arborization.append({'dendrites': dendrites, 'axons': axons, 'type': 'neuropil'})

    # Coordinates and radii are scaled by 0.008.
    df = na.load_swc(swc_path)
    morphology = {'type': 'swc'}
    for axis in ('x', 'y', 'z', 'r'):
        morphology[axis] = (df[axis]*0.008).tolist()
    morphology['parent'] = df['parent'].tolist()
    morphology['identifier'] = [0]*(len(df['x']))
    morphology['sample'] = df['sample'].tolist()

    status_label = row['statusLabel']
    info = {'MANC Trace Status': status_label if isinstance(status_label, str) else 'Untraced'}

    vnc.add_NeuronFragment(name,
                           cell_type,
                           referenceId = str(bodyID),
                           info = info,
                           morphology = morphology,
                           arborization = arborization)
    n_added += 1


In [None]:
# NOTE(review): presumably writes the edges queued by the add_* calls made so
# far to the database — confirm against the NeuroArch API.
vnc.flush_edges()

In [None]:
# Fetch every neuron and fragment from the database, put each one into the
# NeuroArch cache (so later lookups need no database access), and index them
# by their integer referenceId.
neurons = vnc.sql_query('select from NeuronAndFragment').node_objs
neuron_ref_to_obj = {}
for neuron in neurons:
    vnc.set('NeuronAndFragment', neuron.uname, neuron, vnc.default_DataSource)
    neuron_ref_to_obj[int(neuron.referenceId)] = neuron

### Load Synapses

In [None]:
# Load the synapses from synapses_all.csv.
#
# A row whose two neurons are both in the database becomes a Synapse right
# away; its morphology and arborization are kept in morph_dict for a later
# cell.  A row with only one known neuron is accumulated in tmp, keyed by the
# known neuron's id and by the side ('pre' or 'post') of the unknown partner.
# Rows with no known neuron are skipped.
tmp = {}
morph_dict = {}

_SITE_KEYS = ('pre_x', 'pre_y', 'pre_z', 'post_x', 'post_y', 'post_z')


def _partner_record(store, neuron_id, side):
    """Return store[neuron_id][side], creating the empty record on first use."""
    sides = store.setdefault(neuron_id, {})
    if side not in sides:
        record = {key: [] for key in _SITE_KEYS}
        record['pre_confidence'] = []
        record['post_confidence'] = []
        record['N'] = 0
        record['NHP'] = 0
        sides[side] = record
    return sides[side]


def _accumulate(record, row, pre_conf, post_conf, nhp):
    """Append the sites of one CSV row to *record* and add up its counts."""
    for key in _SITE_KEYS:
        # Coordinates are scaled by 0.008.  eval() parses the list text that
        # this notebook wrote to synapses_all.csv.
        record[key].append(np.array(eval(row[key]))*0.008)
    record['pre_confidence'].append(pre_conf)
    record['post_confidence'].append(post_conf)
    record['N'] += row['N']
    record['NHP'] += nhp


for chunk in tqdm(pd.read_csv('synapses_all.csv', chunksize=100000)):
    for i, row in chunk.iterrows():
        pre = int(row['pre_id'])
        post = int(row['post_id'])
        # -1 marks a neuron that is not in the database.
        if pre not in neuron_ref_to_obj:
            pre = -1
        if post not in neuron_ref_to_obj:
            post = -1
        if pre == -1 and post == -1:
            continue

        # Confidences were stored multiplied by 1e6.
        pre_conf = np.array(eval(row['pre_confidence']))/1e6
        post_conf = np.array(eval(row['post_confidence']))/1e6
        # Number of synapses whose pre and post confidence are both >= 0.7.
        NHP = np.sum(np.logical_and(post_conf>=0.7, pre_conf>=0.7))

        if pre == -1:
            _accumulate(_partner_record(tmp, post, 'pre'), row, pre_conf, post_conf, NHP)
        elif post == -1:
            _accumulate(_partner_record(tmp, pre, 'post'), row, pre_conf, post_conf, NHP)
        else:
            pre_neuron = neuron_ref_to_obj[pre]
            post_neuron = neuron_ref_to_obj[post]
            c_neuropils = row['neuropils']
            arborization = []
            if isinstance(c_neuropils, str):
                arborization.append({'type': 'neuropil',
                               'synapses': {j.split(':')[0]: int(j.split(':')[1]) \
                                            for j in c_neuropils.split(';') \
                                            if int(j.split(':')[1]) > 0}})
            # SWC-style record: all presynaptic sites first, then all
            # postsynaptic sites.
            content = {'type': 'swc'}
            for axis in ('x', 'y', 'z'):
                content[axis] = (np.array(eval(row['pre_' + axis])+eval(row['post_' + axis]))*0.008).tolist()
            n_sites = len(content['x'])
            n_pairs = n_sites//2
            content['r'] = [0]*n_sites
            # Presynaptic samples have no parent (-1); the k-th postsynaptic
            # sample points at the k-th presynaptic sample.
            content['parent'] = [-1]*n_pairs + list(range(1, n_pairs + 1))
            content['identifier'] = [7]*n_pairs + [8]*n_pairs
            content['sample'] = list(range(1, n_sites + 1))
            content['confidence'] = pre_conf.tolist() + post_conf.tolist()

            synapse = vnc.add_Synapse(pre_neuron, post_neuron, N = row['N'], NHP = NHP)
            morph_dict[synapse._id] = {'morphology': content,
                                       'arborization': arborization}


In [None]:
# NOTE(review): presumably writes the edges queued by the add_Synapse calls to
# the database — confirm against the NeuroArch API.
vnc.flush_edges()

In [None]:
# Create a separate 'notional' data source (version 1.0) for the first species
# record found in the database.
version = '1.0'
species = vnc.sql_query('select from Species').node_objs[0]
notional_datasource = vnc.add_DataSource('notional', version = version,
                                         species = species)


In [None]:
def _combined_synapse_morphology(sites):
    """Build the SWC-style record for the accumulated sites of one partner side.

    All presynaptic sites come first, followed by all postsynaptic sites.
    """
    content = {'type': 'swc'}
    content['x'] = np.concatenate(sites['pre_x'] + sites['post_x']).tolist()
    content['y'] = np.concatenate(sites['pre_y'] + sites['post_y']).tolist()
    content['z'] = np.concatenate(sites['pre_z'] + sites['post_z']).tolist()
    total = len(content['x'])
    half = total//2
    content['r'] = [0]*total
    content['parent'] = [-1]*half + [k+1 for k in range(half)]
    content['identifier'] = [7]*half + [8]*half
    content['sample'] = [k+1 for k in range(total)]
    content['confidence'] = np.concatenate(sites['pre_confidence'] + sites['post_confidence']).tolist()
    return content


# For every known neuron with synapses to or from bodies that are not in the
# database, add one combined fragment per side (under the notional data
# source) and connect it to the neuron with a single Synapse.
for n, v in tqdm(tmp.items()):
    if 'pre' in v:
        sites = v['pre']
        post_neuron = neuron_ref_to_obj[n]
        pre_neuron = vnc.add_NeuronFragment(
            'segments_presynaptic_to_{}'.format(post_neuron.uname),
            'combined_untraced_segments',
            data_source = notional_datasource)
        content = _combined_synapse_morphology(sites)
        synapse = vnc.add_Synapse(pre_neuron, post_neuron, N = sites['N'], NHP = sites['NHP'])
        morph_dict[synapse._id] = {'morphology': content}
    if 'post' in v:
        sites = v['post']
        pre_neuron = neuron_ref_to_obj[n]
        post_neuron = vnc.add_NeuronFragment(
            'segments_postsynaptic_to_{}'.format(pre_neuron.uname),
            'combined_untraced_segments',
            data_source = notional_datasource)
        content = _combined_synapse_morphology(sites)
        synapse = vnc.add_Synapse(pre_neuron, post_neuron, N = sites['N'], NHP = sites['NHP'])
        morph_dict[synapse._id] = {'morphology': content}

In [None]:
# NOTE(review): presumably writes the edges queued for the combined-fragment
# synapses to the database — confirm against the NeuroArch API.
vnc.flush_edges()

In [None]:
# Attach the stored morphology and, where present, the arborization to every
# synapse recorded in morph_dict (keyed by the synapse's record id).
for rid, data in tqdm(morph_dict.items()):
    has_morphology = 'morphology' in data
    has_arborization = 'arborization' in data
    if has_morphology:
        vnc.add_morphology(rid, data['morphology'])
    if has_arborization:
        vnc.add_synapse_arborization(rid, data['arborization'])