# Illustrating the generation of a dataset for the `hemibrain` to `flywire` transformation

## Step 1: Import necessary library modules

In [1]:
import navis
import fafbseg
import flybrains

In [2]:
from pyroglancer.localserver import startdataserver, closedataserver
from pyroglancer.flywire import flywireurl2dict, add_flywirelayer, add_flywirehostedlayer
from pyroglancer.skeletons import to_ngskeletons, to_precomputedskels, to_precomputedskelsinfo
from pyroglancer.layers import flush_precomputed, create_nglayer, handle_skels, get_ngserver, add_hostedlayer
from pyroglancer.ngspaces import create_ngspace
from pyroglancer.volumes import to_ngmesh, to_precomputedsingleresmeshes, to_precomputedsingleresmeshesinfo

In [3]:
import navis.interfaces.neuprint as neu
from navis.interfaces.neuprint import NeuronCriteria as NC, SynapseCriteria as SC

In [4]:
import pandas as pd
import numpy as np
from time import sleep

In [5]:
from multiprocessing import Pool
import itertools
from p_tqdm import p_map

In [6]:
# Keep notebook output quiet: set the navis loggers to 'ERROR' and hide its progress bars.
navis.set_loggers('ERROR')
navis.set_pbars(hide=True)

In [7]:
def try_until(func, max_tries, sleep_time):
    """Call ``func`` repeatedly until it succeeds or ``max_tries`` is exhausted.

    Parameters
    ----------
    func : callable
        Zero-argument callable to invoke.
    max_tries : int
        Maximum number of attempts.
    sleep_time : float
        Seconds to wait between two attempts.

    Returns
    -------
    The value returned by the first successful ``func()`` call.

    Raises
    ------
    ValueError
        If every attempt raised an exception; the last failure is chained
        as ``__cause__`` so the underlying reason is not lost.
    """
    last_error = None
    for attempt in range(max_tries):
        try:
            return func()
        except Exception as err:
            # Deliberately not a bare ``except``: KeyboardInterrupt / SystemExit
            # must still be able to stop a long retry loop.
            last_error = err
            # No point in waiting once the final attempt has failed.
            if attempt < max_tries - 1:
                sleep(sleep_time)
    raise ValueError('skipping as we dont succeed after max tries..') from last_error

## Step 2: Set configurations to fetch data from `neuprint`

In [8]:
# Create the neuprint client, pinned to the 'hemibrain:v1.2' dataset.
client = neu.Client('https://neuprint.janelia.org/', dataset='hemibrain:v1.2')

In [9]:
client

Client("https://neuprint.janelia.org", "hemibrain:v1.2")

## Step 3: Get bodyids from the csv file..

In [10]:
# Input CSV with the neuron lines. NOTE(review): absolute local path - adjust for your machine.
csvloc = '/Users/sri/Documents/dataset/EM_Hemibrain_Neuronbridge.csv'

In [11]:
dataset_df = pd.read_csv(csvloc)

In [12]:
# Split each `Line` entry at the first '_18U' into a prefix ('name') and the remainder ('flip').
status = dataset_df['Line'].str.split('_18U', n=1, expand=True)
status.columns = ['name', 'flip']
# Peel the last '_'-separated token (the status code) off the prefix.
# `n` has to be passed by keyword: pandas >= 2.0 made it keyword-only for `str.rsplit`.
res = status['name'].str.rsplit('_', n=1, expand=True)
res.columns = ['name', 'status']
print('Different status are: ', np.unique(res.status))

Different status are:  ['LV' 'RT' 'TC']


In [13]:
# Boolean masks per status code; only traced ('TC') and roughly traced ('RT') rows are kept.
traced = res['status'].str.contains('TC')
roughlytraced = res['status'].str.contains('RT')
lvstatus = res['status'].str.contains('LV')  # computed, but not part of the selection below
selecteddataset_df = res.loc[roughlytraced | traced]
print('Used status are: ', np.unique(selecteddataset_df['status']))

Used status are:  ['RT' 'TC']


In [14]:
# Split each remaining name at its first '-' into ('bodyid', 'name') and
# keep one row per distinct pair, renumbering the index from 0.
selectedneurons = selecteddataset_df['name'].str.split('-', n=1, expand=True)
selectedneurons.columns = ['bodyid', 'name']
selectedneurons = selectedneurons.drop_duplicates().reset_index(drop=True)
print('Choosen neurons are: ', len(selectedneurons['bodyid'].unique()))

Choosen neurons are:  21666


In [15]:
selectedneurons

Unnamed: 0,bodyid,name
0,989228019,FB5V
1,988909130,FB5V
2,988852391,PAM08_a
3,988632865,VES045
4,988567837,FB4G
...,...,...
21661,1002507170,PFL1
21662,1002507159,EPG
21663,1002507131,PEN_a(PEN1)
21664,1002360103,AVLP464


## Step 4: Select bodyids that will be fetched, and set host directory..

In [16]:
# How many bodyids (taken from the top of `selectedneurons`) are processed in this run.
numbodyids = 50

In [17]:
# First `numbodyids` bodyids, converted from their string form to plain ints.
bodyids = [int(bodyid) for bodyid in selectedneurons.bodyid[:numbodyids]]

In [18]:
print('Total neurons that will be processed: ', len(bodyids))

Total neurons that will be processed:  50


In [19]:
# Root directory passed to all precomputed writers below.
# NOTE(review): absolute local path - adjust for your machine.
hostdirectory = '/Users/sri/Documents/dataset/hemibrain2flywire/'

## Step 5: Get stuff from `neuprint`, transform, precompute them..

In [20]:
# Start clean: flush previously generated precomputed data under `hostdirectory`
# (with an empty layer name; the captured output shows the 'precomputed/' folder being deleted).
flush_precomputed(hostdirectory, '')

deleting.. /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/


## skeleton part..

In [21]:
# Flush any existing 'skeletons' precomputed data before regenerating it.
flush_precomputed(hostdirectory, 'skeletons')

In [22]:
nskelbatch = 25 #number of skeletons per chunk; also passed as max_threads to neu.fetch_skeletons

In [23]:
def transform2flywireskeletons(bodyids):
    """Fetch skeletons for ``bodyids``, transform them to FLYWIRE space and write them out.

    The skeletons are fetched from neuprint (``heal=1000``, raising on a
    missing SWC), transformed from 'JRCFIB2018Fraw' to 'FLYWIRE', converted
    with ``to_ngskeletons`` and written under ``hostdirectory``.

    Returns
    -------
    tuple
        ``(segment id list, segment name list)`` as produced by ``to_ngskeletons``.
    """
    fetched = neu.fetch_skeletons(
        bodyids,
        heal=1000,
        max_threads=nskelbatch,
        missing_swc='raise',
    )
    # 'FAFB' is the alternative target that was tried here previously.
    flywire_skels = navis.xform_brain(
        fetched, source='JRCFIB2018Fraw', target='FLYWIRE', verbose=False
    )
    source, seg_ids, seg_names = to_ngskeletons(flywire_skels)
    to_precomputedskels(source, hostdirectory)
    return seg_ids, seg_names

In [24]:
# Number of chunks is the *ceiling* of len(bodyids) / nskelbatch, so that
#  - fewer bodyids than one batch still gives 1 chunk (flooring gave 0 sections -> ValueError), and
#  - no chunk ever holds more than `nskelbatch` bodyids.
splitlist = np.array_split(bodyids, int(np.ceil(len(bodyids) / nskelbatch)))

In [25]:
def processskelchunks(chunkbodyids, chunkidx):
    """Transform one chunk of bodyids to flywire skeletons, retrying on failure.

    The transformation is attempted through ``try_until`` with up to 100 tries
    and a sleep time of 5 between them.

    Returns
    -------
    tuple
        ``(segment id list, segment name list, chunkidx)``.
    """
    def attempt():
        return transform2flywireskeletons(chunkbodyids)

    seg_ids, seg_names = try_until(attempt, 100, 5)
    return seg_ids, seg_names, chunkidx

In [26]:
def multi_run_skelchunks(args):
    """Single-argument adapter for ``p_map``: ``args`` is ``(chunkbodyids, chunkidx)``."""
    chunkbodyids = args[0]
    chunkidx = args[1]
    return processskelchunks(chunkbodyids, chunkidx)

In [27]:
# Pair every chunk of bodyids with its chunk index.
totalsplits = np.arange(len(splitlist))
fullist = list(zip(splitlist, totalsplits))

In [28]:
# Process all skeleton chunks with p_map; each item of `fullist` is one (chunk, index) pair.
# NOTE(review): the runtime for the full ~21k skeletons was never filled in - TODO measure.
if __name__ == "__main__":
    results_multi = p_map(multi_run_skelchunks,fullist)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=2.0), HTML(value='')))




In [29]:
# Per chunk result: item 0 is the segment id list, item 1 the segment name list.
allskelseglist = [chunkresult[0] for chunkresult in results_multi]
allskelsegnamelist = [chunkresult[1] for chunkresult in results_multi]

In [30]:
# Flatten the per-chunk lists into one flat list each.
mergedskelseglist = [segid for chunk in allskelseglist for segid in chunk]
mergedskelsegnamelist = [segname for chunk in allskelsegnamelist for segname in chunk]

In [31]:
# Spot check: fetch a single skeleton straight from neuprint.
# Uses the bodyid at index 10, so `bodyids` must hold at least 11 entries.
testbodyid = bodyids[10]
neu.fetch_skeletons(int(testbodyid))

Unnamed: 0,type,name,id,n_nodes,n_connectors,n_branches,n_leafs,cable_length,soma,units
0,navis.TreeNeuron,LC10,987424042,1452,437,179,187,80431.540764,1416,8 nanometer


In [32]:
print('Processed neuron skeletons are: ', len(mergedskelseglist))

Processed neuron skeletons are:  50


In [33]:
# Write the info for all processed skeletons (ids + names) under `hostdirectory`.
to_precomputedskelsinfo(mergedskelseglist, mergedskelsegnamelist, hostdirectory)

creating: /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/skeletons/seg_props


## neuronmesh part..

In [34]:
# Flush any existing 'neuronmeshes' precomputed data before regenerating it.
flush_precomputed(hostdirectory, 'neuronmeshes')

In [35]:
neuronmeshbatch = 10 #number of neuron meshes per chunk (one chunk is handled by one p_map task)

In [36]:
def transform2flywiremeshes(bodyids):
    """Fetch neuron meshes for ``bodyids``, transform them to FLYWIRE space and write them out.

    The meshes are fetched from neuprint (``lod=2``, without synapses),
    transformed from 'JRCFIB2018Fraw' to 'FLYWIRE', converted with
    ``to_ngmesh`` and written under ``hostdirectory`` as 'neuronmeshes'.

    Returns
    -------
    tuple
        ``(volume id list, volume name list)`` as produced by ``to_ngmesh``.
    """
    fetched = neu.fetch_mesh_neuron(bodyids, lod=2, with_synapses=False)
    # 'FAFB' is the alternative target that was tried here previously.
    flywire_meshes = navis.xform_brain(
        fetched, source='JRCFIB2018Fraw', target='FLYWIRE', verbose=False
    )
    meshsource, mesh_ids, mesh_names = to_ngmesh(flywire_meshes)
    to_precomputedsingleresmeshes(meshsource, hostdirectory, 'neuronmeshes')
    return mesh_ids, mesh_names

In [37]:
# Number of chunks is the *ceiling* of len(bodyids) / neuronmeshbatch, so that
#  - fewer bodyids than one batch still gives 1 chunk (flooring gave 0 sections -> ValueError), and
#  - no chunk ever holds more than `neuronmeshbatch` bodyids.
splitlist = np.array_split(bodyids, int(np.ceil(len(bodyids) / neuronmeshbatch)))

In [38]:
def processmeshchunks(chunkbodyids, chunkidx):
    """Transform one chunk of bodyids to flywire meshes, retrying on failure.

    The transformation is attempted through ``try_until`` with up to 100 tries
    and a sleep time of 5 between them.

    Returns
    -------
    tuple
        ``(volume id list, volume name list, chunkidx)``.
    """
    def attempt():
        return transform2flywiremeshes(chunkbodyids)

    mesh_ids, mesh_names = try_until(attempt, 100, 5)
    return mesh_ids, mesh_names, chunkidx

In [39]:
def multi_run_meshchunks(args):
    """Single-argument adapter for ``p_map``: ``args`` is ``(chunkbodyids, chunkidx)``."""
    chunkbodyids = args[0]
    chunkidx = args[1]
    return processmeshchunks(chunkbodyids, chunkidx)

In [40]:
# Pair every chunk of bodyids with its chunk index.
totalsplits = np.arange(len(splitlist))
fullist = list(zip(splitlist, totalsplits))

In [41]:
# Process all mesh chunks with p_map; each item of `fullist` is one (chunk, index) pair.
# NOTE(review): the runtime for the full ~21k meshes was never filled in - TODO measure.
if __name__ == "__main__":
    results_multi = p_map(multi_run_meshchunks,fullist)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [42]:
# Per chunk result: item 0 is the volume id list, item 1 the volume name list.
allvolumeidlist = [chunkresult[0] for chunkresult in results_multi]
allvolumenamelist = [chunkresult[1] for chunkresult in results_multi]

In [43]:
# Flatten the per-chunk lists into one flat list each.
mergedvolumeidlist = [volid for chunk in allvolumeidlist for volid in chunk]
mergedvolumenamelist = [volname for chunk in allvolumenamelist for volname in chunk]

In [44]:
# Spot check: fetch a single neuron mesh straight from neuprint.
# Uses the bodyid at index 10, so `bodyids` must hold at least 11 entries.
testbodyid = bodyids[10]
neu.fetch_mesh_neuron(int(testbodyid), lod=2, with_synapses=False)

Unnamed: 0,type,name,id,units,n_vertices,n_faces
0,navis.MeshNeuron,LC10,987424042,8 nanometer,9839,19004


In [45]:
print('Processed neuron meshes are: ', len(mergedvolumeidlist))

Processed neuron meshes are:  50


In [46]:
# Write the info for all processed neuron meshes (ids + names) for the 'neuronmeshes' layer.
to_precomputedsingleresmeshesinfo(mergedvolumeidlist, mergedvolumenamelist, hostdirectory, 'neuronmeshes')

creating: /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/neuronmeshes/mesh/segment_properties
creating: /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/neuronmeshes/mesh/segment_names


## Step 6: Save csv file with bodyid, name

In [47]:
# One row per processed skeleton: its bodyid and its name.
bodyid_celltypes = pd.DataFrame({
    'bodyid': mergedskelseglist,
    'name': mergedskelsegnamelist,
})

In [48]:
bodyid_celltypes.head()

Unnamed: 0,bodyid,name
0,986763734,LC10
1,986763451,LC10
2,986767293,LC10
3,986793149,KCa'b'-ap2_R
4,987419663,LC10


In [49]:
#the below csv file is a useful name file, usable with the older version used in flywire..

In [50]:
# Save the bodyid/name table to 'bodyid_celltype.csv' (relative path, without the index column).
bodyid_celltypes.to_csv('bodyid_celltype.csv', index=False)