# Illustration of generating the dataset for the `hemibrain` to `flywire` transformation

## Step 1: Import the necessary library modules

In [1]:
import navis
import fafbseg
import flybrains

In [2]:
from pyroglancer.localserver import startdataserver, closedataserver
from pyroglancer.flywire import flywireurl2dict, add_flywirelayer, add_flywirehostedlayer
from pyroglancer.skeletons import to_ngskeletons, to_precomputedskels, to_precomputedskelsinfo
from pyroglancer.layers import flush_precomputed, create_nglayer, handle_skels, get_ngserver, add_hostedlayer
from pyroglancer.ngspaces import create_ngspace
from pyroglancer.volumes import to_ngmesh, to_precomputedsingleresmeshes, to_precomputedsingleresmeshesinfo

In [3]:
import navis.interfaces.neuprint as neu
from neuprint import fetch_neurons, NeuronCriteria as NC

In [4]:
import pandas as pd
import numpy as np
from time import sleep
import os
from os import walk
import warnings

In [5]:
from multiprocessing import Pool
import itertools
from p_tqdm import p_map

In [6]:
def try_until(func, max_tries, sleep_time):
    """Call ``func`` repeatedly until it succeeds or the attempts run out.

    Parameters
    ----------
    func : callable
        Zero-argument callable to invoke.
    max_tries : int
        Maximum number of attempts.
    sleep_time : float
        Seconds to wait between a failed attempt and the next one.

    Returns
    -------
    The return value of the first successful ``func()`` call, or
    ``(None, None)`` (after emitting a warning) when every attempt raised.
    """
    last_error = None
    for attempt in range(max_tries):
        try:
            return func()
        except Exception as err:
            # Only ``Exception`` is caught, so KeyboardInterrupt / SystemExit
            # can still stop a long-running batch.
            last_error = err
            # Waiting after the final attempt would only delay the caller.
            if attempt < max_tries - 1:
                sleep(sleep_time)
    warnings.warn('skipping as we dont succeed after max tries.. (last error: %r)' % (last_error,))
    return None, None

In [7]:
# Master switch: the following cell derives the retry settings, logging/progress-bar
# options and the per-stage recompute flags from this value.
recompute_scratch = False #recompute everything from scratch..

In [8]:
# Every run-mode knob follows the single `recompute_scratch` switch:
# a full recompute runs quietly with many retries, an incremental run is verbose with one try.
verbose_stat = not recompute_scratch
max_tries, sleep_time = (100, 5) if recompute_scratch else (1, 1)
if recompute_scratch:
    navis.set_loggers('ERROR')
navis.set_pbars(hide=bool(recompute_scratch))
recompute_scratchskels = recompute_scratchmeshes = bool(recompute_scratch)

## Step 2: Set configurations to fetch data from `neuprint`

In [9]:
# Client for the neuPrint server, pinned to the 'hemibrain:v1.2' dataset.
# NOTE(review): no token is passed here; presumably it is picked up from the environment — confirm.
client = neu.Client('https://neuprint.janelia.org/', dataset='hemibrain:v1.2')

In [10]:
client

Client("https://neuprint.janelia.org", "hemibrain:v1.2")

## Step 3: Get bodyids from the csv file..

In [11]:
# Location of the input CSV. Set the HEMIBRAIN_NEURONBRIDGE_CSV environment variable to
# point at your own copy; the fallback is the path used when this notebook was written.
csvloc = os.environ.get(
    'HEMIBRAIN_NEURONBRIDGE_CSV',
    '/Users/sri/Documents/dataset/EM_Hemibrain_Neuronbridge.csv',
)

In [12]:
# Load the CSV into a DataFrame; the following cells parse its `Line` column.
dataset_df = pd.read_csv(csvloc)

In [13]:
# Split each `Line` entry at the first '_18U': the left part is kept as `name`,
# the remainder as `flip`.
status = dataset_df.Line.str.split('_18U', n=1, expand=True)
status.columns = ['name', 'flip']
# Peel the trailing '_<status>' token off the name. `n` is passed by keyword because
# passing it positionally is deprecated and rejected by newer pandas releases.
res = status.name.str.rsplit('_', n=1, expand=True)
res.columns = ['name', 'status']
print('Different status are: ', np.unique(res.status))

Different status are:  ['LV' 'RT' 'TC']


In [14]:
# Keep only the rows whose status contains 'RT' or 'TC'
# (named "roughly traced" and "traced" by the variables below).
roughlytraced = res.status.str.contains('RT')
traced = res.status.str.contains('TC')
# Mask for the 'LV' rows; computed here but not used by the selection below.
lvstatus = res.status.str.contains('LV')
selecteddataset_df = res[roughlytraced | traced]
print('Used status are: ', np.unique(selecteddataset_df.status))

Used status are:  ['RT' 'TC']


In [15]:
# Split each remaining name at the first '-' into a body id and a neuron name.
selectedneurons = selecteddataset_df.name.str.split('-', n = 1, expand=True)
selectedneurons.columns = ['bodyid', 'name']
# Drop repeated (bodyid, name) rows.
selectedneurons = selectedneurons.drop_duplicates()
# `bodyid` stays a string column here; the numeric conversion below is left disabled.
#selectedneurons['bodyid'] = selectedneurons['bodyid'].apply(pd.to_numeric, errors='coerce')
selectedneurons = selectedneurons.reset_index(drop = True)
print('Choosen neurons are: ', len(selectedneurons.bodyid.unique()))

Choosen neurons are:  21666


In [16]:
selectedneurons

Unnamed: 0,bodyid,name
0,989228019,FB5V
1,988909130,FB5V
2,988852391,PAM08_a
3,988632865,VES045
4,988567837,FB4G
...,...,...
21661,1002507170,PFL1
21662,1002507159,EPG
21663,1002507131,PEN_a(PEN1)
21664,1002360103,AVLP464


## Step 4: Select bodyids that will be fetched, and set host directory..

In [17]:
#numbodyids = 50

In [18]:
#bodyids = selectedneurons.bodyid[0:numbodyids]
# Use every selected neuron; the body ids are converted to Python ints one by one.
bodyids = [int(bodyid) for bodyid in selectedneurons.bodyid]
# Start with the full list as the set of ids to process.
candidate_bodyids = bodyids

In [19]:
print('Total neurons that will be processed: ', len(candidate_bodyids))

Total neurons that will be processed:  21666


In [20]:
# Root folder that receives the precomputed output. Set the HEMIBRAIN2FLYWIRE_DIR
# environment variable to relocate it; the fallback is the path used when this notebook
# was written. Joining with '' guarantees a trailing separator, as in the original value.
hostdirectory = os.path.join(
    os.environ.get('HEMIBRAIN2FLYWIRE_DIR', '/Users/sri/Documents/dataset/hemibrain2flywire/'),
    '',
)

## Step 5: Fetch the data from `neuprint`, transform it, and precompute it

In [None]:
# if recompute_scratch:
#     print('Flushing entire host directory: ',hostdirectory)
#     flush_precomputed(hostdirectory, '')
# else:
#     print('Adding to already computed files..')

## skeleton part..

In [None]:
# Folder holding the precomputed skeleton files; a later cell lists its contents,
# so this has to be defined on a fresh kernel.
skelpath = os.path.join(hostdirectory, 'precomputed/skeletons/')

In [None]:
# if recompute_scratchskels:
#     print('Flushing entire host directory for skeletons: ', skelpath)
#     flush_precomputed(hostdirectory, 'skeletons')
# else:
#     print('computing skels that need to be added..')
#     _, _, precomp_skelbodyids = next(walk(skelpath))
#     if 'info' in precomp_skelbodyids: precomp_skelbodyids.remove('info')
#     if '.DS_Store' in precomp_skelbodyids: precomp_skelbodyids.remove('.DS_Store')
#     bodyids_str = list(map(str, bodyids))
#     missingbodyids = list(set(bodyids_str) ^ set(precomp_skelbodyids))
#     candidate_bodyids = missingbodyids
#     candidate_bodyids = list(map(int, candidate_bodyids))

In [None]:
print('Total skeletons that will be added: ', len(candidate_bodyids))

In [None]:
# Number of skeletons to process in one go: 25 for a full recompute, otherwise 1.
nskelbatch = 25 if recompute_scratchskels else 1

In [None]:
print('Skeletons that will be processed in one-go: ', nskelbatch)

In [None]:
def transform2flywireskeletons(skelids):
    """Fetch skeletons for ``skelids``, move them to FLYWIRE space and write them out.

    The skeletons are fetched with ``neu.fetch_skeletons`` (``heal=1000``,
    ``missing_swc='raise'``), passed through ``navis.xform_brain`` from
    'JRCFIB2018Fraw' to 'FLYWIRE', converted with ``to_ngskeletons`` and written
    with ``to_precomputedskels`` into the notebook-level ``hostdirectory``.

    Returns
    -------
    tuple
        The segment-id list and segment-name list produced by ``to_ngskeletons``.
    """
    fetched = neu.fetch_skeletons(
        skelids,
        heal=1000,
        max_threads=nskelbatch,
        missing_swc='raise',
    )
    # Alternative target kept for reference: target='FAFB', verbose=False.
    flywire_skels = navis.xform_brain(
        fetched,
        source='JRCFIB2018Fraw',
        target='FLYWIRE',
        verbose=verbose_stat,
    )
    ng_source, seg_ids, seg_names = to_ngskeletons(flywire_skels)
    to_precomputedskels(ng_source, hostdirectory)
    return seg_ids, seg_names

In [None]:
# Split the candidates into chunks of at most `nskelbatch` ids. The chunk count is
# rounded up with integer arithmetic: the earlier float quotient was truncated, which
# gave 0 sections (a ValueError) whenever there were fewer candidates than the batch size.
if len(candidate_bodyids) > 0:
    splitlist = np.array_split(
        candidate_bodyids,
        (len(candidate_bodyids) + nskelbatch - 1) // nskelbatch,
    )
else:
    # Nothing to process: keep an empty list so the following cells still run.
    splitlist = []

In [None]:
def processskelchunks(chunkbodyids, chunkidx):
    """Transform one chunk of body ids, retrying through ``try_until``.

    Parameters
    ----------
    chunkbodyids
        Body ids handed to ``transform2flywireskeletons``.
    chunkidx
        Position of the chunk; passed back unchanged.

    Returns
    -------
    tuple
        ``(segment ids, segment names, chunkidx)``; the first two are whatever
        ``try_until`` hands back for this chunk.
    """
    def attempt():
        return transform2flywireskeletons(chunkbodyids)

    seg_ids, seg_names = try_until(attempt, max_tries, sleep_time)
    return seg_ids, seg_names, chunkidx

In [None]:
def multi_run_skelchunks(args):
    """Single-argument adapter: unpack ``(chunkbodyids, chunkidx)`` and process the chunk."""
    chunkbodyids = args[0]
    chunkidx = args[1]
    return processskelchunks(chunkbodyids, chunkidx)

In [None]:
# Pair every chunk with its position in `splitlist`.
totalsplits = np.arange(len(splitlist))
fullist = list(zip(splitlist, totalsplits))

In [None]:
fullist

In [None]:
#With multiprocessing takes 5-6 hrs for processing 21k skeletons..
# Hand every (chunk, index) pair to multi_run_skelchunks via p_map and keep what it returns.
if __name__ == "__main__":
    results_multi = p_map(multi_run_skelchunks,fullist)

In [None]:
# List the files directly inside the skeleton folder and drop the two entries that are
# not skeleton files.
_, _, precomp_skelbodyids = next(walk(skelpath))
precomp_skelbodyids = [
    fname for fname in precomp_skelbodyids
    if fname not in ('info', '.DS_Store')
]

In [None]:
# Build the string form of the requested ids here: no earlier cell defines it on a fresh run.
bodyids_str = list(map(str, bodyids))
# Requested ids without a file on disk are the failures. A set difference is used so that
# files on disk that were never requested are not reported; str() keeps the comparison
# valid even if the id list has already been converted to ints.
errorbodyids = list(set(bodyids_str) - set(map(str, precomp_skelbodyids)))
print('skeletons unable to be transformed: ', errorbodyids)

In [None]:
# File names are body ids; turn them into ints for the neuPrint query.
precomp_skelbodyids = [int(bodyid) for bodyid in precomp_skelbodyids]

In [None]:
len(precomp_skelbodyids)

In [None]:
# Query neuPrint for the body ids that have a skeleton on disk; the second return value is discarded.
neuron_df, _ = fetch_neurons(NC(bodyId=precomp_skelbodyids))

In [None]:
# Pull the ids and the instance names out of the query result as plain lists (same row order).
mergedskelseglist = neuron_df['bodyId'].tolist()
mergedskelsegnamelist = neuron_df['instance'].tolist()

In [None]:
# Both lists are handed on as strings.
mergedskelseglist = list(map(str, mergedskelseglist))
mergedskelsegnamelist = list(map(str, mergedskelsegnamelist))

In [None]:
print('Processed neuron skeletons are: ', len(mergedskelseglist))

In [None]:
# Write the skeleton info for the processed ids and names under `hostdirectory`.
to_precomputedskelsinfo(mergedskelseglist, mergedskelsegnamelist, hostdirectory)

In [None]:
#save csv file with bodyid, name

In [None]:
# Two-column table: body id and its instance name.
bodyid_celltypes = pd.DataFrame({
    'bodyid': mergedskelseglist,
    'name': mergedskelsegnamelist,
})

In [None]:
bodyid_celltypes.head()

In [None]:
# The CSV file below is a name file usable with the older version used in flywire.

In [None]:
# Saved without the index column; the relative file name resolves against the current working directory.
bodyid_celltypes.to_csv('skelbodyid_celltype.csv', index=False)

## neuronmesh part..

In [37]:
# Folder that the following cells scan for per-body mesh files.
neuronmeshpath = os.path.join(hostdirectory, 'precomputed/neuronmeshes/mesh')

In [38]:
# String form of every requested body id. Defined before the branch because a later cell
# reads it in both modes.
bodyids_str = list(map(str, bodyids))
if recompute_scratchmeshes:
    print('Flushing entire host dir for neuronmeshes: ', neuronmeshpath)
    flush_precomputed(hostdirectory, 'neuronmeshes')
    # After the flush request every requested id is a candidate again, regardless of
    # what an earlier section left in `candidate_bodyids`.
    candidate_bodyids = list(bodyids)
else:
    print('computing neuronmeshes that need to be added..')
    _, _, precomp_meshbodyids = next(walk(neuronmeshpath))
    if 'info' in precomp_meshbodyids: precomp_meshbodyids.remove('info')
    if '.DS_Store' in precomp_meshbodyids: precomp_meshbodyids.remove('.DS_Store')
    # Skip the companion files whose names contain ':'.
    precomp_meshbodyids[:] = [x for x in precomp_meshbodyids if ":" not in x]
    # Missing = requested but not on disk. A set difference is used: a symmetric
    # difference would also pick up files on disk that were never requested, and any
    # non-numeric name among them would break the int() conversion below.
    missingbodyids = list(set(bodyids_str) - set(precomp_meshbodyids))
    candidate_bodyids = list(map(int, missingbodyids))

computing neuronmeshes that need to be added..


In [39]:
print('Total neuronmeshes that will be added: ', len(candidate_bodyids))

Total neuronmeshes that will be added:  0


In [40]:
# Number of neuronmeshes to process in one go: 10 for a full recompute, otherwise 1.
neuronmeshbatch = 10 if recompute_scratchmeshes else 1

In [50]:
print('Neuronmeshes that will be processed in one-go: ', neuronmeshbatch)

Neuronmeshes that will be processed in one-go:  1


In [51]:
def transform2flywiremeshes(bodyids):
    """Fetch neuron meshes for ``bodyids``, move them to FLYWIRE space and write them out.

    The meshes are fetched with ``neu.fetch_mesh_neuron`` (``lod=2``, no synapses),
    passed through ``navis.xform_brain`` from 'JRCFIB2018Fraw' to 'FLYWIRE',
    converted with ``to_ngmesh`` and written with ``to_precomputedsingleresmeshes``
    into the 'neuronmeshes' layer under the notebook-level ``hostdirectory``.

    Returns
    -------
    tuple
        The volume-id list and volume-name list produced by ``to_ngmesh``.
    """
    fetched = neu.fetch_mesh_neuron(bodyids, lod=2, with_synapses=False)
    # Alternative target kept for reference: target='FAFB', verbose=False.
    flywire_meshes = navis.xform_brain(
        fetched,
        source='JRCFIB2018Fraw',
        target='FLYWIRE',
        verbose=verbose_stat,
    )
    ng_source, volume_ids, volume_names = to_ngmesh(flywire_meshes)
    to_precomputedsingleresmeshes(ng_source, hostdirectory, 'neuronmeshes')
    return volume_ids, volume_names

In [57]:
if candidate_bodyids:
    # Chunks of at most `neuronmeshbatch` ids. The chunk count is rounded up with integer
    # arithmetic: the earlier float quotient was truncated, which gave 0 sections
    # (a ValueError) whenever there were fewer candidates than the batch size.
    splitlist = np.array_split(
        candidate_bodyids,
        (len(candidate_bodyids) + neuronmeshbatch - 1) // neuronmeshbatch,
    )
else:
    # Nothing to add: keep an empty list so the following cells still run.
    splitlist = []

In [59]:
def processmeshchunks(chunkbodyids, chunkidx):
    """Transform one chunk of body ids into meshes, retrying through ``try_until``.

    Parameters
    ----------
    chunkbodyids
        Body ids handed to ``transform2flywiremeshes``.
    chunkidx
        Position of the chunk; passed back unchanged.

    Returns
    -------
    tuple
        ``(volume ids, volume names, chunkidx)``; the first two are whatever
        ``try_until`` hands back for this chunk.
    """
    def attempt():
        return transform2flywiremeshes(chunkbodyids)

    volume_ids, volume_names = try_until(attempt, max_tries, sleep_time)
    return volume_ids, volume_names, chunkidx

In [60]:
def multi_run_meshchunks(args):
    """Single-argument adapter: unpack ``(chunkbodyids, chunkidx)`` and process the chunk."""
    chunkbodyids = args[0]
    chunkidx = args[1]
    return processmeshchunks(chunkbodyids, chunkidx)

In [61]:
# Pair every chunk with its position in `splitlist`.
totalsplits = np.arange(len(splitlist))
fullist = list(zip(splitlist, totalsplits))

In [62]:
fullist

[]

In [63]:
#With multiprocessing takes 3 hrs for processing 21k meshes..
# Hand every (chunk, index) pair to multi_run_meshchunks via p_map and keep what it returns.
if __name__ == "__main__":
    results_multi = p_map(multi_run_meshchunks,fullist)

HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…




In [70]:
# List the files directly inside the mesh folder, then drop 'info', '.DS_Store' and
# every name that contains ':'.
_, _, precomp_neuronmeshbodyids = next(walk(neuronmeshpath))
precomp_neuronmeshbodyids = [
    fname for fname in precomp_neuronmeshbodyids
    if fname not in ('info', '.DS_Store') and ":" not in fname
]

In [73]:
# Build the string form of the requested ids here so this cell does not depend on which
# branch of an earlier cell happened to run.
bodyids_str = list(map(str, bodyids))
# Requested ids without a mesh file on disk are the failures. A set difference is used so
# that files on disk that were never requested are not reported; str() keeps the comparison
# valid even if the id list has already been converted to ints.
errorbodyids = list(set(bodyids_str) - set(map(str, precomp_neuronmeshbodyids)))
print('neuronmeshes unable to be transformed: ', errorbodyids)

neuronmeshes unable to be transformed:  []


In [74]:
# File names are body ids; turn them into ints for the neuPrint query.
precomp_neuronmeshbodyids = [int(bodyid) for bodyid in precomp_neuronmeshbodyids]

In [75]:
len(precomp_neuronmeshbodyids)

21666

In [76]:
# Query neuPrint for the body ids that have a mesh on disk; the second return value is discarded.
neuron_df, _ = fetch_neurons(NC(bodyId=precomp_neuronmeshbodyids))

In [78]:
# Pull the ids and the instance names out of the query result as plain lists (same row order).
mergedvolumeidlist = neuron_df['bodyId'].tolist()
mergedvolumenamelist = neuron_df['instance'].tolist()

In [79]:
# Both lists are handed on as strings.
mergedvolumeidlist = list(map(str, mergedvolumeidlist))
mergedvolumenamelist = list(map(str, mergedvolumenamelist))

In [80]:
print('Processed neuron meshes are: ', len(mergedvolumeidlist))

Processed neuron meshes are:  21666


In [81]:
# Write the info for the 'neuronmeshes' layer from the processed ids and names under `hostdirectory`.
to_precomputedsingleresmeshesinfo(mergedvolumeidlist, mergedvolumenamelist, hostdirectory, 'neuronmeshes')

creating: /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/neuronmeshes/mesh/segment_properties
creating: /Users/sri/Documents/dataset/hemibrain2flywire/precomputed/neuronmeshes/mesh/segment_names


In [82]:
#save csv file with bodyid, name

In [83]:
# Two-column table: body id and its instance name.
bodyid_celltypes = pd.DataFrame({
    'bodyid': mergedvolumeidlist,
    'name': mergedvolumenamelist,
})

In [84]:
bodyid_celltypes.head()

Unnamed: 0,bodyid,name
0,200326126,CL141_L
1,202916528,FS4C(FQ12)_C4_R
2,203253072,FS4A(AB)(FQ12)_C3_R
3,203253253,SMP505(PDM22)_L
4,203257652,FS4C(FQ12)_C6_R


In [85]:
# The CSV file below is a name file usable with the older version used in flywire.

In [86]:
# Saved without the index column; the relative file name resolves against the current working directory.
bodyid_celltypes.to_csv('neuronmeshbodyid_celltype.csv', index=False)