This notebook contains the results of several experiments I've run using the Brainlit package.

In [2]:
from brainlit.utils.ngl_pipeline import NeuroglancerSession
from brainlit.preprocessing.features import *
import pandas as pd
import numpy as np
import glob
import time

# Parallel vs. Serial Extraction

## Neighborhood features

In [6]:
nbr = NeighborhoodFeatures(url="s3://mouse-light-viz/precomputed_volumes/brain1", size=[5,5,5], offset=[15,15,15])

serial_start = time.time()
nbr.fit(seg_ids=[2,7,11], file_path="./test_data/neighborhood_", batch_size=20, num_verts=20, n_jobs=1)
serial_end = time.time()

Downloading: 100%|██████████| 1/1 [00:00<00:00,  5.37it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00,  9.40it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00, 10.21it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00, 10.37it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00,  8.81it/s]
Downloading:   0%|          | 0/1 [00:01<?, ?it/s]
Downloading:   0%|          | 0/1 [00:01<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?

In [7]:
parallel_start = time.time()
nbr.fit(seg_ids=[2,7,11], file_path="./test_data/neighborhood_", batch_size=20, num_verts=20, n_jobs=3)
parallel_end = time.time()

In [8]:
print("Serial Extraction Time: {}".format(serial_end-serial_start))
print("Parallel Extraction Time: {}".format(parallel_end-parallel_start))

Serial Extraction Time: 145.1115436553955
Parallel Extraction Time: 172.622878074646


## Linear Features

In [10]:
lin = LinearFeatures(url="s3://mouse-light-viz/precomputed_volumes/brain1", size=[5,5,5], offset=[15,15,15])
lin.add_filter("gaussian gradient", sigma=[1, 1, 0.3])
lin.add_filter("gaussian laplace", sigma=[1, 1, 0.3])
lin.add_filter("gabor", sigma=[1, 1, 0.3], phi=[0, 0], frequency=2)
lin.add_filter("gabor", sigma=[1, 1, 0.3], phi=[0, np.pi / 2], frequency=2)

In [11]:
serial_start = time.time()
nbr.fit(seg_ids=[2,7,11], file_path="./test_data/neighborhood_", batch_size=20, num_verts=20, n_jobs=1)
serial_end = time.time()

Downloading: 100%|██████████| 1/1 [00:00<00:00,  7.98it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00, 10.31it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00,  9.44it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00, 10.05it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading: 100%|██████████| 1/1 [00:00<00:00,  9.73it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?, ?it/s]
Downloading:   0%|          | 0/1 [00:00<?

In [12]:
parallel_start = time.time()
nbr.fit(seg_ids=[2,7,11], file_path="./test_data/neighborhood_", batch_size=20, num_verts=20, n_jobs=3)
parallel_end = time.time()

In [13]:
print("Serial Extraction Time: {}".format(serial_end-serial_start))
print("Parallel Extraction Time: {}".format(parallel_end-parallel_start))

Serial Extraction Time: 152.9577968120575
Parallel Extraction Time: 177.39951014518738


Conclusions: Parallelization does not seem to help, probably because the download speed/bandwidth is the limiting factor. Its that using multiple cores would be faster still if the download speed does not decrease per core.