In [1]:
from alive_progress import alive_it, alive_bar
import madmom
from joblib import Parallel, delayed
import yaml
import json
from loguru import logger as log
from time import sleep

In [2]:
# Load the parameter file that sits one directory above the notebook.
with open("../params.yaml", mode="r", encoding="utf-8") as file:
    params = yaml.safe_load(file)

# Load the stored onset-detection reports, one JSON file per stage.
with open("../reports/onset-detection/test-onsets.json", mode="r", encoding="utf-8") as file:
    test_onsets = json.load(file)

with open(
    "../reports/onset-detection/val-onsets.json", mode="r", encoding="utf-8"
) as file:
    val_onsets = json.load(file)

# Group both reports under their stage name so later cells can loop over them.
onsets: dict = dict(test=test_onsets, val=val_onsets)

In [3]:
%%timeit -n 1 -r 1

beats: dict = dict(test={}, val={})  # one result bucket per stage

for stage, stage_onsets in onsets.items():
    log.info(f"filtering onsets of stage ’{stage}’")
    stage_beats = beats[stage]
    bar = alive_it(stage_onsets.keys(), force_tty=True)
    for file in bar:
        bar.text = file  # show the current file next to the progress bar
        sleep(0.2)  # fixed delay standing in for the per-file work
        stage_beats[file] = file  # placeholder result: the file name itself

2022-06-12 20:31:36.720 | INFO     | __main__:inner:4 - filtering onsets of stage ’test’


|████████████████████████████████████████| 50/50 [100%] in 10.1s (4.94/s)                                               


2022-06-12 20:31:46.917 | INFO     | __main__:inner:4 - filtering onsets of stage ’val’


|████████████████████████████████████████| 26/26 [100%] in 5.3s (4.95/s)                                                
15.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


Now we want to parallelize this task with joblib.

In [16]:
# Fresh, independent result bucket for each stage.
beats: dict = {stage_name: {} for stage_name in ("test", "val")}

In [17]:
def file_func(file, b, stage_key=None):
    """Simulate the work for one file and record the result in ``b``.

    Args:
        file: Identifier of the file. It is used as the key and, as a
            placeholder result, also as the stored value.
        b: Nested mapping of the form ``{stage: {file: value}}``; the inner
            mapping of the selected stage is mutated in place.
        stage_key: Key of the stage bucket in ``b`` to write to. When omitted,
            the module-level ``stage`` variable is used, which keeps existing
            ``file_func(file, b)`` calls working. Passing it explicitly removes
            the dependency on that hidden global.

    Raises:
        NameError: If ``stage_key`` is omitted and no global ``stage`` exists.
        KeyError: If ``b`` has no bucket for the selected stage.
    """
    if stage_key is None:
        # Backward-compatible fallback: the enclosing loop's global variable.
        stage_key = stage
    sleep(0.2)  # fixed delay standing in for the per-file work
    b[stage_key][file] = file

In [18]:
for stage in onsets:  # keep the name `stage`: file_func reads it as a global
    log.info(f"filtering onsets of stage ’{stage}’")
    # One delayed call per file of this stage; all calls write into `beats`.
    tasks = (delayed(file_func)(file, beats) for file in onsets[stage])
    Parallel(n_jobs=1, require='sharedmem')(tasks)

2022-06-12 20:38:35.354 | INFO     | __main__:<cell line: 1>:2 - filtering onsets of stage ’test’
2022-06-12 20:38:45.386 | INFO     | __main__:<cell line: 1>:2 - filtering onsets of stage ’val’


-> Measuring the speedup in Jupyter is pointless, as multiprocessing does not work properly in Jupyter, so only n_jobs=1 works here...

In [19]:
# Show the nested {stage: {file: value}} mapping filled in by the file_func calls.
beats

{'test': {'test08': 'test08',
  'test33': 'test33',
  'test48': 'test48',
  'test49': 'test49',
  'test03': 'test03',
  'test43': 'test43',
  'test19': 'test19',
  'test11': 'test11',
  'test12': 'test12',
  'test04': 'test04',
  'test15': 'test15',
  'test40': 'test40',
  'test20': 'test20',
  'test29': 'test29',
  'test24': 'test24',
  'test27': 'test27',
  'test34': 'test34',
  'test18': 'test18',
  'test39': 'test39',
  'test38': 'test38',
  'test10': 'test10',
  'test21': 'test21',
  'test22': 'test22',
  'test05': 'test05',
  'test47': 'test47',
  'test35': 'test35',
  'test23': 'test23',
  'test44': 'test44',
  'test17': 'test17',
  'test37': 'test37',
  'test32': 'test32',
  'test42': 'test42',
  'test14': 'test14',
  'test13': 'test13',
  'test26': 'test26',
  'test25': 'test25',
  'test28': 'test28',
  'test31': 'test31',
  'test46': 'test46',
  'test07': 'test07',
  'test41': 'test41',
  'test01': 'test01',
  'test36': 'test36',
  'test16': 'test16',
  'test06': 'test06',
  