In [1]:
import warnings
# Install a global filter that ignores every warning (no category or module
# restriction is given), so nothing emitted by later cells is shown.
warnings.filterwarnings('ignore')

In [2]:
import os
import librosa
import numpy
import soundfile

from tqdm import tqdm_notebook as tqdm
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed

In [3]:
def extact_mfcc_worker(node, path_to_result_folder, rel_path):
    """Compute MFCC features for every file of one directory and save them as CSV.

    Parameters
    ----------
    node : sequence
        A ``(dirpath, dirnames, filenames)`` entry as yielded by ``os.walk``.
        Only ``node[0]`` (the directory) and ``node[2]`` (its file names) are read.
    path_to_result_folder : str
        Root folder under which the CSV files are written.
    rel_path : str
        Sub-path below ``path_to_result_folder`` that receives this directory's output.

    Notes
    -----
    The output directory is created if it does not exist, even when the
    directory contains no files. For each file ``f`` the feature matrix is
    written to ``<path_to_result_folder>/<rel_path>/<f>.csv`` (the original
    extension is kept in the name). Errors raised while loading or processing
    a file are not caught and propagate to the caller.
    """
    path_to_result = os.path.join(path_to_result_folder, rel_path)
    os.makedirs(path_to_result, exist_ok=True)
    for file_name in node[2]:
        audio_series, rate = librosa.load(os.path.join(node[0], file_name))
        # Pass y/sr by keyword: recent librosa releases reject positional arguments here.
        mfcc_features = librosa.feature.mfcc(y=audio_series, sr=rate)
        numpy.savetxt(
            os.path.join(path_to_result, "{}.csv".format(file_name)),
            mfcc_features,
            delimiter=",",
        )

In [4]:
def extract_mfcc(path_to_start_folder, path_to_result_folder):
    """Sequentially extract MFCC features for every file under a directory tree.

    Walks ``path_to_start_folder`` with ``os.walk`` and, for each directory
    that contains at least one file, calls ``extact_mfcc_worker`` so that the
    output mirrors the input layout below ``path_to_result_folder``.

    Parameters
    ----------
    path_to_start_folder : str
        Root of the tree to scan.
    path_to_result_folder : str
        Root folder that receives the mirrored output.

    Notes
    -----
    Output sub-paths are relative to the current working directory when the
    start folder lies inside it (e.g. ``"aac"`` -> ``<result>/aac/...``).
    When the start folder lies outside the working directory, sub-paths are
    taken relative to the start folder's parent instead; a cwd-relative path
    would begin with ``..`` and place results outside the result folder.
    """
    root = os.path.abspath(path_to_start_folder)
    rel_root = os.path.relpath(root)
    outside_cwd = rel_root == os.pardir or rel_root.startswith(os.pardir + os.sep)
    anchor = os.path.dirname(root) if outside_cwd else os.getcwd()

    for node in tqdm(os.walk(path_to_start_folder)):
        if node[2]:  # skip directories without files
            rel_path = os.path.relpath(node[0], anchor)
            extact_mfcc_worker(node, path_to_result_folder, rel_path)

In [5]:
def extract_mfcc_processes(path_to_start_folder, path_to_result_folder, num_proc=8):
    """Extract MFCC features for a directory tree using a pool of worker processes.

    Every directory under ``path_to_start_folder`` that contains at least one
    file is submitted as one ``extact_mfcc_worker`` task to a
    ``ProcessPoolExecutor``.

    Parameters
    ----------
    path_to_start_folder : str
        Root of the tree to scan.
    path_to_result_folder : str
        Root folder that receives the mirrored output.
    num_proc : int, optional
        Maximum number of worker processes (default 8).

    Raises
    ------
    Exception
        The first exception raised by a worker is re-raised here. The pool is
        still shut down with ``wait=True``, so already submitted tasks finish
        before the exception leaves this function.

    Notes
    -----
    Output sub-paths are relative to the current working directory when the
    start folder lies inside it; otherwise they are relative to the start
    folder's parent, so results never land outside the result folder.
    The progress bar counts completed directories, not submitted ones.

    NOTE(review): the worker is sent to child processes by reference; under
    the "spawn" start method a function defined only in the notebook may not
    be importable in the children -- confirm on the target platform.
    """
    root = os.path.abspath(path_to_start_folder)
    rel_root = os.path.relpath(root)
    outside_cwd = rel_root == os.pardir or rel_root.startswith(os.pardir + os.sep)
    anchor = os.path.dirname(root) if outside_cwd else os.getcwd()

    with ProcessPoolExecutor(max_workers=num_proc) as pool:
        futures = [
            pool.submit(
                extact_mfcc_worker,
                node,
                path_to_result_folder,
                os.path.relpath(node[0], anchor),
            )
            for node in os.walk(path_to_start_folder)
            if node[2]  # skip directories without files
        ]
        for future in tqdm(as_completed(futures), total=len(futures)):
            # result() re-raises worker exceptions that would otherwise be lost.
            future.result()
    

In [6]:
def extract_mfcc_threads(path_to_start_folder, path_to_result_folder, num_threads=8):
    """Extract MFCC features for a directory tree using a pool of worker threads.

    Every directory under ``path_to_start_folder`` that contains at least one
    file is submitted as one ``extact_mfcc_worker`` task to a
    ``ThreadPoolExecutor``.

    Parameters
    ----------
    path_to_start_folder : str
        Root of the tree to scan.
    path_to_result_folder : str
        Root folder that receives the mirrored output.
    num_threads : int, optional
        Maximum number of worker threads (default 8).

    Raises
    ------
    Exception
        The first exception raised by a worker is re-raised here. The pool is
        still shut down with ``wait=True``, so already submitted tasks finish
        before the exception leaves this function.

    Notes
    -----
    Output sub-paths are relative to the current working directory when the
    start folder lies inside it; otherwise they are relative to the start
    folder's parent, so results never land outside the result folder.
    The progress bar counts completed directories, not submitted ones.
    """
    root = os.path.abspath(path_to_start_folder)
    rel_root = os.path.relpath(root)
    outside_cwd = rel_root == os.pardir or rel_root.startswith(os.pardir + os.sep)
    anchor = os.path.dirname(root) if outside_cwd else os.getcwd()

    with ThreadPoolExecutor(max_workers=num_threads) as pool:
        futures = [
            pool.submit(
                extact_mfcc_worker,
                node,
                path_to_result_folder,
                os.path.relpath(node[0], anchor),
            )
            for node in os.walk(path_to_start_folder)
            if node[2]  # skip directories without files
        ]
        for future in tqdm(as_completed(futures), total=len(futures)):
            # result() re-raises worker exceptions that would otherwise be lost.
            future.result()

### Попробуем запустить последовательно

In [60]:
%%timeit -r1 -n1
# Baseline: one timed sequential pass (-r1 -n1 = single run, single loop) over "aac".
extract_mfcc("aac", "result")

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


2h 50min 31s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Параллельно на процессах

In [7]:
%%timeit -r1 -n1
# One timed pass with the process pool at its default size (num_proc=8); output goes to a separate folder.
extract_mfcc_processes("aac", "result_proc")

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


1h 21min 36s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


### Параллельно на потоках

In [8]:
%%timeit -r1 -n1
# One timed pass with the thread pool at its default size (num_threads=8); output goes to a separate folder.
extract_mfcc_threads("aac", "result_threds")

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


2h 30s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
