In [72]:
from scipy.io.wavfile import read as wavread, write as wavwrite
import glob
import os
from joblib import Parallel, delayed
import multiprocessing
import librosa
from bwfsoundfile import BwfSoundFile
from IPython.display import display, Audio
import numpy as np

In [73]:
# Conversion settings: source tree, destination tree, and target rate in Hz.
out_sample_rate = 16000
root_dir = 'data/Gamemaster_Audio'
out_dir = 'data/Gamemaster_Audio_16KHz'

# Gather every .wav below root_dir (any depth), stored relative to root_dir so
# the same relative path can later be joined onto out_dir.
file_list = [
    os.path.relpath(match, root_dir)
    for match in glob.iglob(os.path.join(root_dir, '**/*.wav'), recursive=True)
]

In [74]:
# Sanity check: how many .wav files were discovered under root_dir.
len(file_list)

8076

In [81]:
def process_file(fp, in_root=None, out_root=None, target_sr=None):
    """Resample one WAV file and write it as float audio under the output tree.

    The file is read from ``in_root/fp`` and written to ``out_root/fp``;
    missing parent directories of the destination are created.

    Args:
        fp: Path of the WAV file relative to the input root.
        in_root: Directory containing the source files. Defaults to the
            notebook-level ``root_dir``.
        out_root: Directory receiving the converted files. Defaults to the
            notebook-level ``out_dir``.
        target_sr: Output sample rate in Hz. Defaults to the notebook-level
            ``out_sample_rate``.

    Returns:
        None. The result is the file written to disk.
    """
    # Fall back to the notebook-level configuration when no override is given.
    if in_root is None:
        in_root = root_dir
    if out_root is None:
        out_root = out_dir
    if target_sr is None:
        target_sr = out_sample_rate

    in_fp = os.path.join(in_root, fp)
    out_fp = os.path.join(out_root, fp)

    try:
        sr, wav = wavread(in_fp)
    except ValueError:
        # scipy could not parse this file; retry with the BWF reader, which
        # hands back float32 samples directly.
        with BwfSoundFile(in_fp) as bwf_file:
            sr = bwf_file.samplerate
            wav = bwf_file.read(-1, dtype='float32')

    # Scale integer PCM to float32 in [-1, 1); float input passes through.
    if wav.dtype == np.uint8:
        # 8-bit PCM is unsigned with its zero level at 128.
        wav = (wav.astype(np.float32) - 128.) / 128.
    elif wav.dtype == np.int16:
        wav = wav.astype(np.float32) / 32768.
    elif wav.dtype == np.int32:
        wav = wav.astype(np.float32) / 2147483648.

    if sr == target_sr:
        # Already at the requested rate: resampling would be a no-op.
        resampled_wav = wav
    else:
        # Transpose so time is the last axis for the resampler, then restore
        # the original layout for writing. The rates are passed by keyword
        # because newer librosa releases reject them positionally.
        resampled_wav = librosa.resample(
            wav.T, orig_sr=sr, target_sr=target_sr
        ).T

    working_dir = os.path.dirname(out_fp)
    if working_dir:
        # os.makedirs('') raises, so only create a directory when there is one.
        os.makedirs(working_dir, exist_ok=True)
    wavwrite(out_fp, target_sr, resampled_wav)

In [82]:
# Smoke test: convert a single file before launching the full parallel run.
process_file(file_list[0])

In [83]:
# Convert every discovered file, using one worker per CPU reported by
# multiprocessing. verbose=5 makes joblib print periodic progress lines.
num_cores = multiprocessing.cpu_count()
worker_pool = Parallel(n_jobs=num_cores, verbose=5)
results = worker_pool(
    delayed(process_file)(rel_path) for rel_path in file_list
)

[Parallel(n_jobs=16)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  40 tasks      | elapsed:    3.5s
[Parallel(n_jobs=16)]: Done 130 tasks      | elapsed:    6.0s
[Parallel(n_jobs=16)]: Done 256 tasks      | elapsed:   14.1s
[Parallel(n_jobs=16)]: Done 502 tasks      | elapsed:   16.2s
[Parallel(n_jobs=16)]: Done 898 tasks      | elapsed:   19.4s
[Parallel(n_jobs=16)]: Done 1366 tasks      | elapsed:   26.3s
[Parallel(n_jobs=16)]: Done 1715 tasks      | elapsed:   30.6s
[Parallel(n_jobs=16)]: Done 2160 tasks      | elapsed:   34.5s
[Parallel(n_jobs=16)]: Done 3358 tasks      | elapsed:   40.8s
[Parallel(n_jobs=16)]: Done 4280 tasks      | elapsed:   54.1s
[Parallel(n_jobs=16)]: Done 5978 tasks      | elapsed:  1.1min
[Parallel(n_jobs=16)]: Done 7046 tasks      | elapsed:  1.2min
[Parallel(n_jobs=16)]: Done 8076 out of 8076 | elapsed:  1.6min finished
