In [10]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.externals.joblib import Parallel, parallel_backend, delayed

In [11]:
# paths
# Three locations, in order: the directory holding the source .wav files, the
# annotation CSV, and the directory the spectrogram .npy files are written to.
# os.path.expanduser resolves the leading "~" to the current user's home.
audio_dir, ann_path, output_dir = map(os.path.expanduser, (
    '~/Desktop/university/AAU/1/sound-morphing/data/borisdrums',
    '~/syncdir/datasets/borisdrums/annotation.csv',
    '~/syncdir/datasets/borisdrums/spectrograms',
))

In [12]:
# read annotations
# Load the annotation table with pandas' pure-Python CSV engine.
df = pd.read_csv(ann_path, engine='python')

# Per-row label column, kept as a Series.
labels = df['label']

# One "<name>.wav" string per row, in row order; the 'filename' column
# presumably stores clip names without an extension.
filenames = list(map("{}.wav".format, df['filename'].values))

In [42]:
# pad or truncate a sequence data to a fixed length.
def pad_trunc_seq(x, max_len):
    """
    Force ``x`` to exactly ``max_len`` entries along its first axis.

    Shorter inputs are extended with trailing zeros; longer (or equal-length)
    inputs are cut to their first ``max_len`` entries. Any trailing axes are
    left untouched.

    :param x: ndarray, The input sequence data
    :param max_len: int, length of sequence to be padded or truncated
    :return: ndarray, Padded or truncated input sequence data, with the same
        dtype as ``x``. In the truncating case this is a slice of ``x``.
    """
    length = len(x)
    if length >= max_len:
        return x[0:max_len]

    # Create the padding in x's own dtype. np.zeros defaults to float64, which
    # would upcast e.g. float32 input, and only for inputs that needed padding,
    # so short and long sequences would come back with different dtypes.
    pad_shape = (max_len - length,) + x.shape[1:]
    pad = np.zeros(pad_shape, dtype=x.dtype)
    return np.concatenate((x, pad), axis=0)

# generate single spectrum
# set STFT parameters here!
def extract_spectrum(file_path, sr=16000, n_samples=16000, n_fft=1024, hop_length=512):
    """
    Load one audio file and compute its short-time Fourier transform.

    The audio is loaded as mono at ``sr`` Hz, then padded with zeros or
    truncated to exactly ``n_samples`` samples so every file produces a
    spectrum with the same number of frames. With the defaults that is one
    second of audio at 16 kHz.

    :param file_path: path of the audio file to load
    :param sr: int, sample rate the audio is resampled to on load
    :param n_samples: int, fixed signal length (in samples) fed to the STFT
    :param n_fft: int, FFT window size
    :param hop_length: int, number of samples between successive frames
    :return: the STFT matrix as returned by ``librosa.stft`` (Hann window,
        reflect padding)
    """
    # The sample rate reported by librosa.load equals the requested `sr`,
    # so it is not needed afterwards.
    y, _ = librosa.load(file_path, sr=sr, mono=True)
    y = pad_trunc_seq(y, n_samples)
    return librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                        window='hann', pad_mode='reflect')

# stores single spectrum
def write_data(entry, audio_dir, output_dir):
    """
    Compute and store the spectrum for one annotation row.

    Reads ``<audio_dir>/<filename>.wav`` and writes the result of
    ``extract_spectrum`` to ``<output_dir>/<filename>.npy``. If the output
    file already exists the row is skipped, so an interrupted run can be
    resumed by calling this again.

    :param entry: mapping-like row with a ``'filename'`` key (name without
        extension), e.g. a row yielded by ``DataFrame.iterrows()``
    :param audio_dir: directory containing the source ``.wav`` files
    :param output_dir: directory the ``.npy`` files are written to; created
        if it does not exist
    :return: None
    """
    # Local import: only needed for the temporary-file name below.
    import uuid

    name = entry['filename']
    output_path = os.path.join(output_dir, "{}.npy".format(name))
    if os.path.exists(output_path):
        return

    os.makedirs(output_dir, exist_ok=True)

    file_path = os.path.join(audio_dir, "{}.wav".format(name))
    spectrum = extract_spectrum(file_path)

    # Existence of output_path is the "already done" marker, so it must never
    # point at a half-written file. Write to a unique temporary name first and
    # move it into place only once the save has completed.
    tmp_path = "{}.{}.tmp".format(output_path, uuid.uuid4().hex)
    try:
        # Passing a file object stops np.save from appending ".npy" to the
        # temporary name.
        with open(tmp_path, "wb") as fh:
            np.save(fh, spectrum)
        os.replace(tmp_path, output_path)
    finally:
        # Only still present if the save or the move failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        
# generate all the spectra using parallel jobs
def extract_spectra(df, audio_dir, output_dir):
    """
    Run ``write_data`` once for every row of ``df`` on joblib's threading backend.

    :param df: DataFrame whose rows are handed to ``write_data`` one by one
    :param audio_dir: directory containing the source audio files
    :param output_dir: directory the spectra are written to
    :return: None
    """
    # Lazy stream of delayed calls; nothing runs until the Parallel object
    # consumes it inside the backend context below.
    jobs = (
        delayed(write_data)(row, audio_dir, output_dir)
        for _, row in df.iterrows()
    )
    with parallel_backend('threading'):
        # n_jobs=-1 asks joblib for all available workers; verbose=4 makes it
        # print periodic progress lines.
        pool = Parallel(n_jobs=-1, verbose=4)
        pool(jobs)


In [47]:
# test the outcome
# Smoke test: push only the first annotation row through write_data before
# launching the full parallel run. An empty table results in no call at all.
for index, entry in df.head(1).iterrows():
    write_data(entry, audio_dir, output_dir)

In [48]:
# gogogo!
# Full run over every annotated file. Rows whose .npy output already exists
# are skipped by write_data, so this cell can be re-run after an interruption.
extract_spectra(df=df, audio_dir=audio_dir, output_dir=output_dir)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    6.1s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   30.1s
[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done 384 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done 605 tasks      | elapsed:  3.4min
[Parallel(n_jobs=-1)]: Done 874 tasks      | elapsed:  5.0min
[Parallel(n_jobs=-1)]: Done 1193 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done 1560 tasks      | elapsed:  9.2min
[Parallel(n_jobs=-1)]: Done 1977 tasks      | elapsed: 11.6min
[Parallel(n_jobs=-1)]: Done 2442 tasks      | elapsed: 14.5min
[Parallel(n_jobs=-1)]: Done 2957 tasks      | elapsed: 17.7min
[Parallel(n_jobs=-1)]: Done 3520 tasks      | elapsed: 21.9min
[Parallel(n_jobs=-1)]: Done 4133 tasks      | elapsed: 26.1min
[Parallel(n_jobs=-1)]: Done 4794 tasks      | elapsed: 30.5min
[Parallel(n_jobs=-1)]: Done 5505 tasks     

array(['kick-open-1620Pearl-Vel87', 'kick-hit-1624Gretsch-Vel73',
       'kick-hit-1426LudwigMuff-Vel68', ...,
       'snare-center-Ayotte513-Vel119',
       'hihat-ClosedEdge-ZildanBosphorus-Vel67',
       'snare-center-Lingnum814-Vel73'], dtype=object)