This tool converts a folder of samples to a big rectangular matrix with one mono sample per row.

Samples should be placed in `data/mydataset/samples/`. They could be `.mp3`, `.wav`, or anything else that ffmpeg can work with; note, however, that the file-collection step below only picks up files whose name ends in `.wav`. They may be all in one folder, or in nested sub-folders.

Change the path below to point to the root directory, e.g., `data/mydataset/`.

The sample rate `sr` is not necessarily the native sample rate of the samples; it is the sample rate you want to load them at.

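The output of this notebook is one set of files per drum class (`kick`, `tom`, ...), written to the `data_root` folder set below (e.g. `drumData/`):
* `drumData/<drum>_durations.txt`
* `drumData/<drum>_filenames.txt`
* `drumData/<drum>_samples.npy`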

In [51]:
import os
import re
import string
import numpy as np
from os.path import join
from utils import *
from multiprocessing import Pool


sampleRootDirectory = os.path.expanduser("~/Desktop/Samples")
# Each name doubles as a regex fragment (see makeRegex): the "." in "hi.hat"
# matches any single character, e.g. "hi-hat" or "hi hat".
drumNames = ["kick", "tom", "snare", "clap", "hi.hat", "ride", "crash"]


def isWavFile(path):
    """Return True if `path` ends in ".wav", ignoring case (".WAV" counts too)."""
    return path.lower().endswith('.wav')


def collectSamplePaths(rootDirectory):
    """Return the path of every .wav file found anywhere below `rootDirectory`."""
    paths = []
    for dirPath, _, names in os.walk(rootDirectory):
        for name in names:
            if isWavFile(name):
                paths.append(os.path.join(dirPath, name))
    return paths


def makeRegex(drumStr):
    """Build a pattern matching any string that contains `drumStr`.

    Every letter becomes a two-character class ("k" -> "[kK]") so the match is
    case-insensitive; all other characters are copied unchanged, which means
    regex metacharacters in `drumStr` keep their regex meaning.
    """
    pieces = ['[' + c + c.upper() + ']' if c.isalpha() else c for c in drumStr]
    return '.*' + "".join(pieces) + '.*'


def matchSamples(paths, names, patterns):
    """Map each drum name to the set of paths matched by its pattern.

    `names` and `patterns` are parallel sequences. The pattern is applied to
    the whole path, so a drum name appearing in a folder name also counts.
    """
    matched = {}
    for name, pattern in zip(names, patterns):
        compiled = re.compile(pattern)
        matched[name] = {path for path in paths if compiled.match(path)}
    return matched


def findIntersections(names, sampleSets):
    """Return [name1, name2, sharedPaths] for every pair of classes that overlap."""
    found = []
    for i, first in enumerate(names):
        for second in names[i + 1:]:
            shared = sampleSets[first] & sampleSets[second]
            if shared:
                found.append([first, second, shared])
    return found


# Collect all of the sample path strings
fileNames = collectSamplePaths(sampleRootDirectory)

# Create the regex patterns used to filter the samples into drum classes
drumRegex = [makeRegex(drum) for drum in drumNames]

# Filter filenames into sets by matching vs regex
drumSampleSets = matchSamples(fileNames, drumNames, drumRegex)

# Check if any samples end up in more than 1 class
intersections = findIntersections(drumNames, drumSampleSets)

# Drop every ambiguous sample from both classes it matched.
# note - for some classes, this significantly reduces the number of samples
for d1, d2, sharedSet in intersections:
    drumSampleSets[d1] = drumSampleSets[d1] - sharedSet
    drumSampleSets[d2] = drumSampleSets[d2] - sharedSet

In [52]:
data_root = 'drumData'  # folder the per-drum output files are written to
sr = 48000  # sample rate at which every file is loaded
max_length = sr * 4  # ignore samples of 4 seconds or longer
# Trim all samples to 250 milliseconds. Floor division keeps this an integer
# sample count under true division as well (sr / 4 would give 12000.0 there,
# which is not a valid array length).
fixed_length = sr // 4
limit = None  # set this to 100 to only load the first 100 samples

In [53]:
def load_sample(fn, sr=None,
                max_length=None, fixed_length=None, normalize=True):
    """Load one audio file as mono, optionally fix its length and peak-normalize it.

    Args:
        fn: path of the audio file; an empty string is skipped.
        sr: sample rate handed to `ffmpeg_load_audio`.
        max_length: if truthy, files with this many samples or more are skipped.
        fixed_length: if truthy, the signal is truncated or zero-padded to
            exactly this many samples.
        normalize: if true, divide the signal by its peak absolute value.

    Returns:
        `(fn, audio, duration)`, where `duration` is the sample count before
        any truncation/padding, or `None` when the file is skipped (empty
        name, no samples, too long, or silent after length fixing).
    """
    if fn == '': # ignore empty filenames
        return None
    # presumably returns a 1-D NumPy array plus the sample rate - confirm in utils
    audio, _ = ffmpeg_load_audio(fn, sr, mono=True)
    duration = len(audio)
    if duration == 0: # ignore zero-length samples
        return None
    if max_length and duration >= max_length: # ignore long samples
        return None
    if fixed_length:
        # Copy into a fresh zero-filled buffer instead of resizing in place:
        # in-place ndarray.resize raises when the array does not own its data
        # or is referenced elsewhere, and it rejects a float length.
        target = int(fixed_length)
        fitted = np.zeros(target, dtype=audio.dtype)
        head = audio.ravel()[:target]
        fitted[:head.size] = head
        audio = fitted
    # Silence is checked after length fixing, so a file whose kept portion is
    # all zeros is dropped even if it has sound later on.
    max_val = np.abs(audio).max()
    if max_val == 0: # ignore completely silent sounds
        return None
    if normalize:
        audio /= max_val
    return (fn, audio, duration)

In [54]:
for drumName in drumNames:
    files = list(drumSampleSets[drumName])
    def job(fn):
        return load_sample(fn, sr=sr,
                           max_length=max_length, fixed_length=fixed_length)
    pool = Pool()
    %time results = pool.map(job, files[:limit])
    print 'Processed', len(results), 'samples for ', drumName

CPU times: user 303 ms, sys: 569 ms, total: 872 ms
Wall time: 29.5 s
Processed 5395 samples for  kick
CPU times: user 48 ms, sys: 55.8 ms, total: 104 ms
Wall time: 3.85 s
Processed 529 samples for  tom
CPU times: user 204 ms, sys: 279 ms, total: 484 ms
Wall time: 17 s
Processed 2563 samples for  snare
CPU times: user 112 ms, sys: 140 ms, total: 253 ms
Wall time: 8.78 s
Processed 1332 samples for  clap
CPU times: user 20.2 ms, sys: 26.3 ms, total: 46.5 ms
Wall time: 1.2 s
Processed 167 samples for  hi.hat
CPU times: user 29 ms, sys: 30.2 ms, total: 59.3 ms
Wall time: 2.04 s
Processed 250 samples for  ride
CPU times: user 104 ms, sys: 98.8 ms, total: 202 ms
Wall time: 8.55 s
Processed 998 samples for  crash


In [50]:
for drumName in drumNames:
    valid = filter(None, results)
    filenames = [x[0] for x in valid]
    samples = [x[1] for x in valid]
    durations = [x[2] for x in valid]
    samples = np.asarray(samples)
    np.savetxt(join(data_root, drumName+'_filenames.txt'), filenames, fmt='%s')
    np.savetxt(join(data_root, drumName+'_durations.txt'), durations, fmt='%i')
    %time np.save(join(data_root, drumName+'_samples.npy'), samples)
    print 'Saved', len(valid), 'samples of '+drumName

CPU times: user 1.8 ms, sys: 58.1 ms, total: 59.9 ms
Wall time: 68 ms
Saved 723 samples of crash
