This tool converts a folder of samples to a big rectangular matrix with one mono sample per row.

Samples should be placed in `data/mydataset/samples/`. They could be `.mp3`, `.wav`, or anything else that ffmpeg can work with. They may be all in one folder, or in nested sub-folders.

Change the path below to point to the root directory, e.g., `data/mydataset/`.

The samplerate `sr` is not necessarily the native samplerate of the samples; it is the samplerate you want to load them at.

The output of this notebook is:
* `data/mydataset/durations.txt`
* `data/mydataset/filenames.txt`
* `data/mydataset/samples.npy`

In [17]:
import os
import re
import string
import numpy as np
from os.path import join
from utils import *
from multiprocessing import Pool


# Root folder that is searched (recursively) for samples.
sampleRootDirectory = os.path.expanduser("~/Desktop/Samples")
# Drum classes. Each name is later turned into a regex, so the "." in
# "hi.hat" stands for any single character.
drumNames = ["kick", "tom", "snare", "clap", "hi.hat", "ride", "crash"]


# Collect the full path of every .wav file below the root directory.
fileNames = []
for dirPath, _subDirs, names in os.walk(sampleRootDirectory):
    for name in names:
        # Only names ending in exactly ".wav" are kept (the test is case-sensitive).
        if name.endswith('.wav'):
            # os.path.join uses the platform separator instead of a hard-coded "/".
            fileNames.append(os.path.join(dirPath, name))

        
#Create the regex patterns used to filter the samples into drum classes
def makeRegex(drumStr):
    """Build a regex string that finds `drumStr` anywhere in a string, ignoring letter case.

    Every alphabetic character becomes a two-letter character class
    (e.g. "k" -> "[kK]"). Every other character is copied unchanged, so regex
    metacharacters in `drumStr` (such as the "." in "hi.hat") keep their regex
    meaning. The result is wrapped in ".*" on both sides.
    """
    parts = []
    for c in drumStr:
        if c.isalpha():
            # Emit both cases explicitly so an upper-case input letter still
            # matches lower-case text (c + c.upper() alone would give "[TT]").
            parts.append('[' + c.lower() + c.upper() + ']')
        else:
            parts.append(c)
    return '.*' + "".join(parts) + '.*'
# One pattern string per drum class, in the same order as drumNames.
drumRegex = list(map(makeRegex, drumNames))


# Group the collected paths by drum class. A path belongs to a class when the
# class pattern matches it; the whole path is tested, not just the file name.
drumFileSets = {}
for drum, pattern in zip(drumNames, drumRegex):
    drumFileSets[drum] = set(path for path in fileNames if re.match(pattern, path))
    

# Find every pair of drum classes that share at least one sample.
# Each entry is [first class, second class, set of shared paths].
intersections = []
for idx, first in enumerate(drumNames):
    for second in drumNames[idx + 1:]:
        shared = drumFileSets[first] & drumFileSets[second]
        if shared:
            intersections.append([first, second, shared])

# Drop the ambiguous samples from both classes of each pair.
# Note: for some classes this significantly reduces the number of samples.
for first, second, shared in intersections:
    drumFileSets[first] = drumFileSets[first].difference(shared)
    drumFileSets[second] = drumFileSets[second].difference(shared)

In [18]:
data_root = 'drumData' # folder the per-drum output files are written to
sr = 48000 # samplerate the samples are loaded at
max_length = sr * 4 # ignore samples of 4 seconds or longer
fixed_length = sr // 4 # resize all samples to 250 milliseconds; // keeps the sample count an integer
limit = None # set this to 100 to only load the first 100 samples

In [19]:
def load_sample(fn, sr=None,
                max_length=None, fixed_length=None, normalize=True):
    """Load one audio file and return ``(fn, audio, duration)``, or ``None`` if it is rejected.

    The file is read through ``ffmpeg_load_audio(fn, sr, mono=True)``.
    ``duration`` is the length of the loaded audio *before* any resizing.

    A file is rejected (``None`` is returned) when:
      * ``fn`` is the empty string,
      * the loaded audio has length 0,
      * ``max_length`` is set and the length is at or above it,
      * the audio is entirely zero after the optional resize.

    fn           -- path of the file to load
    sr           -- samplerate handed to ``ffmpeg_load_audio``
    max_length   -- optional length limit, in samples
    fixed_length -- optional length the audio is resized to in place
                    (presumably a NumPy array resize -- confirm against
                    ``ffmpeg_load_audio``'s return type)
    normalize    -- when true, divide the audio in place by its peak absolute value
    """
    if fn == '':
        return None

    samples, _ = ffmpeg_load_audio(fn, sr, mono=True)
    n_samples = len(samples)
    # Reject empty files and, when a limit is given, files at or above it.
    if n_samples == 0 or (max_length and n_samples >= max_length):
        return None

    # Resize first: the silence check below looks only at the kept part.
    if fixed_length:
        samples.resize(fixed_length)

    peak = np.abs(samples).max()
    if peak == 0:
        return None

    if normalize:
        samples /= peak
    return (fn, samples, n_samples)

In [20]:
drumSampleSets = {}
for drumName in drumNames:
    files = list(drumFileSets[drumName])
    def job(fn):
        return load_sample(fn, sr=sr,
                           max_length=max_length, fixed_length=fixed_length)
    pool = Pool()
    %time drumSampleSets[drumName] = pool.map(job, files[:limit])
    print 'Processed', len(results), 'samples for ', drumName

CPU times: user 235 ms, sys: 321 ms, total: 557 ms
Wall time: 21.2 s
Processed 998 samples for  kick
CPU times: user 23 ms, sys: 21 ms, total: 44 ms
Wall time: 2.61 s
Processed 998 samples for  tom
CPU times: user 129 ms, sys: 149 ms, total: 278 ms
Wall time: 10.7 s
Processed 998 samples for  snare
CPU times: user 68.8 ms, sys: 56.6 ms, total: 125 ms
Wall time: 5.95 s
Processed 998 samples for  clap
CPU times: user 11.3 ms, sys: 10.3 ms, total: 21.7 ms
Wall time: 814 ms
Processed 998 samples for  hi.hat
CPU times: user 18.2 ms, sys: 15.6 ms, total: 33.7 ms
Wall time: 1.46 s
Processed 998 samples for  ride
CPU times: user 70.8 ms, sys: 54.5 ms, total: 125 ms
Wall time: 6.63 s
Processed 998 samples for  crash


In [21]:
drumLengths = []

for drumName in drumNames:
    valid = filter(None, drumSampleSets[drumName])
    filenames = [x[0] for x in valid]
    samples = [x[1] for x in valid]
    durations = [x[2] for x in valid]
    samples = np.asarray(samples)
    drumLengths.append(len(samples))
    np.savetxt(join(data_root, drumName+'_filenames.txt'), filenames, fmt='%s')
    np.savetxt(join(data_root, drumName+'_durations.txt'), durations, fmt='%i')
    %time np.save(join(data_root, drumName+'_samples.npy'), samples)
    print 'Saved', len(valid), 'samples of '+drumName

pickle.dump(drumNames, open(data_root+"/drumNames.pickle", "w"))
pickle.dump(drumLengths, open(data_root+"/drumLengths.pickle", "w"))

CPU times: user 15.1 ms, sys: 378 ms, total: 393 ms
Wall time: 589 ms
Saved 5158 samples of kick
CPU times: user 1.9 ms, sys: 35.3 ms, total: 37.2 ms
Wall time: 71.1 ms
Saved 422 samples of tom
CPU times: user 7.01 ms, sys: 150 ms, total: 157 ms
Wall time: 194 ms
Saved 2546 samples of snare
CPU times: user 2.93 ms, sys: 70.7 ms, total: 73.6 ms
Wall time: 91.4 ms
Saved 1324 samples of clap
CPU times: user 493 µs, sys: 7.82 ms, total: 8.31 ms
Wall time: 14.5 ms
Saved 159 samples of hi.hat
CPU times: user 1.94 ms, sys: 13.9 ms, total: 15.8 ms
Wall time: 26.9 ms
Saved 228 samples of ride
CPU times: user 1.61 ms, sys: 40.2 ms, total: 41.8 ms
Wall time: 53.5 ms
Saved 723 samples of crash
