In [1]:
import os
import json
import librosa
import soundfile
import numpy as np
import pandas as pd
from collections import Counter
from os.path import join, exists
from tqdm import tqdm

In [2]:
def mix_slices(main_sample, slices):
    """Average a main clip with zero or more extra clips, element by element.

    Args:
        main_sample: array-like holding the primary clip.
        slices: sequence of array-likes to mix in. Each one is added to the
            main clip with NumPy's in-place broadcasting rules, so a shape
            that cannot be broadcast onto ``main_sample`` raises ``ValueError``.

    Returns:
        A new ndarray equal to ``(main_sample + sum(slices)) / (len(slices) + 1)``.
        Floating-point (and complex) inputs keep their dtype; any other dtype
        is promoted to ``float64`` so the division is well defined.
        With an empty ``slices`` the result is simply a copy of ``main_sample``.
    """
    mixed = np.asarray(main_sample)
    if np.issubdtype(mixed.dtype, np.inexact):
        # Work on a copy so the caller's buffer is never modified.
        mixed = mixed.copy()
    else:
        # Integer/bool input: in-place true division would raise, so promote.
        mixed = mixed.astype(np.float64)

    for piece in slices:
        mixed += piece
    mixed /= len(slices) + 1
    return mixed

In [3]:
# Names of the source datasets; each one is read from clean_data/<name>/.
datasets = ["freesound"]
# Root of the cleaned single-label clips (input).
clean_data = join("data", "clean_data")
# Root of the generated multi-label mixes (output).
train_data = join("data", "train_data")
# Sample rate (Hz) used both when loading clips and when writing mixes.
sr = 44100

# The mixing step draws from NumPy's global RNG; seed it so that
# Restart Kernel -> Run All regenerates the same dataset.
random_seed = 42
np.random.seed(random_seed)

In [6]:
# Build the multi-label training set: every clean clip is averaged with
# 0-4 randomly chosen clips from the same dataset, and the mix is tagged
# with the labels of all clips that went into it.
#
# Produces:
#   classes - label names, in order of first appearance across all datasets
#   labels  - (n_mixes, n_classes) multi-hot array, one row per written file
#   files   - name of the written file for each row of `labels`

# Make sure the output directory exists before the first write.
samples_dir = join(train_data, "samples")
os.makedirs(samples_dir, exist_ok=True)

# Pass 1: read every manifest up front so the class vocabulary and the label
# matrix span all datasets instead of being reset for each one.
manifests = [
    (dataset, pd.read_csv(join(clean_data, dataset, "samples.csv"), index_col=0))
    for dataset in datasets
]
# dict.fromkeys keeps first-appearance order while removing duplicates.
classes = list(dict.fromkeys(
    label for _, csv in manifests for label in csv.label
))
class_index = {name: position for position, name in enumerate(classes)}
labels = np.zeros((sum(len(csv) for _, csv in manifests), len(classes)))
files = []

# Pass 2: mix and write. `sample_n` is both the output file number and the
# row of `labels` being filled, so the two can never drift apart (the CSV's
# own index labels are deliberately not used as positions).
sample_n = 0
for dataset, csv in manifests:
    source_dir = join(clean_data, dataset, "samples")
    # Each manifest row is expected to be exactly (file name, label).
    rows = csv.values
    for file, label in tqdm(rows, total=len(rows), desc=dataset):
        sample_name = f"{sample_n}.wav"
        main_sample, _ = librosa.load(join(source_dir, file), sr=sr)

        # Pick 0-4 extra clips (with replacement) from the same dataset.
        n_samples = np.random.randint(0, 5)
        samples = np.random.choice(np.arange(len(rows)), size=n_samples)
        samples = rows[samples]
        # NOTE(review): mix_slices adds the clips element-wise, so this
        # assumes all clips of a dataset have the same length - confirm.
        samples_x = [librosa.load(join(source_dir, x), sr=sr)[0]
                     for x, _ in samples]
        samples_y = [y for _, y in samples]
        samples_y.append(label)

        main_sample = mix_slices(main_sample, samples_x)
        for y in samples_y:
            labels[sample_n, class_index[y]] = 1
        # Record the name of the file actually written, not the source clip.
        files.append(sample_name)

        soundfile.write(join(samples_dir, sample_name), main_sample, sr)
        sample_n += 1

16610it [31:45,  8.72it/s]


In [8]:
# One row per generated mix: a multi-hot column for every class, followed by
# the file name the row belongs to.
df = pd.DataFrame(data=labels, columns=classes).assign(file=files)

In [9]:
# Preview the first five rows of the label table.
df.head(n=5)

Unnamed: 0,Hi-hat,Saxophone,Clarinet,Flute,Snare_drum,Double_bass,Electric_piano,Trumpet,Acoustic_guitar,Tambourine,Violin_or_fiddle,Bass_drum,file
0,1.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.wav
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.wav
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.wav
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,3.wav
4,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,4.wav


In [10]:
# Write the label table (row index included) into the training-data root.
df.to_csv(os.path.join(train_data, "samples.csv"), index=True)