In [1]:
import os
import random
import numpy as np
from scipy.io import wavfile

In [2]:
# Sample rate (samples per second) that every background-noise file must have.
SAMPLE_RATE = 16000
# Number of samples in each generated clip (one second at SAMPLE_RATE).
SAMPLE_LENGTH = 16000
# RMS level each generated mix is rescaled to.
RMS_TARGET = 0.02
# Upper bound on how many noise recordings are mixed into a single clip.
MAX_NOISES = 3

# Number of clips to generate for each split, keyed by split name.
SPLITS = {
    "train": 2000,
    "val": 250,
    "test": 250
}

In [3]:
# Don't change the SEED value.
# It seeds both Python's `random` module and NumPy's global generator.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [4]:
# Data directory containing the rest of the dataset.
# Set the SPEECH_COMMANDS_DIR environment variable to point at your own copy;
# the literal path below is only the fallback used when the variable is unset.
data_dir = os.environ.get(
    "SPEECH_COMMANDS_DIR",
    "/home/robin/Bureau/ETUDES/M2/S2/TAP/Projet-Traitement-Automatique-de-la-Parole/Dataset/speech_commands_v0.02",
)

In [5]:
def load_wav(path):
    """Read a WAV file and return its samples as float32 plus the sample rate.

    The scaling depends on the dtype returned by ``wavfile.read``:
    floating-point data is returned unscaled, unsigned 8-bit data is
    re-centred around zero and divided by 128, and signed integer data is
    divided by the magnitude of the dtype's most negative value (32768 for
    int16), so integer input ends up in [-1.0, 1.0).

    Args:
        path: Path of the WAV file to read.

    Returns:
        A tuple ``(audio, sr)`` where ``audio`` is a float32 array and ``sr``
        is the sample rate reported by ``wavfile.read``.
    """
    sr, audio = wavfile.read(path)

    if np.issubdtype(audio.dtype, np.floating):
        # Float WAVs are not integer PCM; dividing by 32768 would all but
        # silence them.
        return audio.astype(np.float32), sr

    if audio.dtype == np.uint8:
        # 8-bit PCM is unsigned with its midpoint at 128.
        return (audio.astype(np.float32) - 128.0) / 128.0, sr

    # Signed PCM: scale by the full-scale magnitude of the actual sample type.
    full_scale = float(-np.iinfo(audio.dtype).min)
    return audio.astype(np.float32) / full_scale, sr

def load_noises(noise_dir):
    """Load every ``.wav`` file found directly inside ``noise_dir``.

    Files are read in sorted name order. ``os.listdir`` returns entries in
    arbitrary order, and ``generate_silence`` picks clips from this list with
    the seeded ``random.sample``, so an unsorted list would make the generated
    dataset differ from one machine to another despite the fixed seed.

    Args:
        noise_dir: Directory containing the background-noise recordings.

    Returns:
        A list of float32 arrays, one per ``.wav`` file, in sorted name order.

    Raises:
        AssertionError: If a file's sample rate differs from ``SAMPLE_RATE``.
    """
    noises = []
    for fname in sorted(os.listdir(noise_dir)):
        if not fname.endswith(".wav"):
            continue
        audio, sr = load_wav(os.path.join(noise_dir, fname))
        assert sr == SAMPLE_RATE, (
            f"{fname}: expected sample rate {SAMPLE_RATE}, got {sr}"
        )
        noises.append(audio.astype(np.float32))
    return noises


def random_crop(signal, length):
    """Return exactly ``length`` samples taken from ``signal``.

    A signal shorter than ``length`` is zero-padded at the end. A signal of
    exactly ``length`` samples is returned as a copy (the pad width is zero).
    A longer signal yields a window starting at a random offset drawn from
    NumPy's global generator.

    Args:
        signal: Array of samples to crop or pad.
        length: Number of samples wanted.

    Returns:
        An array of ``length`` samples.
    """
    n_samples = len(signal)
    if n_samples <= length:
        # The equality case must not reach randint: randint(0, 0) raises
        # ValueError because the upper bound is exclusive.
        return np.pad(signal, (0, length - n_samples))

    # The upper bound is exclusive, so the very last possible offset
    # (n_samples - length) is never drawn. The range is deliberately left
    # unchanged so that seeded runs keep producing the same offsets.
    start = np.random.randint(0, n_samples - length)
    return signal[start:start + length]


def generate_silence(noises):
    """Build one "silence" clip by mixing random crops of background noise.

    Between 1 and ``MAX_NOISES`` distinct recordings are chosen (never more
    than are available). Each one is cropped to ``SAMPLE_LENGTH`` samples,
    weighted by a factor drawn uniformly from [0.3, 1.0) and summed. The sum
    is then rescaled so its RMS equals ``RMS_TARGET``, unless it is all zeros.

    Args:
        noises: List of noise recordings (arrays of samples).

    Returns:
        A float32 array of ``SAMPLE_LENGTH`` samples.

    Raises:
        ValueError: If ``noises`` is empty.
    """
    if len(noises) == 0:
        raise ValueError("generate_silence needs at least one noise recording")

    # Cap the draw at the number of recordings available: random.sample raises
    # when asked for more items than the population holds. With at least
    # MAX_NOISES recordings this is the same draw as before.
    n_mix = random.randint(1, min(MAX_NOISES, len(noises)))
    selected = random.sample(noises, n_mix)

    mix = np.zeros(SAMPLE_LENGTH, dtype=np.float32)

    for noise in selected:
        crop = random_crop(noise, SAMPLE_LENGTH)
        weight = np.random.uniform(0.3, 1.0)
        mix += weight * crop

    rms = np.sqrt(np.mean(mix ** 2))
    if rms > 0:
        # Skip the rescale for an all-zero mix to avoid dividing by zero.
        mix *= RMS_TARGET / rms

    return mix


def save_wav(path, audio, sample_rate=None):
    """Write ``audio`` to ``path`` as a 16-bit PCM WAV file.

    Samples are clipped to [-1.0, 1.0], multiplied by 32767 and converted to
    int16 (the conversion truncates toward zero).

    Args:
        path: Destination file path.
        audio: Array of float samples.
        sample_rate: Sample rate written to the file header. Defaults to the
            notebook-wide ``SAMPLE_RATE`` so the rate is defined in one place
            instead of being repeated here as a literal.
    """
    if sample_rate is None:
        sample_rate = SAMPLE_RATE

    clipped = np.clip(audio, -1.0, 1.0)
    wavfile.write(
        path,
        sample_rate,
        (clipped * 32767).astype(np.int16)
    )

def write_split_to_file(out_dir, split, files_names):
    """Write the entries of ``files_names``, one per line, to a list file.

    The file is created (or overwritten) at
    ``<out_dir>/<split>_silence_list.txt``.

    Args:
        out_dir: Directory in which the list file is written.
        split: Split name used as the file-name prefix.
        files_names: Iterable of strings to write, one per line.
    """
    list_path = os.path.join(out_dir, f"{split}_silence_list.txt")
    with open(list_path, "w") as handle:
        handle.writelines(entry + "\n" for entry in files_names)

In [6]:
def generate_split(noises, out_dir, n_samples, split_name):
    """Generate and save ``n_samples`` silence clips for one split.

    Clips are written to ``out_dir`` as ``<split_name>_<index>.wav`` with a
    zero-padded five-digit index.

    Args:
        noises: Noise recordings passed on to ``generate_silence``.
        out_dir: Directory in which the WAV files are written.
        n_samples: Number of clips to generate.
        split_name: Prefix used in each file name.

    Returns:
        A list with one ``silence/<file name>`` entry per clip, in generation
        order.
    """
    def _make_clip(index):
        # Generate first, then name and save, one clip at a time.
        clip = generate_silence(noises)
        wav_name = f"{split_name}_{index:05d}.wav"
        save_wav(os.path.join(out_dir, wav_name), clip)
        return f"silence/{wav_name}"

    return [_make_clip(index) for index in range(n_samples)]

In [7]:
# Re-seed both generators so that re-running this cell on its own regenerates
# exactly the same files instead of continuing from wherever the generators
# were left. On a fresh top-to-bottom run this changes nothing, because no
# random numbers are drawn between the seed cell and this point.
random.seed(SEED)
np.random.seed(SEED)

# Background noise is read from, and silence clips are written to,
# sub-directories of data_dir.
noise_dir = os.path.join(data_dir, "_background_noise_")
output_root = os.path.join(data_dir, "silence")

os.makedirs(output_root, exist_ok=True)

noises = load_noises(noise_dir)

# Generate each split in the order declared in SPLITS, then write its file
# list next to the dataset.
for split, n_samples in SPLITS.items():
    files_names = generate_split(
        noises,
        output_root,
        n_samples,
        split
    )
    assert len(files_names) == n_samples
    print(f"Pour le split {split} on a {len(files_names)} fichiers.")
    write_split_to_file(data_dir, split, files_names)

  sr, audio = wavfile.read(path)


Pour le split train on a 2000 fichiers.
Pour le split val on a 250 fichiers.
Pour le split test on a 250 fichiers.
