In [None]:
import os
import librosa
import numpy as np
import soundfile as sf
from tqdm import tqdm

In [None]:
# Path to the preprocessed dataset folder (scanned for one sub-directory per class below)
base_dir = "dataset_preprocessed"

In [None]:
# Minimum number of .wav files each class folder should contain after augmentation
target = 500

In [None]:
# Augmentation functions
def pitch_shift(data, sr, n_steps=2):
    """Return ``data`` pitch-shifted by ``n_steps`` using librosa.

    Args:
        data: Audio time series handed to ``librosa.effects.pitch_shift``.
        sr: Sampling rate of ``data``.
        n_steps: Number of steps to shift by. Defaults to 2, the value that
            was previously hard-coded.

    Returns:
        The pitch-shifted audio as returned by librosa.
    """
    # ``sr`` must be passed by keyword: librosa 0.10+ accepts only the signal
    # positionally, and the keyword form also works on older releases.
    return librosa.effects.pitch_shift(data, sr=sr, n_steps=n_steps)

def time_stretch(data, rate=1.2):
    """Return ``data`` time-stretched by ``rate`` using librosa.

    Args:
        data: Audio time series handed to ``librosa.effects.time_stretch``.
        rate: Stretch factor forwarded to librosa (default 1.2).

    Returns:
        The time-stretched audio as returned by librosa.
    """
    # ``rate`` must be passed by keyword: librosa 0.10+ accepts only the signal
    # positionally, and the keyword form also works on older releases.
    return librosa.effects.time_stretch(data, rate=rate)

def add_noise(data, noise_factor=0.005):
    """Return ``data`` with standard-normal noise scaled by ``noise_factor`` added.

    Args:
        data: Array-like audio signal of any shape.
        noise_factor: Multiplier applied to the noise before it is added
            (default 0.005).

    Returns:
        ``data + noise_factor * noise`` where ``noise`` has the same shape as
        ``data``.
    """
    # Draw one independent sample per element. Sizing the noise by len(data)
    # only matches 1-D input; for multi-channel arrays it either fails to
    # broadcast or reuses the same noise along an axis.
    noise = np.random.standard_normal(size=np.shape(data))
    return data + noise_factor * noise

# Augment every class folder that holds fewer than `target` .wav files.
# Sorting the listings keeps the processing order reproducible across runs
# (os.listdir returns entries in arbitrary order).
for class_name in sorted(os.listdir(base_dir)):
    class_path = os.path.join(base_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    files = sorted(f for f in os.listdir(class_path) if f.endswith(".wav"))
    n_files = len(files)

    if n_files >= target:
        continue  # Already at or above the target

    if not files:
        # No source audio to augment from; the cycling index below would
        # otherwise raise ZeroDivisionError on `idx % len(files)`.
        print(f"Skipping class '{class_name}': no .wav files found")
        continue

    print(f"Augmenting class '{class_name}' from {n_files} to {target}")
    idx = 0
    while n_files < target:
        # Cycle through the source files as many times as needed
        file = files[idx % len(files)]
        file_path = os.path.join(class_path, file)
        stem = os.path.splitext(file)[0]
        data, sr = librosa.load(file_path, sr=16000)

        # NOTE(review): pitch_shift and time_stretch are called with fixed
        # parameters, so once idx wraps around the file list they presumably
        # regenerate the same audio for a given source file; only add_noise
        # is random. Confirm whether duplicates are acceptable.
        for aug_fn in (pitch_shift, time_stretch, add_noise):
            # Only pitch_shift takes the sampling rate. Passing `sr` to
            # add_noise would bind it to `noise_factor` and drown the signal.
            augmented = aug_fn(data, sr) if aug_fn is pitch_shift else aug_fn(data)
            # The running file count keeps names unique within this run
            new_name = f"{stem}_aug{n_files}.wav"
            new_path = os.path.join(class_path, new_name)
            sf.write(new_path, augmented, sr)
            n_files += 1
            if n_files >= target:
                break
        idx += 1
