Instal Dependencies

In [1]:
# Install required libraries
!pip install librosa soundfile audiomentations pydub numpy scipy

Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting soundfile
  Downloading soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting audiomentations
  Downloading audiomentations-0.43.1-py3-none-any.whl.metadata (11 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.1.0-py3-none-any.whl.metadata (9.0 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-1.0.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.2-cp311-cp311-win_amd64.whl.metadata (8.4 kB)
Collecting numpy-minmax<1,>=0.3.0 (from audiomentations)
  Downloading numpy_minmax-0.5.0-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting numpy-rms<1,>=0.4.2 (from audiomentations)
  Do


[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: C:\Users\VICTUS\AppData\Local\Programs\Python\Python311\python.exe -m pip install --upgrade pip


Import Library

In [2]:
import os
import librosa
import soundfile as sf
import numpy as np
from audiomentations import Compose, AddGaussianNoise, TimeStretch, PitchShift, Shift
from pydub import AudioSegment
import random
from tqdm import tqdm

Setup Path

In [3]:
import os
import glob

# Define paths
base_path = r"C:/Users/VICTUS/Documents/PSD/dataset/audio_sample"
output_path = r"C:/Users/VICTUS/Documents/PSD/dataset/augmented_audio"

# Target: 100 files per category
TARGET_FILES_PER_CATEGORY = 100

# Create output directories
os.makedirs(os.path.join(output_path, "buka"), exist_ok=True)
os.makedirs(os.path.join(output_path, "tutup"), exist_ok=True)

# Scan audio files inside category folder
def get_audio_files(category_dir):
    """Return list of files IN FULL PATH (not only filename)"""
    audio_extensions = (".mp3", ".wav", ".m4a", ".flac")
    files = []

    if os.path.exists(category_dir):
        for file in os.listdir(category_dir):
            if file.lower().endswith(audio_extensions):
                files.append(file)  # filename only
    return sorted(files)


# Fix: kategorinya harus 'buka' dan 'tutup'
SOURCE_FILES = {
    'buka': get_audio_files(os.path.join(base_path, "buka")),
    'tutup': get_audio_files(os.path.join(base_path, "tutup"))
}

# path folder buka / tutup
folder_buka = os.path.join(base_path, "buka")
folder_tutup = os.path.join(base_path, "tutup")

# ✅ FIX: hanya 1 argumen
buka_files = get_audio_files(folder_buka)
tutup_files = get_audio_files(folder_tutup)

print(f"Base path: {base_path}")
print(f"Output path: {output_path}")
print(f"Target: {TARGET_FILES_PER_CATEGORY} files per category")

print("\nSource files found:")
print(f"  Buka category: {len(buka_files)} files")
for file in buka_files:
    print("   ", file)

print(f"\n  Tutup category: {len(tutup_files)} files")
for file in tutup_files:
    print("   ", file)

print("DEBUG Source files:", SOURCE_FILES)


Base path: C:/Users/VICTUS/Documents/PSD/dataset/audio_sample
Output path: C:/Users/VICTUS/Documents/PSD/dataset/augmented_audio
Target: 100 files per category

Source files found:
  Buka category: 10 files
    Buka-1.mp3
    Buka-10.mp3
    Buka-2.mp3
    Buka-3.mp3
    Buka-4.mp3
    Buka-5.mp3
    Buka-6.mp3
    Buka-7.mp3
    Buka-8.mp3
    Buka-9.mp3

  Tutup category: 10 files
    Tutup-1.mp3
    Tutup-10.mp3
    Tutup-2.mp3
    Tutup-3.mp3
    Tutup-4.mp3
    Tutup-5.mp3
    Tutup-6.mp3
    Tutup-7.mp3
    Tutup-8.mp3
    Tutup-9.mp3
DEBUG Source files: {'buka': ['Buka-1.mp3', 'Buka-10.mp3', 'Buka-2.mp3', 'Buka-3.mp3', 'Buka-4.mp3', 'Buka-5.mp3', 'Buka-6.mp3', 'Buka-7.mp3', 'Buka-8.mp3', 'Buka-9.mp3'], 'tutup': ['Tutup-1.mp3', 'Tutup-10.mp3', 'Tutup-2.mp3', 'Tutup-3.mp3', 'Tutup-4.mp3', 'Tutup-5.mp3', 'Tutup-6.mp3', 'Tutup-7.mp3', 'Tutup-8.mp3', 'Tutup-9.mp3']}


Fungsi Augmentasi Audio

In [4]:
def load_audio(file_path):
    """
    Load audio from various formats (wav, mp3, m4a, flac).
    Uses librosa first, falls back to pydub (FFmpeg) if needed.
    """
    try:
        # Try loading using librosa first
        audio, sr = librosa.load(file_path, sr=None, mono=True)
        return audio, sr

    except Exception as e:
        print(f"[Librosa failed] {file_path}: {e}")
        print("→ Trying fallback loader (pydub)...")

        try:
            from pydub import AudioSegment

            audio_seg = AudioSegment.from_file(file_path)
            sr = audio_seg.frame_rate

            samples = np.array(audio_seg.get_array_of_samples()) / 32768.0  # convert to float32 -1..1

            # If stereo, convert to mono
            if audio_seg.channels > 1:
                samples = samples.reshape((-1, audio_seg.channels))
                samples = samples.mean(axis=1)

            return samples.astype(np.float32), sr

        except Exception as e2:
            print(f"❌ Failed to load {file_path} using fallback: {e2}")
            return None, None


def save_audio(audio, sr, output_path):
    """
    Save audio data to file
    """
    try:
        sf.write(output_path, audio, sr)
        print(f"✅ Saved: {output_path}")
    except Exception as e:
        print(f"❌ Error saving {output_path}: {e}")


In [5]:
def pitch_shift_augmentation(audio, sr, n_steps_list=[2, -2, 3, -3]):
    """
    Pitch shifting: mengubah nada suara
    """
    augmented_audios = []
    for n_steps in n_steps_list:
        shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
        augmented_audios.append((shifted, f"pitch_{n_steps}"))
    return augmented_audios

def time_stretch_augmentation(audio, rates=[0.9, 1.1, 0.85, 1.15]):
    augmented_audios = []
    for rate in rates:
        try:
            stretched = librosa.effects.time_stretch(audio, rate=rate)
            augmented_audios.append((stretched, f"timestretch_{rate}"))
        except Exception as e:
            print(f"⚠ Error time stretch (rate={rate}): {e}")
    return augmented_audios


def add_noise_augmentation(audio, sr, noise_levels=[0.005, 0.01, 0.015]):
    augmented_audios = []
    for noise_level in noise_levels:
        noise = np.random.randn(len(audio)) * noise_level
        noisy_audio = audio + noise

        # Normalize safely
        max_val = np.max(np.abs(noisy_audio))
        if max_val > 0:
            noisy_audio = noisy_audio / max_val

        augmented_audios.append((noisy_audio, f"noise_{noise_level}"))
    return augmented_audios


def speed_change_augmentation(audio, sr, speed_factors=[1.1, 0.9, 1.2, 0.8]):
    augmented_audios = []
    for speed in speed_factors:
        try:
            indices = np.round(np.arange(0, len(audio), speed)).astype(int)
            indices = indices[indices < len(audio)]
            changed = audio[indices]
            augmented_audios.append((changed, f"speed_{speed}"))
        except Exception as e:
            print(f"⚠ Error speed change (speed={speed}): {e}")
    return augmented_audios


def volume_change_augmentation(audio, volume_factors=[1.2, 0.8, 1.3, 0.7]):
    """
    Volume change: mengubah volume audio
    """
    augmented_audios = []
    for volume in volume_factors:
        changed = audio * volume
        # Clip to prevent distortion
        changed = np.clip(changed, -1.0, 1.0)
        augmented_audios.append((changed, f"volume_{volume}"))
    return augmented_audios

In [6]:
def generate_diverse_augmentation(audio, sr):
    """
    Generate berbagai variasi augmentasi dengan parameter yang lebih beragam dan aman dari error
    """
    augmented_audios = []

    # 1. Pitch Shift
    pitch_steps = [-4, -3, -2, -1, 1, 2, 3, 4, -2.5, 2.5]
    for step in pitch_steps:
        try:
            shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=step)
            augmented_audios.append(shifted)
        except Exception as e:
            print(f"⚠ PitchShift error ({step}): {e}")

    # 2. Time Stretch
    time_rates = [0.8, 0.85, 0.9, 0.95, 1.05, 1.1, 1.15, 1.2, 0.92, 1.08]
    for rate in time_rates:
        try:
            stretched = librosa.effects.time_stretch(audio, rate=rate)
            augmented_audios.append(stretched)
        except Exception as e:
            print(f"⚠ TimeStretch error ({rate}): {e}")

    # 3. Noise Addition
    noise_levels = [0.003, 0.005, 0.007, 0.01, 0.012, 0.015, 0.02, 0.004, 0.008, 0.018]
    for noise_level in noise_levels:
        noise = np.random.randn(len(audio)) * noise_level
        noisy = audio + noise
        noisy = noisy / (np.max(np.abs(noisy)) + 1e-8)
        augmented_audios.append(noisy)

    # 4. Speed Change (mengubah playback speed)
    speed_factors = [0.75, 0.85, 0.9, 0.95, 1.05, 1.1, 1.15, 1.25, 0.88, 1.12]
    for speed in speed_factors:
        try:
            indices = np.round(np.arange(0, len(audio), speed)).astype(int)
            indices = indices[indices < len(audio)]
            changed = audio[indices]
            augmented_audios.append(changed)
        except Exception as e:
            print(f"⚠ SpeedChange error ({speed}): {e}")

    # 5. Volume Change
    volume_factors = [0.6, 0.7, 0.8, 0.9, 1.1, 1.2, 1.3, 1.4, 0.75, 1.25]
    for volume in volume_factors:
        changed = audio * volume
        changed = np.clip(changed, -1.0, 1.0)
        augmented_audios.append(changed)

    # 6. Random combined augmentations
    num_combinations = 50
    for i in range(num_combinations):
        aug_audio = audio.copy()

        try:
            if random.random() > 0.5:
                aug_audio = librosa.effects.pitch_shift(
                    aug_audio, sr=sr, n_steps=random.uniform(-3, 3)
                )

            if random.random() > 0.5:
                aug_audio = librosa.effects.time_stretch(
                    aug_audio, rate=random.uniform(0.85, 1.15)
                )

            if random.random() > 0.4:
                noise = np.random.randn(len(aug_audio)) * random.uniform(0.003, 0.015)
                aug_audio = aug_audio + noise
                aug_audio = aug_audio / (np.max(np.abs(aug_audio)) + 1e-8)

            if random.random() > 0.5:
                aug_audio = np.clip(
                    aug_audio * random.uniform(0.7, 1.3),
                    -1.0, 1.0
                )

            augmented_audios.append(aug_audio)

        except Exception as e:
            print(f"⚠ Combined augmentation error: {e}")

    return augmented_audios


Fungsi Augmentasi untuk Generate 100 File dari Multiple Source Files

In [10]:
def augment_multiple_files_to_target(source_files, category, base_path, output_path, target_count=100):
    print(f"\n{'='*60}")
    print(f"Processing {category.upper()}")
    print(f"Source files: {len(source_files)} files")
    print(f"Target: {target_count} files")
    print(f"{'='*60}")

    if not source_files:
        print(f"Error: No source files found for {category}")
        return 0

    output_dir = os.path.join(output_path, category)
    os.makedirs(output_dir, exist_ok=True)   # ✅ FIX: create directory

    files_created = 0
    all_augmented = []

    for idx, filename in enumerate(source_files, 1):
        source_file = os.path.join(base_path, category, filename)  # ⛔ cek apakah benar foldernya

        print(f"\n[{idx}/{len(source_files)}] Loading: {filename}")

        audio, sr = load_audio(source_file)
        if audio is None:
            print(f"  Error: Failed to load {source_file}")
            continue

        all_augmented.append((audio, sr, f"original_{filename}"))

        print(f"  Generating augmentations...")
        augmented_audios = generate_diverse_augmentation(audio, sr)

        for aug_id, aug_audio in enumerate(augmented_audios, 1):
            all_augmented.append((aug_audio, sr, f"aug_{aug_id}_{filename}"))

        print(f"  Generated: {len(augmented_audios)} variations")

    print(f"\nTotal generated: {len(all_augmented)} audio samples")
    print(f"Selecting {target_count} samples...")
    random.shuffle(all_augmented)

    selected = all_augmented[:target_count]

    print(f"\nSaving {target_count} files...")
    for i, (aug_audio, sr, source_info) in enumerate(selected, 1):
        output_file = os.path.join(output_dir, f"{category}{i}.wav")
        save_audio(aug_audio, sr, output_file)
        files_created += 1

        if i % 10 == 0:
            print(f"  Progress: {i}/{target_count} files saved")

    print(f"✓ Completed: {files_created} files created for '{category}'")
    return files_created

def augment_dataset_100_per_category(base_path, output_path, source_files, target_count=100):
    """
    Augmentasi dataset: multiple files per kategori menjadi target_count file per kategori
    
    Args:
        base_path: path ke folder voice original
        output_path: path ke folder output
        source_files: dictionary {'buka': ['file1.mp3', 'file2.mp3', ...], 'tutup': [...]}
        target_count: jumlah file target per kategori
    
    Returns:
        statistics dictionary
    """
    stats = {}
    
    for category, filenames in source_files.items():
        if not filenames:
            print(f"Warning: No source files found for category: {category}")
            stats[category] = 0
            continue
        
        # Augment multiple files to target count
        files_created = augment_multiple_files_to_target(
            filenames,
            category, 
            base_path,
            output_path, 
            target_count
        )
        stats[category] = files_created
    
    return stats


Jalankan Augmentasi (Multiple Source Files → 100 Files per Category)

In [11]:
# Set random seed untuk reproducibility (optional)
random.seed(42)
np.random.seed(42)


print("="*60)
print("VOICE AUGMENTATION: Multiple Files -> 100 Files per Category")
print("="*60)

# Jalankan augmentasi
stats = augment_dataset_100_per_category(
    base_path, 
    output_path, 
    SOURCE_FILES, 
    TARGET_FILES_PER_CATEGORY
)

VOICE AUGMENTATION: Multiple Files -> 100 Files per Category

Processing BUKA
Source files: 10 files
Target: 100 files

[1/10] Loading: Buka-1.mp3
  Generating augmentations...
  Generated: 100 variations

[2/10] Loading: Buka-10.mp3
  Generating augmentations...
  Generated: 100 variations

[3/10] Loading: Buka-2.mp3
  Generating augmentations...
  Generated: 100 variations

[4/10] Loading: Buka-3.mp3
  Generating augmentations...
  Generated: 100 variations

[5/10] Loading: Buka-4.mp3
  Generating augmentations...
  Generated: 100 variations

[6/10] Loading: Buka-5.mp3
  Generating augmentations...
  Generated: 100 variations

[7/10] Loading: Buka-6.mp3
  Generating augmentations...
  Generated: 100 variations

[8/10] Loading: Buka-7.mp3
  Generating augmentations...
  Generated: 100 variations

[9/10] Loading: Buka-8.mp3
  Generating augmentations...
  Generated: 100 variations

[10/10] Loading: Buka-9.mp3
  Generating augmentations...
  Generated: 100 variations

Total generated: 1

Verifikasi Hasil

In [12]:
print("\n" + "="*60)
print("AUGMENTATION RESULTS")
print("="*60)

for category, count in stats.items():
    print(f"\n{category.upper()}: {count} files")

    category_path = os.path.join(output_path, category)

    if not os.path.exists(category_path):
        print(f"  ✗ Folder not found: {category_path}")
        continue

    actual_files = [f for f in os.listdir(category_path) if f.lower().endswith('.wav')]
    print(f"  Verified: {len(actual_files)} files in folder")

    print(f"  Sample filenames:")
    sample_indices = [1, 2, 3, count//2, count-1, count]

    for i in sample_indices:
        expected_file = f"{category}{i}.wav"
        status = "✓" if expected_file in actual_files else "✗"
        print(f"    {status} {expected_file}")

total_files = sum(stats.values())
print(f"\n{'='*60}")
print(f"TOTAL FILES CREATED: {total_files}")
print(f"{'='*60}")

print("\n" + "="*60)
print("FOLDER STRUCTURE")
print("="*60)

for category in ['buka', 'tutup']:
    category_path = os.path.join(output_path, category)
    if os.path.exists(category_path):
        files = sorted([f for f in os.listdir(category_path) if f.lower().endswith('.wav')])
        print(f"\n{category_path}")
        print(f"  Total: {len(files)} files")
        if files:
            print(f"  First: {files[0]}")
            print(f"  Last:  {files[-1]}")



AUGMENTATION RESULTS

BUKA: 100 files
  Verified: 100 files in folder
  Sample filenames:
    ✓ buka1.wav
    ✓ buka2.wav
    ✓ buka3.wav
    ✓ buka50.wav
    ✓ buka99.wav
    ✓ buka100.wav

TUTUP: 100 files
  Verified: 100 files in folder
  Sample filenames:
    ✓ tutup1.wav
    ✓ tutup2.wav
    ✓ tutup3.wav
    ✓ tutup50.wav
    ✓ tutup99.wav
    ✓ tutup100.wav

TOTAL FILES CREATED: 200

FOLDER STRUCTURE

C:/Users/VICTUS/Documents/PSD/dataset/augmented_audio\buka
  Total: 100 files
  First: buka1.wav
  Last:  buka99.wav

C:/Users/VICTUS/Documents/PSD/dataset/augmented_audio\tutup
  Total: 100 files
  First: tutup1.wav
  Last:  tutup99.wav
