In [22]:
import os
import random
import numpy as np
import librosa
import soundfile as sf

# Parameter settings
input_folder = "pokemon_sounds"  # root folder scanned by process_pokemon_sounds
output_folder = "augmented_pokemon_sounds"  # root folder that receives the generated clips
sampling_rate = 22050  # passed to librosa.load as the `sr` argument
num_clips_per_file = 200  # number of augmented clips generated per source file
# The two settings below are commented out and currently unused.
# min_clip_duration =   # seconds
# max_clip_duration = 1.8  # seconds
max_output_duration = 3.0  # maximum output length in seconds

def generate_white_noise(length, amplitude=0.03):
    """Draw zero-mean Gaussian noise samples.

    Args:
        length: Number of samples to draw (forwarded as NumPy's ``size``).
        amplitude: Standard deviation of the noise.

    Returns:
        Samples drawn from ``np.random.normal`` using NumPy's global
        random state.
    """
    noise = np.random.normal(loc=0, scale=amplitude, size=length)
    return noise

def pad_audio(audio, target_len, pad_front=True):
    """Zero-pad or crop ``audio`` so the result has exactly ``target_len`` samples.

    Args:
        audio: Sequence of samples (anything supporting ``len`` and slicing).
        target_len: Number of samples the result must have; must be >= 0.
        pad_front: If True the zeros are placed before the audio, otherwise
            after it.

    Returns:
        A new array of length ``target_len``. Input that is longer than
        ``target_len`` is cropped to its first ``target_len`` samples and no
        padding is added.

    Raises:
        ValueError: If ``target_len`` is negative.
    """
    if target_len < 0:
        raise ValueError(f"target_len must be non-negative, got {target_len}")

    # Crop first so the pad size below can never go negative; a negative size
    # used to make np.zeros fail with an opaque "negative dimensions" error.
    audio = audio[:target_len]
    padding = np.zeros(target_len - len(audio))
    parts = [padding, audio] if pad_front else [audio, padding]
    return np.concatenate(parts)

def generate_augmented_clips(y, sr, num_clips):
    """Create noisy, zero-padded variants of one audio signal.

    Every clip is ``int(max_output_duration * sr)`` samples long. For each
    clip one of three strategies is drawn with weights 1:1:8:

    * ``"head"``: a random window of the audio, zero-padded at the front.
    * ``"tail"``: a random window of the audio, zero-padded at the back.
    * ``"full"``: the whole audio, placed at a random offset inside the
      output window.

    Windows for "head"/"tail" span between 90% and 100% of the audio, capped
    at the output window. Audio longer than the output window no longer
    crashes: "full" then uses a random window of exactly the output length.
    White noise from ``generate_white_noise`` is added to every clip.

    Args:
        y: Audio samples.
        sr: Sampling rate in Hz, used to convert ``max_output_duration`` to
            a sample count.
        num_clips: Number of clips to generate.

    Returns:
        A list of ``num_clips`` arrays, each of the output length.
    """
    total_len = len(y)
    output_len = int(max_output_duration * sr)

    # A clip may never exceed the output window; otherwise the padding size
    # would go negative and np.zeros / random.randint would raise.
    max_samples = min(total_len, output_len)
    min_samples = min(int(total_len * 0.9), max_samples)

    clips = []
    for _ in range(num_clips):
        clip_len = random.randint(min_samples, max_samples)

        strategy = random.choices(
            ["head", "tail", "full"],
            weights=[1, 1, 8],  # "full" is chosen 80% of the time
            k=1,
        )[0]

        if strategy == "head":
            start = random.randint(0, total_len - clip_len)
            clip = y[start:start + clip_len]
            padded = pad_audio(clip, output_len, pad_front=True)
        elif strategy == "tail":
            end = random.randint(clip_len, total_len)
            clip = y[end - clip_len:end]
            padded = pad_audio(clip, output_len, pad_front=False)
        else:  # full: use the entire audio
            if total_len > output_len:
                # Too long for the window: take a random window instead.
                start = random.randint(0, total_len - output_len)
                clip = y[start:start + output_len]
            else:
                clip = y
            total_padding = output_len - len(clip)
            # Split the padding randomly between the front and the back.
            pad_front = random.randint(0, total_padding)
            pad_back = total_padding - pad_front
            padded = np.concatenate([np.zeros(pad_front), clip, np.zeros(pad_back)])

        noise = generate_white_noise(len(padded))
        clips.append(padded + noise)

    return clips


def _normalize_clips(clips):
    """Scale a batch of equal-length clips by their shared peak into [-1, 1].

    All clips are divided by the single largest absolute sample across the
    whole batch, so relative loudness between clips is preserved. An empty
    batch or an all-zero batch is returned unscaled to avoid dividing by zero.
    """
    batch = np.asarray(clips)
    if batch.size == 0:
        return batch
    peak = np.max(np.abs(batch))
    if peak == 0:
        return batch
    return batch / peak


def process_pokemon_sounds(input_root, output_root):
    """Augment every mp3 under ``input_root`` and write the clips as WAV files.

    ``input_root`` is expected to contain one sub-folder per Pokémon. For each
    mp3 in such a folder, ``num_clips_per_file`` augmented clips are generated
    and written to ``output_root/<pokemon>/clip_<n>.wav``. Numbering starts at
    1 and continues across all mp3 files of the same folder, so a folder with
    several source files no longer overwrites its own earlier output.

    Args:
        input_root: Folder containing one sub-folder per Pokémon.
        output_root: Folder that receives the generated clips; sub-folders
            are created as needed.

    A failure on one file is reported on stdout and does not stop the run.
    """
    for pokemon in os.listdir(input_root):
        pokemon_dir = os.path.join(input_root, pokemon)
        if not os.path.isdir(pokemon_dir):
            continue

        output_dir = os.path.join(output_root, pokemon)
        clip_index = 0  # shared by every source file in this folder

        # Sorted so clip numbering is reproducible between runs.
        for filename in sorted(os.listdir(pokemon_dir)):
            if not filename.lower().endswith(".mp3"):
                continue

            filepath = os.path.join(pokemon_dir, filename)
            print(f"🔄 處理 {filepath} ...")

            try:
                y, sr = librosa.load(filepath, sr=sampling_rate)
                clips = _normalize_clips(
                    generate_augmented_clips(y, sr, num_clips_per_file)
                )
                os.makedirs(output_dir, exist_ok=True)

                for clip in clips:
                    clip_index += 1
                    output_path = os.path.join(output_dir, f"clip_{clip_index}.wav")
                    sf.write(output_path, clip, sr)
                    print(f"✅ 輸出：{output_path}")

            except Exception as e:
                # Deliberate best-effort: report the failure and move on to
                # the next file instead of aborting the whole batch.
                print(f"❌ 處理 {pokemon} 時發生錯誤：{e}")

# Run the script: augment everything under input_folder into output_folder
process_pokemon_sounds(input_folder, output_folder)


🔄 處理 pokemon_sounds\Blastoise\Blastoise.mp3 ...
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_1.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_2.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_3.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_4.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_5.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_6.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_7.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_8.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_9.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_10.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_11.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_12.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_13.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_14.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_15.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_16.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_17.wav
✅ 輸出：augmented_pokemon_sounds\Blastoise\clip_18.wav
✅ 輸出：augmented_pokemon_so