# AI 語音降噪 Colab
這個 Colab 筆記本使用 [SpeechBrain](https://github.com/speechbrain/speechbrain) 的 MetricGAN+ 模型進行語音降噪，並可選擇以 GPU 加速（沒有 GPU 時會改用 CPU）。

載入 Google Drive 中的影片或聲音檔案（可指定單一檔案或整個資料夾），並將降噪結果另存為檔名加上 `_denoised` 的聲音檔 (.wav)。

In [9]:
#@title 🎉 一鍵啟動語音降噪 / One-click Audio Denoiser
# Mount Google Drive at /content/drive so files stored there can be used as input.
print("📂 掛載雲端硬碟中 / Mounting Google Drive...")
from google.colab import drive
drive.mount('/content/drive')

# 👉 Edit these as needed (input_path may be a folder or a single file path).
input_path = "/content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/"  #@param {type:"string"}
use_gpu = True  #@param {type:"boolean"}

print("📦 正在安裝套件 / Installing packages...")
# Install the system ffmpeg binary (important: audio extraction from video
# invokes the `ffmpeg` command through subprocess).
!apt-get -y update -qq
!apt-get -y install -qq ffmpeg
# Python packages.
!pip install -q speechbrain torchaudio ffmpeg-python soundfile
print("✅ 套件安裝完成 / Packages installed!")

import os, glob, subprocess, torch, torchaudio, tempfile
from speechbrain.pretrained import SpectralMaskEnhancement
import torchaudio.transforms as T

# Inference only: switch autograd off globally.
torch.set_grad_enabled(False)

# Lower-case file extensions (with leading dot) accepted as video input.
video_exts = [
    ".mp4",
    ".mov",
    ".avi",
    ".mkv",
    ".flv",
    ".webm",
]
# Lower-case file extensions (with leading dot) accepted as audio input.
audio_exts = [
    ".wav",
    ".mp3",
    ".flac",
    ".ogg",
    ".m4a",
]

def extract_audio(src, tmp_dir="/content"):
    """Extract the audio track of ``src`` into a temporary WAV file via ffmpeg.

    The output is written as 16 kHz, mono, 16-bit PCM.

    Args:
        src: Path of the media file handed to ffmpeg as input.
        tmp_dir: Directory in which the temporary WAV file is created.
            Defaults to "/content".

    Returns:
        Path of the temporary WAV file. It is not deleted here; the caller
        must remove it when done.

    Raises:
        subprocess.CalledProcessError: ffmpeg exited with a non-zero status.
        RuntimeError: ffmpeg exited successfully but the output file is
            missing or empty.
    """
    # mkstemp creates the file up front; only the path is needed, so the
    # descriptor is closed right away and ffmpeg overwrites the file ("-y").
    fd, tmp_path = tempfile.mkstemp(prefix="tmp_input_", suffix=".wav", dir=tmp_dir)
    os.close(fd)
    cmd = [
        'ffmpeg', '-y', '-hide_banner', '-loglevel', 'error',
        '-i', src,
        '-vn',                   # drop the video stream
        '-ac', '1',              # mono
        '-ar', '16000',          # 16 kHz
        '-acodec', 'pcm_s16le',  # 16-bit PCM
        tmp_path,
    ]
    try:
        subprocess.run(cmd, check=True)
        if not os.path.exists(tmp_path) or os.path.getsize(tmp_path) == 0:
            raise RuntimeError(f"ffmpeg 輸出檔案異常：{tmp_path}")
    except BaseException:
        # On any failure (including an interrupted run) remove the placeholder
        # so failed extractions do not pile up temp files, then re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path

def denoise_file(src, enhancer):
    """Denoise one media file and save the result as ``<stem>_denoised.wav``.

    Video inputs (extension in ``video_exts``) first have their audio
    extracted with ``extract_audio``. The audio is then loaded, down-mixed to
    mono, resampled to 16 kHz if needed, enhanced, clamped to [-1, 1] and
    written next to ``src``. Errors are reported with ``print`` instead of
    being raised; the function always returns ``None``.

    Args:
        src: Path of the audio or video file to process.
        enhancer: Enhancement model; this function uses its ``device``
            attribute and its ``enhance_batch(audio, lengths=...)`` method.
    """
    target_rate = 16000
    processed_input = src
    scratch_wav = None  # temporary WAV to delete at the end, if one was made

    if os.path.splitext(src)[1].lower() in video_exts:
        try:
            processed_input = extract_audio(src)
        except subprocess.CalledProcessError as e:
            print(f"❌ 影片抽音失敗（ffmpeg）{src}: {e}")
            return
        except Exception as e:
            print(f"❌ 影片抽音發生錯誤 {src}: {e}")
            return
        scratch_wav = processed_input

    try:
        waveform, rate = torchaudio.load(processed_input)  # [channels, time]

        # Down-mix multi-channel audio to a single channel (safety net).
        if waveform.dim() == 2 and waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)  # [1, T]

        # Bring the audio to 16 kHz before it is fed to the model.
        if rate != target_rate:
            waveform = T.Resample(orig_freq=rate, new_freq=target_rate)(waveform)
            rate = target_rate

        # A mono [1, T] tensor is passed as-is and treated as a batch of one.
        waveform = waveform.to(torch.float32).to(enhancer.device)

        # enhance_batch takes relative lengths; 1.0 marks the whole signal
        # as valid.
        rel_lengths = torch.tensor([1.0], device=waveform.device)
        cleaned = enhancer.enhance_batch(waveform, lengths=rel_lengths)

        # torchaudio.save wants [channels, time]: add a channel axis when the
        # result is not 2-D, or keep only the first row when there are several.
        wav_out = cleaned.detach().cpu()
        if wav_out.dim() != 2:
            wav_out = wav_out.unsqueeze(0)
        elif wav_out.shape[0] != 1:
            wav_out = wav_out[:1]

        # Clamp to [-1, 1] to avoid overflow when the file is written.
        wav_out = torch.clamp(wav_out, -1.0, 1.0)

        out_file = os.path.splitext(src)[0] + '_denoised.wav'
        torchaudio.save(out_file, wav_out, rate)
        print(f"🎵 已處理 / Done: {out_file}")

    except RuntimeError as e:
        print(f"❌ 無法載入或處理音訊檔案 {processed_input}: {e}")
    except Exception as e:
        print(f"❌ 處理檔案時發生錯誤 {src}: {e}")
    finally:
        # Best-effort removal of the extracted temporary WAV.
        if scratch_wav and os.path.exists(scratch_wav):
            try:
                os.remove(scratch_wav)
            except Exception:
                pass

# Pick the compute device: CUDA only when requested and actually available.
device = 'cpu'
if use_gpu and torch.cuda.is_available():
    device = 'cuda'
print(f"💻 使用裝置 / Device: {device}")

# Load the pretrained SpeechBrain MetricGAN+ enhancer onto that device.
enhancer = SpectralMaskEnhancement.from_hparams(
    run_opts={'device': device},
    savedir='pretrained_models/metricgan-plus-voicebank',
    source='speechbrain/metricgan-plus-voicebank',
)

# Batch (folder) or single-file processing.
if os.path.isdir(input_path):
    print('📁 偵測到資料夾，開始批次處理 / Folder detected, processing all files...')
    # Build the extension lookup once instead of concatenating per file.
    supported_exts = set(video_exts + audio_exts)
    # Escape the folder path so glob metacharacters in its name
    # ("[", "]", "*", "?") are matched literally rather than as a pattern.
    pattern = os.path.join(glob.escape(input_path), '*')
    files = [
        f for f in glob.glob(pattern)
        if os.path.isfile(f)
        and os.path.splitext(f)[1].lower() in supported_exts
        # Skip outputs of an earlier run so they are not denoised again
        # (which would produce "*_denoised_denoised.wav").
        and not f.endswith('_denoised.wav')
    ]
    if not files:
        print(f"⚠️ 資料夾內沒有可處理的影音檔 / No supported media files found in: {input_path}")
    for f in sorted(files):
        denoise_file(f, enhancer)
else:
    if os.path.exists(input_path):
        denoise_file(input_path, enhancer)
    else:
        print(f"⚠️ 找不到路徑：{input_path}")

print('🎉 全部搞定 / All done!')


📂 掛載雲端硬碟中 / Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
📦 正在安裝套件 / Installing packages...
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)


INFO:speechbrain.utils.fetching:Fetch hyperparams.yaml: Using symlink found at '/content/pretrained_models/metricgan-plus-voicebank/hyperparams.yaml'
INFO:speechbrain.utils.fetching:Fetch custom.py: Fetching from HuggingFace Hub 'speechbrain/metricgan-plus-voicebank' if not cached


✅ 套件安裝完成 / Packages installed!
💻 使用裝置 / Device: cuda


DEBUG:speechbrain.utils.parameter_transfer:Collecting files (or symlinks) for pretraining in pretrained_models/metricgan-plus-voicebank.
INFO:speechbrain.utils.fetching:Fetch enhance_model.ckpt: Using symlink found at '/content/pretrained_models/metricgan-plus-voicebank/enhance_model.ckpt'
DEBUG:speechbrain.utils.parameter_transfer:Set local path in self.paths["enhance_model"] = /content/pretrained_models/metricgan-plus-voicebank/enhance_model.ckpt
INFO:speechbrain.utils.parameter_transfer:Loading pretrained files for: enhance_model
DEBUG:speechbrain.utils.parameter_transfer:Redirecting (loading from local path): enhance_model -> /content/pretrained_models/metricgan-plus-voicebank/enhance_model.ckpt


📁 偵測到資料夾，開始批次處理 / Folder detected, processing all files...
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3491_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3492_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3495_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3496_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3497_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3498_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3499_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3500_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3501_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3502_denoised.wav
🎵 已處理 / Done: /content/drive/MyDrive/2025內部管理/YT長影片X短影片/講義介紹影片/IMG_3503_denoised.wav
🎵 已處理 