In [None]:
import os
import string

import jiwer
import torch
import whisper
from scipy.io import wavfile

# --- 1. Define the list of 30 ground truth sentences ---
# Reference transcripts taken from the NOIZEUS corpus documentation, written
# one sentence per line and split into a list of 30 strings.
# Order matters: the script picks a sentence by position (index 0 is the one
# used for the `sp01` files), so do not reorder or insert blank lines here.
GROUND_TRUTH_SENTENCES = """\
THE BIRCH CANOE SLID ON THE SMOOTH PLANKS
HE KNEW THE SKILL OF THE GREAT YOUNG ACTRESS
HER PURSE WAS FULL OF USELESS TRASH
READ VERSE OUT LOUD FOR PLEASURE
WIPE THE GREASE OFF HIS DIRTY FACE
MEN STRIVE BUT SELDOM GET RICH
WE FIND JOY IN THE SIMPLEST THINGS
HEDGE APPLES MAY STAIN YOUR HANDS GREEN
HURDLE THE PIT WITH THE AID OF A LONG POLE
THE SKY THAT MORNING WAS CLEAR AND BRIGHT BLUE
HE WROTE DOWN A LONG LIST OF ITEMS
THE DRIP OF THE RAIN MADE A PLEASANT SOUND
SMOKE POURED OUT OF EVERY CRACK
HATS ARE WORN TO TEA AND NOT TO DINNER
THE CLOTHES DRIED ON A THIN WOODEN RACK
THE STRAY CAT GAVE BIRTH TO KITTENS
THE LAZY COW LAY IN THE COOL GRASS
THE FRIENDLY GANG LEFT THE DRUG STORE
WE TALKED OF THE SIDESHOW IN THE CIRCUS
THE SET OF CHINA HIT THE FLOOR WITH A CRASH
CLAMS ARE SMALL, ROUND, SOFT AND TASTY
THE LINE WHERE THE EDGES JOIN WAS CLEAN
STOP WHISTLING AND WATCH THE BOYS MARCH
A CRUISE IN WARM WATERS IN A SLEEK YACHT IS FUN
A GOOD BOOK INFORMS OF WHAT WE OUGHT TO KNOW
SHE HAS A SMART WAY OF WEARING CLOTHES
BRING YOUR BEST COMPASS TO THE THIRD CLASS
THE CLUB RENTED THE RINK FOR THE FIFTH NIGHT
THE FLINT SPUTTERED AND LIT A PINE TORCH
LET'S ALL JOIN AS WE SING THE LAST CHORUS
""".splitlines()

# --- 2. Define the files and methods to test ---
# Each entry pairs a human-readable method label with the WAV file to
# transcribe. The paths are relative (`../data/...`); per the original note
# they assume the notebook lives in the `notebooks` folder. Verify that the
# filenames match what is in your `data/raw` and `data/cleaned` folders.
FILES_TO_TEST = [
    {"method_name": label, "path": wav_path}
    for label, wav_path in (
        ("Baseline (Original Noisy Audio)", "../data/raw/car/5dB/sp01_car_sn5.wav"),
        ("Spectral Subtraction", "../data/cleaned/sp01_ss_cleaned.wav"),
        ("Wiener Filter", "../data/cleaned/sp01_wf_cleaned.wav"),
        ("Hybrid Method", "../data/cleaned/sp01_hy_cleaned.wav"),
    )
]

# Position of the reference transcript in the ground-truth list. All 'sp01'
# files correspond to the first sentence (index 0).
SENTENCE_INDEX = 0

# Name of the Whisper checkpoint to load.
MODEL_SIZE = "base"

# --- 3. Load the model (only once) ---
# Use the GPU when PyTorch reports that CUDA is available; otherwise fall back
# to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")

# The model is loaded a single time here and placed on the chosen device.
print(f"Loading Whisper model ('{MODEL_SIZE}')...")
model = whisper.load_model(MODEL_SIZE, device=device)
print("Model loaded successfully.")

# --- 4. Define normalization function and get ground truth ---
def normalize_text(text):
    """Return *text* upper-cased with all ASCII punctuation removed.

    Every character listed in ``string.punctuation`` is deleted outright (not
    replaced by a space), so ``"LET'S"`` becomes ``"LETS"``. Whitespace is left
    untouched.
    """
    # Mapping a character to None in a translation table deletes it.
    drop_punctuation = str.maketrans(dict.fromkeys(string.punctuation))
    uppercased = text.upper()
    return uppercased.translate(drop_punctuation)

# Select the reference transcript for the utterance under test and normalize
# it the same way the Whisper output is normalized, so the WER comparison is
# not affected by case or punctuation.
ground_truth_raw = GROUND_TRUTH_SENTENCES[SENTENCE_INDEX]
ground_truth = normalize_text(ground_truth_raw)
print(f"\nGround Truth: {ground_truth}\n")


# --- 5. Loop through and test each file ---
# For every configured file: transcribe it, normalize the transcript, and
# report the word error rate against the reference. A file that cannot be
# processed is reported and skipped so the remaining methods are still run.
for test_case in FILES_TO_TEST:
    method_name = test_case["method_name"]
    audio_path = test_case["path"]

    print(f"--- ANALYSIS FOR: {method_name} ---")
    print(f"Transcribing file: {audio_path}")

    # Whisper decodes audio through ffmpeg and reports a missing input file as
    # RuntimeError("Failed to load audio: ..."), not FileNotFoundError, so the
    # path has to be checked explicitly before transcribing.
    if not os.path.isfile(audio_path):
        print(f"ERROR: File not found at {audio_path}. Please check the path.\n")
        continue

    try:
        # Transcribe the audio file
        result = model.transcribe(audio_path)
    except FileNotFoundError as exc:
        # The audio file exists at this point, so something else is missing --
        # presumably the ffmpeg executable Whisper shells out to (TODO confirm).
        print(f"ERROR: File not found while transcribing {audio_path}: {exc}\n")
        continue
    except RuntimeError as exc:
        # ffmpeg failures embed its whole version/configuration banner in the
        # message; the last line is the part that states what went wrong.
        detail_lines = str(exc).strip().splitlines()
        reason = detail_lines[-1] if detail_lines else type(exc).__name__
        print(f"ERROR: Could not transcribe {audio_path}: {reason}\n")
        continue

    whisper_output_raw = result["text"].strip()

    # Normalize the output
    whisper_output = normalize_text(whisper_output_raw)

    # Calculate WER (reference first, hypothesis second)
    error = jiwer.wer(ground_truth, whisper_output)

    print(f"Whisper Output: {whisper_output}")
    print(f"Word Error Rate (WER): {error * 100:.2f}%\n")

Using device: cuda
Loading Whisper model ('base')...
Model loaded successfully.

Ground Truth: THE BIRCH CANOE SLID ON THE SMOOTH PLANKS

--- ANALYSIS FOR: Baseline (Original Noisy Audio) ---
Transcribing file: ../data/raw/car/5dB/sp01.wav


RuntimeError: Failed to load audio: ffmpeg version 2025-06-26-git-09cd38e9d5-full_build-www.gyan.dev Copyright (c) 2000-2025 the FFmpeg developers
  built with gcc 15.1.0 (Rev4, Built by MSYS2 project)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-lcms2 --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-libsnappy --enable-zlib --enable-librist --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-libbluray --enable-libcaca --enable-libdvdnav --enable-libdvdread --enable-sdl2 --enable-libaribb24 --enable-libaribcaption --enable-libdav1d --enable-libdavs2 --enable-libopenjpeg --enable-libquirc --enable-libuavs3d --enable-libxevd --enable-libzvbi --enable-liboapv --enable-libqrencode --enable-librav1e --enable-libsvtav1 --enable-libvvenc --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxavs2 --enable-libxeve --enable-libxvid --enable-libaom --enable-libjxl --enable-libvpx --enable-mediafoundation --enable-libass --enable-frei0r --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-liblensfun --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libshaderc --enable-vulkan --enable-libplacebo --enable-opencl --enable-libcdio --enable-openal --enable-libgme --enable-libmodplug --enable-libopenmpt --enable-libopencore-amrwb --enable-libmp3lame --enable-libshine --enable-libtheora --enable-libtwolame --enable-libvo-amrwbenc --enable-libcodec2 --enable-libilbc --enable-libgsm --enable-liblc3 --enable-libopencore-amrnb --enable-libopus --enable-libspeex --enable-libvorbis --enable-ladspa --enable-libbs2b --enable-libflite --enable-libmysofa --enable-librubberband --enable-libsoxr --enable-chromaprint
  libavutil      60.  3.100 / 60.  3.100
  libavcodec     62.  4.100 / 62.  4.100
  libavformat    62.  1.100 / 62.  1.100
  libavdevice    62.  0.100 / 62.  0.100
  libavfilter    11.  0.100 / 11.  0.100
  libswscale      9.  0.100 /  9.  0.100
  libswresample   6.  0.100 /  6.  0.100
[in#0 @ 0000020ce0b44380] Error opening input: No such file or directory
Error opening input file ../data/raw/car/5dB/sp01.wav.
Error opening input files: No such file or directory
