<a href="https://colab.research.google.com/github/Baah134/Baah134/blob/main/Whisper/Whisper_Transcription_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai-whisper jiwer pandas

import os
import pandas as pd
import glob
import whisper
from jiwer import wer

In [None]:
from google.colab import drive

# Inputs on the mounted Drive: the folder that is scanned for audio files and
# the CSV that holds the reference transcriptions.
audio_folder = "/content/drive/MyDrive/DeepLearning/Whisper/my_data/audio"
csv_file = "/content/drive/MyDrive/DeepLearning/Whisper/my_data/transcriptions.csv"

# Mount Google Drive under /content/drive so the paths above can be read.
drive.mount("/content/drive")

In [3]:
from tqdm import tqdm

In [None]:
# ============================
# STEP 1: LOAD GROUND-TRUTH TRANSCRIPTIONS
# ============================
# Read both columns as text so that numeric-looking values (for example a
# transcription that is just "2024") are kept as strings rather than numbers.
df = pd.read_csv(csv_file, dtype={"filename": str, "transcription": str})

# A row with an empty cell in either column cannot be matched to an audio file
# or scored, so drop it here instead of letting NaN reach os.path.splitext()
# or the WER computation.
labelled_rows = df.dropna(subset=["filename", "transcription"])

# Dictionary {audio file name (without extension): transcription}
transcriptions = {
    os.path.splitext(name)[0]: text
    for name, text in zip(labelled_rows["filename"], labelled_rows["transcription"])
}

# ============================
# STEP 2: LOAD AUDIO FILES
# ============================

# Collect every regular file in the audio folder (wav, m4a, etc.).
# glob does not guarantee any ordering and "*" also matches sub-directories,
# so keep only files and sort them: the processing order, and therefore the
# row order of the results, is then the same on every run.
audio_files = sorted(
    path
    for path in glob.glob(os.path.join(audio_folder, "*"))
    if os.path.isfile(path)
)

# ============================
# STEP 3: INITIALIZE WHISPER
# ============================
# Name of the Whisper checkpoint to load.
# Options: "small", "medium", "large" for better accuracy.
model_name = "large"
model = whisper.load_model(model_name)

# ============================
# STEP 4: PROCESS EACH AUDIO FILE
# ============================
# Transcribe every audio file that has a reference transcription and score the
# result with jiwer's word error rate. A file that cannot be scored is skipped
# with a message instead of aborting the run and losing the results so far.
matched_data = []  # (file name, Whisper text, reference text, WER) per scored file
total_wer = 0  # Sum of all per-file WERs
valid_files = 0  # Number of files that were scored

for audio_path in tqdm(audio_files, desc="Processing Audios"):
    file_name = os.path.basename(audio_path)  # e.g. "sample_001.wav"
    file_stem = os.path.splitext(file_name)[0]  # e.g. "sample_001"

    # ✅ Ensure the transcription exists
    ground_truth = transcriptions.get(file_stem)
    if ground_truth is None:
        continue  # No CSV row for this file: nothing to compare against

    # An empty CSV cell is read by pandas as NaN (a float), and a blank
    # reference has no words to score against; skip both rather than pass
    # them to wer().
    if not isinstance(ground_truth, str) or not ground_truth.strip():
        tqdm.write(f"Skipping {file_name}: reference transcription is empty")
        continue

    # ✅ Transcribe audio using Whisper
    # fp16=False asks Whisper to run inference in 32-bit floats.
    # Whisper raises RuntimeError when the file cannot be decoded as audio
    # (e.g. a non-audio file that happens to share a stem with a CSV row).
    try:
        result = model.transcribe(audio_path, fp16=False)
    except RuntimeError as exc:
        tqdm.write(f"Skipping {file_name}: transcription failed ({exc})")
        continue
    whisper_transcription = result["text"].strip()

    # ✅ Compute Word Error Rate (WER)
    error_rate = wer(ground_truth, whisper_transcription)

    # ✅ Store results
    matched_data.append((file_name, whisper_transcription, ground_truth, error_rate))
    total_wer += error_rate
    valid_files += 1

# ============================
# STEP 5: COMPUTE AVERAGE WER
# ============================
# Mean of the per-file WERs. When no file was scored the average is reported
# as 0 rather than dividing by zero.
if valid_files:
    average_wer = total_wer / valid_files
else:
    average_wer = 0

# ============================
# STEP 6: SAVE RESULTS TO CSV
# ============================
# One row per scored file, in the order the files were processed.
result_columns = ["Filename", "Whisper Transcription", "Ground Truth", "WER"]
df_results = pd.DataFrame(matched_data, columns=result_columns)

# Written relative to the current working directory, without the index column.
output_csv = "whisper_results.csv"
df_results.to_csv(output_csv, index=False)

# Show the per-file table followed by the overall average.
print(df_results)
print(f"\n✅ Average WER: {average_wer:.4f}")