In [1]:
# ------------------------------
# Batch Transcription Script with live CSV writing + timing + GPU info + speed score
# ------------------------------

import os, csv, json, time, torch, whisper, subprocess
from datetime import datetime
from pydub import AudioSegment

# --- Run configuration ---------------------------------------------------
# Where the source videos live and where the CSV/JSON results are written.
INPUT_DIR = r"C:\Users\micha\spanish_articulation\h"
OUTPUT_DIR = "batch_output"

# Whisper settings: checkpoint size and the language code passed to transcribe().
MODEL_SIZE = "medium"
LANGUAGE = "es"

# Create the output folder before anything tries to write into it;
# exist_ok=True makes re-running the script safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Detect device and GPU info.
# Sets DEVICE ("cuda" or "cpu") and a human-readable device_label that is
# printed to the console and stored in every CSV row.
if torch.cuda.is_available():
    DEVICE = "cuda"
    try:
        gpu_name = torch.cuda.get_device_name(0)
        cuda_version = torch.version.cuda
        smi_output = subprocess.check_output(
            ["nvidia-smi", "--query-gpu=driver_version", "--format=csv,noheader"],
            encoding="utf-8",
            timeout=10,  # a hung nvidia-smi should not block the whole run
        )
        # nvidia-smi prints one line per installed GPU; keep only the first so
        # the label stays on a single line on multi-GPU machines. Empty output
        # raises IndexError here and is handled by the fallback below.
        driver_info = smi_output.strip().splitlines()[0].strip()
        device_label = f"GPU ({gpu_name}, Driver {driver_info}, CUDA {cuda_version})"
    except Exception as e:
        # Deliberately broad: the label is purely informational, so any failure
        # while gathering details (nvidia-smi missing, timeout, driver error)
        # must not stop the transcription run.
        device_label = f"GPU (CUDA {torch.version.cuda}) - details unavailable: {e}"
else:
    DEVICE = "cpu"
    device_label = "CPU"

# Load the Whisper checkpoint onto the selected device; the resulting `model`
# object is what the batch loop calls for each file.
print(f"Loading Whisper model on {device_label}...")
model = whisper.load_model(MODEL_SIZE, device=DEVICE)
print("✅ Model loaded!")

# Gather the .mp4 inputs (extension match is case-insensitive) in name order.
files = sorted(
    entry for entry in os.listdir(INPUT_DIR) if entry.lower().endswith(".mp4")
)

# Batch loop: transcribe every file in `files`, writing one JSON file per input
# and one CSV row per input (flushed immediately so partial results survive a
# crash), followed by a TOTAL summary row.
csv_path = os.path.join(OUTPUT_DIR, "transcriptions.csv")
total_time, total_duration = 0.0, 0.0


def _format_score(score):
    """Return a speed score with two decimals, or "" when it is undefined."""
    return "" if score is None else f"{score:.2f}"


with open(csv_path, "w", newline="", encoding="utf-8") as cf:
    writer = csv.DictWriter(cf, fieldnames=[
        "filename", "transcript", "time_seconds", "file_duration_seconds",
        "speed_score", "device_used", "finished_at"
    ])
    writer.writeheader()

    try:
        for filename in files:
            file_path = os.path.join(INPUT_DIR, filename)
            print(f"Transcribing {filename} on {device_label}...")

            # Duration of input file (pydub reports length in milliseconds)
            audio = AudioSegment.from_file(file_path)
            file_duration = len(audio) / 1000.0

            # perf_counter is monotonic and high-resolution, so the measured
            # interval cannot be skewed by system clock adjustments.
            start_time = time.perf_counter()
            result = model.transcribe(file_path, language=LANGUAGE, task="transcribe")
            elapsed = time.perf_counter() - start_time

            # Accumulate totals only once the file has actually been
            # transcribed, so the summary stays consistent if the run is
            # interrupted part-way through a file.
            total_time += elapsed
            total_duration += file_duration

            text = result.get("text", "").strip()
            finished_at = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

            # Speed score: seconds of media processed per second of compute
            # (None when no time elapsed, i.e. the ratio is undefined).
            speed_score = file_duration / elapsed if elapsed > 0 else None
            score_text = _format_score(speed_score)

            # Save the full Whisper result as JSON next to the CSV
            json_path = os.path.join(OUTPUT_DIR, f"{os.path.splitext(filename)[0]}.json")
            with open(json_path, "w", encoding="utf-8") as jf:
                json.dump(result, jf, ensure_ascii=False, indent=2)

            # Write row immediately
            writer.writerow({
                "filename": filename,
                "transcript": text,
                "time_seconds": f"{elapsed:.2f}",
                "file_duration_seconds": f"{file_duration:.2f}",
                "speed_score": score_text,
                "device_used": device_label,
                "finished_at": finished_at
            })
            cf.flush()

            print(f"✅ {filename} done in {elapsed:.2f}s (file {file_duration:.2f}s, score {score_text or 'n/a'}) at {finished_at}")
    finally:
        # Overall summary. Written in `finally` so that an interrupted run
        # (Ctrl+C, a bad file) still records totals for the files that did
        # complete; the original exception then propagates as usual.
        overall_score = total_duration / total_time if total_time > 0 else None
        writer.writerow({
            "filename": "TOTAL",
            "transcript": "",
            "time_seconds": f"{total_time:.2f}",
            "file_duration_seconds": f"{total_duration:.2f}",
            "speed_score": _format_score(overall_score),
            "device_used": device_label,
            "finished_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        })

print(f"✅ Batch transcription complete. JSON + CSV saved in {os.path.abspath(OUTPUT_DIR)}")

Loading Whisper model on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ Model loaded!
Transcribing 1_adher_front_neg.mp4 on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ 1_adher_front_neg.mp4 done in 4.76s (file 6.33s, score 1.33) at 2025-11-28 17:53:31
Transcribing 1_adher_side_eg.mp4 on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ 1_adher_side_eg.mp4 done in 4.53s (file 6.41s, score 1.41) at 2025-11-28 17:53:36
Transcribing 1_adher_side_neg.mp4 on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ 1_adher_side_neg.mp4 done in 4.31s (file 6.53s, score 1.51) at 2025-11-28 17:53:40
Transcribing 1_ah_front_eg.mp4 on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ 1_ah_front_eg.mp4 done in 4.10s (file 5.83s, score 1.42) at 2025-11-28 17:53:44
Transcribing 1_ah_front_neg.mp4 on GPU (NVIDIA T1000 8GB, Driver 580.97, CUDA 12.1)...
✅ 1_ah_front_neg.mp4 done in 4.28s (file 5.69s, score 1.33) at 2025-11-28 17:53:49
Transcribing 1_ah_side_eg.mp4 on GPU (NVIDIA T1

KeyboardInterrupt: 