In [None]:
import os
import sys
import torch
import functools

# Ensure ffmpeg is on PATH (freshly installed via winget)
import shutil

_ffmpeg_dir = r"C:\Users\yashf\AppData\Local\Microsoft\WinGet\Packages\Gyan.FFmpeg_Microsoft.Winget.Source_8wekyb3d8bbwe\ffmpeg-8.0.1-full_build\bin"


def prepend_to_path(directory, path_value, sep=os.pathsep):
    """Return *path_value* with *directory* as its first entry.

    Entries are compared one by one (case- and slash-normalised the way the
    host OS does), not by substring, so a longer entry that merely contains
    *directory* as a prefix does not suppress the insert.

    Args:
        directory: Directory that must be searchable.
        path_value: Current value of the PATH variable (may be empty).
        sep: Separator between PATH entries; defaults to the platform's.

    Returns:
        *path_value* unchanged when *directory* is already listed, otherwise
        a new string with *directory* first and empty entries dropped.
    """
    def _canonical(entry):
        return os.path.normcase(os.path.normpath(entry))

    entries = [entry for entry in path_value.split(sep) if entry]
    wanted = _canonical(directory)
    if any(_canonical(entry) == wanted for entry in entries):
        return path_value
    return sep.join([directory, *entries])


os.environ["PATH"] = prepend_to_path(_ffmpeg_dir, os.environ.get("PATH", ""))

# A stale or mistyped install directory otherwise only surfaces much later as
# "[WinError 2] The system cannot find the file specified" when audio is loaded.
if shutil.which("ffmpeg") is None:
    print(
        "Warning: ffmpeg could not be found on PATH "
        f"(expected it in {_ffmpeg_dir}); loading audio from video will fail."
    )

# PyTorch 2.6 changed torch.load's default to weights_only=True, which breaks
# the checkpoints loaded by pyannote/whisperx. Route every torch.load call
# through a wrapper that forces the old behaviour.
# NOTE: weights_only=False means full unpickling -- only load checkpoints
# that come from a trusted source.
_original_load = torch.load


@functools.wraps(_original_load)
def _safe_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``weights_only`` forced to False.

    Any ``weights_only`` value supplied by the caller is overridden; all other
    positional and keyword arguments are forwarded untouched.
    """
    forwarded = {**kwargs, "weights_only": False}
    return _original_load(*args, **forwarded)


torch.load = _safe_load

import whisperx

# --- Configuration ---
# Input: either a single video file or a folder containing video files.
SOURCE_PATH = r"C:\Users\yashf\ISP\video\videoplayback.mp4"
# Folder that receives one .txt transcript per processed video.
OUTPUT_FOLDER = r"C:\Users\yashf\ISP\transcripts"

# Probe CUDA once and derive both device-dependent settings from the result.
_cuda_ready = torch.cuda.is_available()
DEVICE = "cuda" if _cuda_ready else "cpu"  # Run on the GPU when one is usable
COMPUTE_TYPE = "float16" if _cuda_ready else "int8"  # "float16" on GPU, "int8" on CPU

# Lower this value if the GPU runs out of memory.
BATCH_SIZE = 4

_VIDEO_EXTENSIONS = ('.mp4', '.mkv', '.mov', '.avi', '.webm')


def _find_video_files(source_path):
    """Return the video files named by *source_path*, or None if it does not exist.

    A file path yields a one-element list (empty when its extension is not a
    known video extension). A directory yields its direct children that are
    regular files with a video extension, in sorted order.
    """
    if os.path.isfile(source_path):
        return [source_path] if source_path.lower().endswith(_VIDEO_EXTENSIONS) else []
    if os.path.isdir(source_path):
        children = (
            os.path.join(source_path, name)
            for name in sorted(os.listdir(source_path))
        )
        return [
            path for path in children
            if path.lower().endswith(_VIDEO_EXTENSIONS) and os.path.isfile(path)
        ]
    return None


def _write_transcript(segments, output_path):
    """Write the stripped text of each segment to *output_path*, one per line."""
    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(segment["text"].strip() + "\n" for segment in segments)


def transcribe_videos():
    """Transcribe the video(s) at SOURCE_PATH into text files in OUTPUT_FOLDER.

    Each video produces ``<video stem>.txt`` containing one line per
    transcribed segment. Two inputs that share a stem (``a.mp4`` and
    ``a.mkv``) map to the same output file, so the later one overwrites the
    earlier one.

    All cheap preconditions (inputs exist, ffmpeg is resolvable) are checked
    before the model is loaded, so a misconfiguration is reported at once
    instead of after the model load. Failures are reported with ``print``;
    nothing is raised and nothing is returned. A failure on one video does
    not stop the remaining ones.
    """
    import shutil  # local import: only needed for the ffmpeg pre-flight check

    # 1. Build the list of video files to process
    video_files = _find_video_files(SOURCE_PATH)
    if video_files is None:
        print(f"Source path does not exist: {SOURCE_PATH}")
        return
    if not video_files:
        print("No video files found.")
        return

    # 2. Audio is extracted from the video through ffmpeg; without it every
    #    file fails with an opaque "[WinError 2] The system cannot find the
    #    file specified", so report the real cause up front.
    if shutil.which("ffmpeg") is None:
        print("ffmpeg was not found on PATH; install it or fix PATH before transcribing.")
        return

    # 3. Create the output folder (exist_ok avoids a check-then-create race)
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    # 4. Load the WhisperX model
    print("Loading WhisperX model...")
    try:
        model = whisperx.load_model("large-v2", device=DEVICE, compute_type=COMPUTE_TYPE)
    except Exception as e:
        print(f"Error loading model: {e}")
        return

    for video_path in video_files:
        filename = os.path.basename(video_path)
        output_filename = os.path.splitext(filename)[0] + ".txt"
        output_path = os.path.join(OUTPUT_FOLDER, output_filename)

        print(f"Processing: {filename}...")

        # Per-file boundary: one bad video must not abort the whole batch.
        try:
            # 5. Load audio directly from the video (WhisperX uses ffmpeg internally)
            audio = whisperx.load_audio(video_path)

            # 6. Transcribe with VAD (Voice Activity Detection) to handle gaps
            result = model.transcribe(audio, batch_size=BATCH_SIZE)

            # 7. Save text to file, one line per segment
            _write_transcript(result["segments"], output_path)
        except Exception as e:
            print(f"Failed to process {filename}: {e}")
            continue

        print(f"Saved transcription to: {output_path}")

    print("All tasks finished.")

# Entry point: run the transcription only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    transcribe_videos()

Loading WhisperX model...


  from .autonotebook import tqdm as notebook_tqdm
  torchaudio.list_audio_backends()
  available_backends = torchaudio.list_audio_backends()


2026-02-09 17:49:03 - whisperx.asr - INFO - No language specified, language will be detected for each audio file (increases inference time)
2026-02-09 17:49:03 - whisperx.vads.pyannote - INFO - Performing voice activity detection using Pyannote...


  if ismodule(module) and hasattr(module, '__file__'):
Lightning automatically upgraded your loaded checkpoint from v1.5.4 to v2.6.1. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint c:\Users\yashf\ISP\isp\Lib\site-packages\whisperx\assets\pytorch_model.bin`
  torchaudio.list_audio_backends()


Model was trained with pyannote.audio 0.0.1, yours is 3.4.0. Bad things might happen unless you revert pyannote.audio to 0.x.
Model was trained with torch 1.10.0+cu102, yours is 2.8.0+cpu. Bad things might happen unless you revert torch to 1.x.
Processing: videoplayback.mp4...
Failed to process videoplayback.mp4: [WinError 2] The system cannot find the file specified
All tasks finished.
