In [None]:
"""
Video to VTT Subtitle Generator using faster-whisper
----------------------------------------------------
Usage:
    python transcribe.py <video_file> <output_vtt> [model_size]

Example:
    python transcribe.py BS1064NEET26MAY18V1.mp4 BS1064NEET26MAY18V1.vtt small
"""

import sys
import os
from faster_whisper import WhisperModel


def format_timestamp(seconds: float) -> str:
    """
    Convert seconds to VTT timestamp format (HH:MM:SS.mmm).

    The value is rounded to the nearest millisecond before being split into
    fields, so binary floating-point noise (e.g. 1.001 being stored as
    1.000999...) cannot drop a millisecond, and a value that rounds up
    carries cleanly into the seconds/minutes/hours fields.

    Negative inputs are clamped to 00:00:00.000 because a VTT timestamp
    cannot be negative.
    """
    # Do all arithmetic on one integer millisecond count so every field is
    # derived from the same rounded value.
    total_millis = max(0, int(round(seconds * 1000)))
    total_secs, millis = divmod(total_millis, 1000)
    total_minutes, secs = divmod(total_secs, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


def transcribe_video_to_vtt(video_path: str, output_vtt: str, model_size="small",
                            device=None):
    """
    Transcribes an MP4 video file into a .vtt subtitle file using faster-whisper.

    Args:
        video_path: Path of the video file to transcribe.
        output_vtt: Path of the WebVTT file to write (overwritten if present).
        model_size: Whisper model name passed to ``WhisperModel``.
        device: Device passed to ``WhisperModel``. When omitted, the "tiny"
            model runs on "cpu" and every other size uses "auto", which lets
            faster-whisper pick a GPU when one is usable and fall back to the
            CPU otherwise.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    if device is None:
        # Hard-coding "cuda" made every non-tiny model fail on machines
        # without a CUDA GPU; "auto" keeps GPU use where it is available.
        device = "cpu" if model_size == "tiny" else "auto"

    print(f"🚀 Loading model '{model_size}'...")
    model = WhisperModel(model_size, device=device)

    print(f"🎙️ Transcribing video: {video_path}")
    segments, info = model.transcribe(video_path)

    print(f"🌎 Detected language: {info.language}, "
          f"Probability: {info.language_probability:.2f}")

    with open(output_vtt, "w", encoding="utf-8") as vtt:
        # A WebVTT file starts with the "WEBVTT" header followed by a blank line.
        vtt.write("WEBVTT\n\n")
        # Each cue: numeric identifier, timing line, text, blank separator.
        for i, segment in enumerate(segments, start=1):
            start = format_timestamp(segment.start)
            end = format_timestamp(segment.end)
            vtt.write(f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n")

    print(f"✅ Subtitles saved as: {output_vtt}")


if __name__ == "__main__":
    # Command-line entry point: two required positionals (input video,
    # output .vtt) and an optional third naming the model size.
    if len(sys.argv) >= 3:
        video_file, output_vtt = sys.argv[1:3]
        # Fall back to the "small" model when no size was supplied.
        model_size = "small" if len(sys.argv) == 3 else sys.argv[3]
        transcribe_video_to_vtt(video_file, output_vtt, model_size)
    else:
        print("Usage: python transcribe.py <video_file> <output_vtt> [model_size]")
        sys.exit(1)
