# Install required packages

In [1]:
!pip install faster-whisper
!pip install ffmpeg-python
!pip install pandas


Collecting faster-whisper
  Downloading faster_whisper-1.2.0-py3-none-any.whl.metadata (16 kB)
Collecting ctranslate2<5,>=4.0 (from faster-whisper)
  Downloading ctranslate2-4.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting onnxruntime<2,>=1.14 (from faster-whisper)
  Downloading onnxruntime-1.23.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.9 kB)
Collecting av>=11 (from faster-whisper)
  Downloading av-15.1.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (4.6 kB)
Collecting coloredlogs (from onnxruntime<2,>=1.14->faster-whisper)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime<2,>=1.14->faster-whisper)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading faster_whisper-1.2.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m17.8 MB/s[0m eta [3

# Upload video file to Colab

In [None]:
from google.colab import files

# Upload the MP4 file from the local machine.
uploaded = files.upload()

# files.upload() prompts us to choose our local MP4 file.
# After the upload, we can access the file by its filename.


# Python transcription code

In [None]:
from faster_whisper import WhisperModel
import os

def transcribe_video_to_vtt(video_path, output_vtt, model_size="small", device=None):
    """Transcribe a media file with faster-whisper and save the result as WebVTT.

    Args:
        video_path: Path of the video/audio file to transcribe.
        output_vtt: Path of the ``.vtt`` file to write (overwritten if it exists).
        model_size: Whisper model name handed to ``WhisperModel``
            (e.g. "tiny", "base", "small", "medium", "large").
        device: Device handed to ``WhisperModel`` ("cpu", "cuda" or "auto").
            When ``None`` (the default) the "tiny" model runs on the CPU and
            every other size uses "auto", so the GPU is used only when one is
            actually available instead of failing on CPU-only runtimes.

    Raises:
        FileNotFoundError: If ``video_path`` does not exist.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"Video file not found: {video_path}")

    if device is None:
        # "auto" lets the backend fall back to the CPU when no GPU is present;
        # hard-coding "cuda" would raise on a runtime without a GPU.
        device = "cpu" if model_size == "tiny" else "auto"

    print(f"Loading model '{model_size}'...")
    model = WhisperModel(model_size, device=device)

    print(f"Transcribing video: {video_path}")
    # NOTE: per the faster-whisper docs `segments` is a lazy generator, so the
    # actual decoding happens while the loop below iterates over it.
    segments, info = model.transcribe(video_path)

    print(f"Detected language: {info.language}, Probability: {info.language_probability:.2f}")

    with open(output_vtt, "w", encoding="utf-8") as vtt:
        # A WebVTT file starts with the "WEBVTT" signature and a blank line.
        vtt.write("WEBVTT\n\n")
        for i, segment in enumerate(segments, start=1):
            start = format_timestamp(segment.start)
            end = format_timestamp(segment.end)
            # One cue: numeric identifier, timing line, text, blank separator.
            vtt.write(f"{i}\n{start} --> {end}\n{segment.text.strip()}\n\n")

    print(f"✅ Subtitles saved as {output_vtt}")

def format_timestamp(seconds: float) -> str:
    """Format a time offset in seconds as a WebVTT timestamp ``HH:MM:SS.mmm``.

    The value is rounded to the nearest millisecond once, up front, and then
    split with integer arithmetic. This avoids the float truncation error of
    computing the fractional part separately (e.g. ``1.001`` used to render
    as ``.000``) and guarantees that a rounded-up millisecond carries into
    the seconds/minutes/hours fields.

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because a timestamp cannot be negative.

    Returns:
        The timestamp string, with hours zero-padded to at least two digits.
    """
    # Work in whole milliseconds so every later step is exact integer math.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


# Run the transcription

| Model  | Speed | Accuracy |
| ------ | ----- | -------- |
| tiny   | ⚡⚡⚡⚡⚡ | ⭐⭐☆☆☆    |
| base   | ⚡⚡⚡⚡☆ | ⭐⭐⭐☆☆    |
| small  | ⚡⚡⚡☆☆ | ⭐⭐⭐⭐☆    |
| medium | ⚡⚡☆☆☆ | ⭐⭐⭐⭐☆    |
| large  | ⚡☆☆☆☆ | ⭐⭐⭐⭐⭐    |


In [None]:
# Input and output paths; change both names together when transcribing another video.
video_file = "BS1064NEET26MAY18V1.mp4"   # uploaded file name
output_vtt = "BS1064NEET26MAY18V1.vtt"   # output subtitle file

# Transcribe with the "small" model (see the table above for the other sizes).
transcribe_video_to_vtt(video_file, output_vtt, model_size="small")


# Download the VTT file

In [None]:
from google.colab import files
# Download the generated subtitle file; the name must match the output_vtt
# value used in the transcription cell.
files.download("BS1064NEET26MAY18V1.vtt")