In [2]:
# Install the Whisper library
!pip install git+https://github.com/openai/whisper.git -q

# Check if a GPU is available (Whisper is much faster on a GPU)
!nvidia-smi

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone
Mon Oct 27 20:07:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   35C    P8              9W /   70W |       0MiB /  15360MiB |      0%   

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import whisper
import os
import time

# --- Configuration ---

# TODO: MUST-UPDATE!
# Update this path to point to your video folder in Google Drive
SOURCE_DIR = "/content/drive/MyDrive/ENTER_YOUR_FILE_HERE"

# Where to save the transcripts
TRANSCRIPT_DIR = "/content/drive/MyDrive/whisper_transcripts"

# Which Whisper model to use.
# Options: "tiny", "base", "small", "medium", "large"
# ".en" models (e.g., "base.en") are faster if you know all audio is English.
MODEL_NAME = "small.en"

# File extensions to look for
SUPPORTED_EXTENSIONS = ('.mp4', '.mov', '.m4a', '.mp3', '.wav', '.flac')

# --- End of Configuration ---


# 1. Create the output directory if it doesn't exist
os.makedirs(TRANSCRIPT_DIR, exist_ok=True)
print(f"Transcripts will be saved to: {TRANSCRIPT_DIR}")

# 2. Load the Whisper model
# This will download the model weights the first time it's run
try:
    print(f"Loading Whisper model '{MODEL_NAME}'... (This may take a moment)")
    model = whisper.load_model(MODEL_NAME)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading model: {e}")
    print("Please ensure you have a GPU runtime (Runtime > Change runtime type > GPU)")
    # Raise the exception to stop the script if the model fails to load
    raise

# 3. Walk through the source directory and find files
print(f"\nStarting transcription process for: {SOURCE_DIR}")
start_batch_time = time.time()
files_processed = 0
files_skipped = 0

for root, dirs, files in os.walk(SOURCE_DIR):
    for file in files:
        if file.lower().endswith(SUPPORTED_EXTENSIONS):
            video_path = os.path.join(root, file)

            # Define the output file path
            base_filename = os.path.splitext(file)[0]
            transcript_filename = f"{base_filename}.txt"
            transcript_path = os.path.join(TRANSCRIPT_DIR, transcript_filename)

            # Check if transcript already exists to avoid re-processing
            if os.path.exists(transcript_path):
                print(f"--- Skipping '{file}', transcript already exists. ---")
                files_skipped += 1
                continue

            # --- Run Transcription ---
            print(f"--- Transcribing '{file}'... ---")
            start_file_time = time.time()
            try:
                # Run the transcription
                # fp16=True is much faster on a GPU, but Whisper handles this automatically.
                result = model.transcribe(video_path, verbose=True)

                transcript_text = result["text"]

                # Save the transcript
                with open(transcript_path, "w", encoding="utf-8") as f:
                    f.write(transcript_text)

                end_file_time = time.time()
                print(f"--- Finished '{file}'. Saved to '{transcript_path}' ---")
                print(f"Time taken: {end_file_time - start_file_time:.2f} seconds\n")
                files_processed += 1

            except Exception as e:
                print(f"ERROR processing '{file}': {e}\n")

end_batch_time = time.time()
print("--- Batch Transcription Complete ---")
print(f"Total files processed: {files_processed}")
print(f"Total files skipped: {files_skipped}")
print(f"Total time taken: {(end_batch_time - start_batch_time) / 60:.2f} minutes")


Transcripts will be saved to: /content/drive/MyDrive/whisper_transcripts
Loading Whisper model 'small.en'... (This may take a moment)


100%|████████████████████████████████████████| 461M/461M [00:02<00:00, 188MiB/s]


Model loaded successfully.

Starting transcription process for: /content/drive/MyDrive/ENTER_YOUR_FILE_HERE
--- Batch Transcription Complete ---
Total files processed: 0
Total files skipped: 0
Total time taken: 0.00 minutes
