# Keyframe Extractor Colab Notebook

In [1]:
import os

from google.colab import drive

# Mount Google Drive so later cells can read videos from and write frames to
# paths under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install ffmpeg-python openai fastapi uvicorn pyngrok transformers
!apt-get -y install ffmpeg

Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Downloading pyngrok-7.2.12-py3-none-any.whl (26 kB)
Installing collected packages: pyngrok, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 pyngrok-7.2.12
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.


In [3]:
# Load the ngrok auth token from Colab secrets (optional: a missing secret only prints a warning)
from google.colab import userdata
from google.colab.userdata import SecretNotFoundError
from pyngrok import ngrok

# ngrok token: optional here — a missing secret only produces a warning.
try:
    token = userdata.get("NGROK_AUTH_TOKEN")
    ngrok.set_auth_token(token)
    print("✅ NGROK_AUTH_TOKEN was loaded!")
except SecretNotFoundError:
    print("⚠️ NGROK_AUTH_TOKEN not found. Skipping auth-token setup.")

# Hugging Face token: required — the cell aborts when the secret is missing,
# so the error message must say so instead of claiming the step was skipped.
try:
    hf_token = userdata.get("HF_TOKEN")
    os.environ["HF_TOKEN"] = hf_token
    print("✅ HF_TOKEN was loaded!")
except SecretNotFoundError as err:
    raise RuntimeError(
        "❌ HF_TOKEN not found in Colab secrets. "
        "Add it (and grant this notebook access) before running the remaining cells."
    ) from err

✅ NGROK_AUTH_TOKEN was loaded!
✅ HF_TOKEN was loaded!


In [4]:
import json
import subprocess
from typing import List, Dict, Tuple
from pathlib import Path
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch

# Hugging Face checkpoint shared by the processor and the captioning model.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

# Use the GPU when the runtime has one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Request the fast processor implementation (done to silence warnings).
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT, use_fast=True)

# Load the weights from the .safetensors file in float32, then move the model
# onto the selected device.
model = BlipForConditionalGeneration.from_pretrained(
    BLIP_CHECKPOINT,
    torch_dtype=torch.float32,
    use_safetensors=True,
)
model = model.to(device)

class KeyFrameExtractor:
    """Extract I-frames from a video with FFmpeg and list their timestamps with FFprobe.

    Both tools are invoked as subprocesses; the executables are looked up via
    the paths given to the constructor.
    """

    def __init__(self, ffmpeg_path: str = "ffmpeg", ffprobe_path: str = "ffprobe"):
        """Store the executable names/paths used for every subprocess call."""
        self.ffmpeg_path = ffmpeg_path
        self.ffprobe_path = ffprobe_path

    def extract_keyframes(self, video_path: str, output_dir: str) -> Dict:
        """
        Extract key frames from a video and return metadata.

        Args:
            video_path: Path of the input video.
            output_dir: Directory that receives the PNG frames (created if missing).

        Returns:
            On success a dict with ``success`` (True), ``frames_extracted``,
            ``frame_files``, ``timestamps`` and ``output_directory``.
            On failure a dict with ``success`` (False), ``error`` and
            ``frames_extracted`` (0). Errors are reported through the dict
            rather than raised.
        """
        # Create output directory
        os.makedirs(output_dir, exist_ok=True)

        # Get video filename without extension
        video_name = Path(video_path).stem

        # FFmpeg's image-sequence muxer accepts exactly ONE %d directive in the
        # output pattern; with -frame_pts 1 that number is the frame's PTS.
        # Any literal '%' in the directory or video name must be doubled so it
        # is not parsed as a directive.
        literal_prefix = os.path.join(output_dir, f"{video_name}-keyframe-")
        frames_output = literal_prefix.replace("%", "%%") + "%09d.png"

        ffmpeg_cmd = [
            self.ffmpeg_path,
            "-i", video_path,
            # The backslash escapes the comma for FFmpeg's filtergraph parser.
            "-vf", "select=eq(pict_type\\,PICT_TYPE_I)",
            "-vsync", "2",
            "-frame_pts", "1",
            "-y",  # Overwrite output files
            frames_output
        ]

        try:
            # Run FFmpeg; check=True turns a non-zero exit into CalledProcessError
            subprocess.run(
                ffmpeg_cmd,
                capture_output=True,
                text=True,
                check=True
            )

            # Get key frame timestamps using FFprobe
            timestamps = self._get_keyframe_timestamps(video_path)

            # Get list of generated frame files
            frame_files = self._get_frame_files(output_dir, video_name)

            return {
                "success": True,
                "frames_extracted": len(frame_files),
                "frame_files": frame_files,
                "timestamps": timestamps,
                "output_directory": output_dir
            }

        except subprocess.CalledProcessError as e:
            return {
                "success": False,
                "error": f"FFmpeg error: {e.stderr}",
                "frames_extracted": 0
            }
        except Exception as e:
            # Covers e.g. a missing ffmpeg/ffprobe executable.
            return {
                "success": False,
                "error": f"Unexpected error: {str(e)}",
                "frames_extracted": 0
            }

    def _get_keyframe_timestamps(self, video_path: str) -> List[Dict]:
        """
        Extract key frame timestamps using FFprobe.

        Returns:
            One dict per frame whose ``pict_type`` is ``"I"``, with keys
            ``timestamp`` (float seconds), ``pts_time`` (string as reported by
            FFprobe) and ``description``. Frames whose ``pts_time`` cannot be
            parsed as a number are skipped. Returns ``[]`` when FFprobe exits
            with an error or prints invalid JSON.
        """
        ffprobe_cmd = [
            self.ffprobe_path,
            "-v", "quiet",
            "-select_streams", "v:0",
            "-show_entries", "frame=pict_type,pts_time",
            "-of", "json",
            video_path
        ]

        try:
            result = subprocess.run(
                ffprobe_cmd,
                capture_output=True,
                text=True,
                check=True
            )
            data = json.loads(result.stdout)
        except (subprocess.CalledProcessError, json.JSONDecodeError):
            return []

        keyframes = []
        for frame in data.get("frames", []):
            if frame.get("pict_type") != "I":
                continue
            pts_time = frame.get("pts_time", "0")
            try:
                timestamp = float(pts_time)
            except (TypeError, ValueError):
                # A non-numeric value must not abort the whole extraction,
                # whose frames have already been written by FFmpeg.
                continue
            keyframes.append({
                "timestamp": timestamp,
                "pts_time": pts_time,
                "description": f"Key frame at {pts_time} seconds"
            })

        return keyframes

    def _get_frame_files(self, output_dir: str, video_name: str) -> List[str]:
        """
        Return the sorted names of PNG files in ``output_dir`` that start with
        ``"<video_name>-keyframe-"``.
        """
        prefix = f"{video_name}-keyframe-"
        return sorted(
            name
            for name in os.listdir(output_dir)
            if name.startswith(prefix) and name.endswith(".png")
        )

preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

In [5]:
# Example run: pull the key frames out of a sample video stored on Google Drive
# and write them to a sibling folder on the same Drive.
video_path = '/content/drive/MyDrive/sample.mp4'
output_dir = '/content/drive/MyDrive/keyframes_sample'

extractor = KeyFrameExtractor(ffmpeg_path='ffmpeg', ffprobe_path='ffprobe')
result = extractor.extract_keyframes(video_path=video_path, output_dir=output_dir)

# The result dict carries either the frame list or the error text.
print(result)

{'success': False, 'error': 'FFmpeg error: ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers\n  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)\n  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvids

In [6]:
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
import uvicorn
import nest_asyncio
from pyngrok import ngrok
from google.colab import userdata

# Web application that exposes the extractor over HTTP.
app = FastAPI()

# Single extractor instance shared by all requests; uses the ffmpeg/ffprobe
# executables found on PATH.
extractor = KeyFrameExtractor(ffmpeg_path='ffmpeg', ffprobe_path='ffprobe')

@app.post("/extract_keyframes")
async def extract_keyframes_api(file: UploadFile = File(...)):
    """Store an uploaded video on Drive, extract its key frames and return the result as JSON.

    The upload is written to /content/drive/MyDrive/uploads/ and the frames to
    /content/drive/MyDrive/keyframes/<name without extension>/. The response
    body is the dict returned by ``extractor.extract_keyframes``. A request
    whose filename is missing or reduces to nothing usable gets a 400 response
    with the same failure-dict shape.
    """
    # The filename is client-controlled: keep only its final path component so
    # values like "../../x" cannot place files outside the upload directory.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        return JSONResponse(
            {
                "success": False,
                "error": "Invalid or missing upload filename",
                "frames_extracted": 0,
            },
            status_code=400,
        )

    # Save uploaded file to /content/drive/MyDrive/uploads/
    upload_dir = '/content/drive/MyDrive/uploads'
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, safe_name)
    with open(file_path, "wb") as f:
        # Copy in 1 MiB chunks instead of holding the whole video in memory.
        while True:
            chunk = await file.read(1024 * 1024)
            if not chunk:
                break
            f.write(chunk)

    output_dir = os.path.join('/content/drive/MyDrive/keyframes', os.path.splitext(safe_name)[0])
    os.makedirs(output_dir, exist_ok=True)
    result = extractor.extract_keyframes(file_path, output_dir)
    return JSONResponse(result)

# Set up ngrok authtoken.
# userdata.get raises SecretNotFoundError for a missing secret, so the lookup
# is wrapped to let the "not found" message below actually be reached.
try:
    ngrok_auth_token = userdata.get('NGROK_AUTH_TOKEN')
except userdata.SecretNotFoundError:
    ngrok_auth_token = None

if ngrok_auth_token:
    ngrok.set_auth_token(ngrok_auth_token)
else:
    print("NGROK_AUTH_TOKEN not found in Colab secrets. Please add it.")
    # You might want to exit or raise an error here if the token is mandatory

# Start ngrok tunnel and server
ngrok_tunnel = ngrok.connect(8000)
print("Public URL:", ngrok_tunnel.public_url)

# Allow uvicorn to run inside the notebook's already-running event loop.
nest_asyncio.apply()
try:
    # Blocks until the server is stopped (e.g. by interrupting the cell).
    uvicorn.run(app, port=8000)
finally:
    # Close the tunnel once the server is gone so re-running this cell does
    # not accumulate open tunnels.
    ngrok.disconnect(ngrok_tunnel.public_url)

Public URL: https://54f851e73e92.ngrok-free.app


INFO:     Started server process [385]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [385]


In [7]:
# Build ready-to-copy client snippets that target the public tunnel URL of the
# /extract_keyframes endpoint, then print them one after the other.
curl_example = f"Example curl:\ncurl -F 'file=@/path/to/video.mp4' {ngrok_tunnel.public_url}/extract_keyframes"
python_example = f"Example Python:\nimport requests\nfiles = {{'file': open('video.mp4', 'rb')}}\nrequests.post('{ngrok_tunnel.public_url}/extract_keyframes', files=files)"

for example in (curl_example, python_example):
    print(example)

Example curl:
curl -F 'file=@/path/to/video.mp4' https://54f851e73e92.ngrok-free.app/extract_keyframes
Example Python:
import requests
files = {'file': open('video.mp4', 'rb')}
requests.post('https://54f851e73e92.ngrok-free.app/extract_keyframes', files=files)
