In [None]:
!pip install transformers torch torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab only); later cells read the ZIP
# archives from, and write embeddings to, paths under /content/drive/MyDrive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile
import os

# Source archives stored on the mounted Google Drive.
zip1 = "/content/drive/MyDrive/all_videos1.zip"
zip2 = "/content/drive/MyDrive/all_videos2.zip"

# Local extraction targets, one directory per archive.
dest1 = "/content/frames_unzip1"
dest2 = "/content/frames_unzip2"

# Unpack each archive into its own directory, creating the directory if needed.
for zip_path, dest in zip((zip1, zip2), (dest1, dest2)):
    os.makedirs(dest, exist_ok=True)
    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(path=dest)

print("✅ Both ZIPs extracted.")

✅ Both ZIPs extracted.


In [None]:
import os
import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from transformers import CLIPModel, CLIPProcessor

In [None]:
# --- Frame sampling settings -------------------------------------------------
FRAME_SIZE = (224, 224)   # (width, height) every frame is resized to on load
FRAMES_PER_VIDEO = 16     # number of frames sampled from each video folder

# Root directories that are scanned for per-video frame folders.
FOLDER_LIST = [
    "/content/frames_unzip1",
    "/content/frames_unzip2"
]

# One <video_folder_name>.npy embedding file per video is written here.
OUTPUT_DIR = "/content/drive/MyDrive/video_vectorizer/clip4clip_embeddings"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Model -------------------------------------------------------------------
# Single source of truth for the checkpoint, so the model weights and the
# preprocessing configuration can never be loaded from different checkpoints.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT).to(device)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
model.eval()  # inference only: switch the module out of training mode

def load_clip_frames(folder_path, num_frames=FRAMES_PER_VIDEO):
    """Load a fixed-length, evenly spaced sample of frames from one video folder.

    Frame files are matched by extension (.jpg, .jpeg, .png, case-insensitive)
    and ordered by a plain lexicographic sort of their paths, so temporal
    order relies on file names that sort correctly (e.g. zero-padded indices).

    Args:
        folder_path: Directory holding the extracted frames of a single video.
        num_frames: Number of frames to return.

    Returns:
        A list of ``num_frames`` RGB ``PIL.Image`` objects, each resized to
        ``FRAME_SIZE``. If the folder holds fewer than ``num_frames`` frames,
        real frames are repeated (in order, as evenly as possible) to fill
        the list.

    Raises:
        ValueError: If the folder contains no matching frame files.
    """
    frame_paths = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith((".jpg", ".jpeg", ".png"))
    )

    if not frame_paths:
        raise ValueError(f"No frames found in {folder_path}")

    available = len(frame_paths)
    if available >= num_frames:
        # Enough frames: spread the picks uniformly from first to last frame.
        indices = np.linspace(0, available - 1, num=num_frames, dtype=int)
    else:
        # Too few frames: repeat real frames instead of padding with black
        # images. Every returned frame is mean-pooled into the video embedding
        # downstream, so synthetic black frames would skew the embedding of
        # short videos. Integer scaling gives each real frame a near-equal
        # share of the slots while preserving order.
        indices = (np.arange(num_frames) * available) // num_frames

    selected = []
    for i in indices:
        # convert() returns an independent in-memory copy, so the file handle
        # can be released as soon as the block exits.
        with Image.open(frame_paths[i]) as frame:
            selected.append(frame.convert("RGB").resize(FRAME_SIZE))

    return selected

def extract_clip4clip_video_embedding(images):
    """Encode a list of frames with CLIP and mean-pool them into one vector.

    Args:
        images: Frames of a single video, as accepted by ``processor``.

    Returns:
        A 1-D NumPy array: the average over frames of the CLIP image features
        (shape ``(512,)`` for the checkpoint loaded in this notebook).
    """
    batch = processor(images=images, return_tensors="pt", padding=True)
    batch = batch.to(device)

    # Gradients are never needed here; skip building the autograd graph.
    with torch.no_grad():
        frame_features = model.get_image_features(**batch)

    # Collapse the frame axis so the whole video is a single vector.
    video_vector = frame_features.mean(dim=0)
    return video_vector.cpu().numpy()


# Walk <root>/<top_folder>/<video_folder>, compute one embedding per video
# folder and store it as <video_folder>.npy in OUTPUT_DIR. Videos whose output
# file already exists are skipped, so the cell can be re-run to resume.
for root_folder in FOLDER_LIST:
    print(f"\nRoot folder: {root_folder}")

    # Sorted so the processing order (and the idx shown below) is deterministic.
    top_level_folders = sorted(
        f for f in os.listdir(root_folder)
        if os.path.isdir(os.path.join(root_folder, f))
    )

    for top_folder in top_level_folders:
        top_path = os.path.join(root_folder, top_folder)
        subfolders = sorted(
            sf for sf in os.listdir(top_path)
            if os.path.isdir(os.path.join(top_path, sf))
        )

        print(f" Found {len(subfolders)} videos in: {top_folder}")

        for idx, sub_folder in enumerate(subfolders, start=1):
            folder_path = os.path.join(top_path, sub_folder)
            output_path = os.path.join(OUTPUT_DIR, f"{sub_folder}.npy")
            tmp_path = output_path + ".tmp"

            print(f"video {idx}/{len(subfolders)} → {sub_folder}")

            if os.path.exists(output_path):
                print(f" Skipping (already exists): {output_path}")
                continue

            try:
                frames = load_clip_frames(folder_path)
                emb = extract_clip4clip_video_embedding(frames)
                # Write to a temporary file and rename it into place: an
                # interrupted run must never leave a truncated .npy at
                # output_path, because the existence check above would then
                # skip that video on every later run.
                # A file object is passed so np.save does not append ".npy"
                # to the temporary name.
                with open(tmp_path, "wb") as tmp_file:
                    np.save(tmp_file, emb)
                os.replace(tmp_path, output_path)
                print(f"Saved: {output_path}")
            except Exception as e:
                # Best effort: report the failure and continue with the next video.
                print(f"Skipped {folder_path}: {e}")
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)



📂 Root folder: /content/frames_unzip1
 Found 1403 videos in: all_videos_frames
video 1/1403 → 7431153975866477832_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/clip4clip_embeddings/7431153975866477832_frames.npy
video 2/1403 → 7410671910494801169_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/clip4clip_embeddings/7410671910494801169_frames.npy
video 3/1403 → 7455028385836748052_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/clip4clip_embeddings/7455028385836748052_frames.npy
video 4/1403 → 7450507043993029895_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/clip4clip_embeddings/7450507043993029895_frames.npy
video 5/1403 → 7436002221600312583_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/clip4clip_embeddings/7436002221600312583_frames.npy
video 6/1403 → 7398671113494088961_frames
⏩ Skipping (already exists): /content/drive/MyDrive/video_vecto