In [None]:
!pip install transformers torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchvision)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.6.0->torchvision)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86

In [None]:
# Mount Google Drive at /content/drive. The ZIP archives read below and the
# embedding output directory both live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import zipfile
import os

# Frame archives stored on Drive.
zip1 = "/content/drive/MyDrive/all_videos1.zip"
zip2 = "/content/drive/MyDrive/all_videos2.zip"

# Local directories each archive is unpacked into.
dest1 = "/content/frames_unzip1"
dest2 = "/content/frames_unzip2"

# (archive, destination) pairs, processed in order.
extraction_plan = ((zip1, dest1), (zip2, dest2))

for archive_path, target_dir in extraction_plan:
    # Create the destination first so extraction never fails on a missing directory.
    os.makedirs(target_dir, exist_ok=True)
    with zipfile.ZipFile(archive_path, mode="r") as archive:
        archive.extractall(path=target_dir)

print("✅ Both ZIPs extracted.")

✅ Both ZIPs extracted.


In [None]:
import os
import re

import numpy as np
import torch
from PIL import Image
from tqdm import tqdm
from transformers import VideoMAEFeatureExtractor, VideoMAEImageProcessor, VideoMAEModel

In [None]:
# --- Configuration ---------------------------------------------------------
FRAMES_PER_VIDEO = 16      # frames sampled (or padded to) per video folder
FRAME_SIZE = (224, 224)    # size every frame is resized to before preprocessing
OUTPUT_DIR = "/content/drive/MyDrive/video_vectorizer/videomae_embeddings"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Roots of the extracted archives; each is scanned as <root>/<top>/<video folder>.
FOLDER_LIST = [
    "/content/frames_unzip1",
    "/content/frames_unzip2"
]

# Single source of truth for the checkpoint used by both preprocessor and model.
MODEL_NAME = "MCG-NJU/videomae-base"

# VideoMAEFeatureExtractor is deprecated in transformers; VideoMAEImageProcessor
# is its replacement and is called the same way. The variable keeps its original
# name because the functions below refer to `feature_extractor`.
feature_extractor = VideoMAEImageProcessor.from_pretrained(MODEL_NAME)
model = VideoMAEModel.from_pretrained(MODEL_NAME)
model.eval()  # inference only: disables dropout and other train-time behaviour


def _frame_sort_key(filename):
    """Natural sort key: digit runs compare as integers, so frame_2 sorts before frame_10."""
    parts = re.split(r"(\d+)", filename)
    # re.split with one capture group alternates text / digits, so odd positions
    # are always digit runs; the raw filename breaks ties deterministically.
    return [int(part) if i % 2 else part for i, part in enumerate(parts)], filename


def load_frames_from_folder(folder_path, num_frames=FRAMES_PER_VIDEO):
    """Load a fixed number of evenly spaced RGB frames from a folder of images.

    Args:
        folder_path: Directory containing the frame images (.jpg, .jpeg or .png,
            case-insensitive).
        num_frames: Number of frames to return.

    Returns:
        A list of exactly ``num_frames`` numpy arrays, each an RGB image resized
        to ``FRAME_SIZE``. If the folder holds fewer images than ``num_frames``,
        every image is used and the list is padded with all-zero (black) frames.

    Raises:
        ValueError: If the folder contains no matching image files.
    """
    frame_names = sorted(
        (
            name
            for name in os.listdir(folder_path)
            if name.lower().endswith((".jpg", ".jpeg", ".png"))
        ),
        # Natural ordering keeps frames in temporal order even when the frame
        # numbers in the file names are not zero-padded.
        key=_frame_sort_key,
    )
    all_frames = [os.path.join(folder_path, name) for name in frame_names]

    if len(all_frames) == 0:
        raise ValueError(f"No frames found in {folder_path}")

    # Evenly spaced indices over the whole clip (first and last frame included).
    indices = np.linspace(0, len(all_frames) - 1, num=min(num_frames, len(all_frames)), dtype=int)
    images = []
    for idx in indices:
        # The context manager closes the underlying file handle deterministically;
        # convert() returns an independent, fully loaded image.
        with Image.open(all_frames[idx]) as raw:
            img = raw.convert("RGB").resize(FRAME_SIZE)
        images.append(np.array(img))

    # Short clips are padded with black frames up to the requested length.
    while len(images) < num_frames:
        images.append(np.zeros_like(images[0]))

    return images

def extract_videomae_embedding(frames):
    """Compute one embedding vector for a clip by mean-pooling VideoMAE tokens.

    Args:
        frames: List of frame arrays for a single video, as returned by
            ``load_frames_from_folder``.

    Returns:
        A numpy array holding the mean of ``last_hidden_state`` over the token
        axis (dim 1) with the batch axis removed; presumably shape
        ``(hidden_size,)`` for a single clip — confirm against the checkpoint.
    """
    inputs = feature_extractor(frames, return_tensors="pt")
    # Run on whatever device the model lives on, so the function keeps working
    # if the model is moved to a GPU.
    device = model.device
    inputs = {name: tensor.to(device) for name, tensor in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    pooled = outputs.last_hidden_state.mean(dim=1)
    # squeeze(0) drops only the batch axis; .cpu() is required before .numpy()
    # when the tensor is on an accelerator and is a no-op on CPU.
    embedding = pooled.squeeze(0).cpu().numpy()
    return embedding


# Walk <root>/<top_folder>/<sub_folder>, treating every sub_folder as one video's
# frames, and save one embedding per video as <sub_folder>.npy in OUTPUT_DIR.
for root_folder in FOLDER_LIST:
    for top_folder in tqdm(os.listdir(root_folder)):
        top_path = os.path.join(root_folder, top_folder)
        if not os.path.isdir(top_path):
            continue

        for sub_folder in os.listdir(top_path):
            folder_path = os.path.join(top_path, sub_folder)
            if not os.path.isdir(folder_path):
                continue

            # Check for an existing result BEFORE loading frames or running the
            # model, so a resumed run does not pay inference cost for videos
            # that are already done.
            output_path = os.path.join(OUTPUT_DIR, f"{sub_folder}.npy")
            if os.path.exists(output_path):
                print(f"⏩ Skipping (already exists): {output_path}")
                continue

            try:
                frames = load_frames_from_folder(folder_path)
                emb = extract_videomae_embedding(frames)

                np.save(output_path, emb)
                print(f"✅ Saved: {output_path}")

            except Exception as e:
                # Best effort: one unreadable folder must not abort the whole run.
                print(f"⚠️ Skipped {folder_path}: {e}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/271 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/377M [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7431153975866477832_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7410671910494801169_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7455028385836748052_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7450507043993029895_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7436002221600312583_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7398671113494088961_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7456330597179428117_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7437969710555172097_frames.npy
⏩ Skipping (already exists): /content/dr

100%|██████████| 1/1 [2:43:08<00:00, 9788.09s/it]


⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7454110899469683975_frames.npy


  0%|          | 0/1 [00:00<?, ?it/s]

⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7379769116506295559_frames.npy
✅ Saved: /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7363342561160776967_frames.npy
✅ Saved: /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7376166616624680209_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7403638607635893522_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7458289981463727367_frames.npy
✅ Saved: /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7355933677798362385_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7387127423864802578_frames.npy
⏩ Skipping (already exists): /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7352681003313384705_frames.npy
✅ Saved: /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7296914460067761410_frames.npy


100%|██████████| 1/1 [2:02:46<00:00, 7366.48s/it]

✅ Saved: /content/drive/MyDrive/video_vectorizer/videomae_embeddings/7349507112038321426_frames.npy



