In [1]:
from open_clip import create_model_from_pretrained, get_tokenizer
import cv2
from PIL import Image
import torch
import numpy as np
import glob
import os
import h5py
from tqdm import tqdm


Disabling PyTorch because PyTorch >= 2.1 is required but found 1.13.0+cu116
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
def extract_frames(video_path, fps=1, max_frames=20):
    """Sample up to ``max_frames`` RGB frames from a video at roughly ``fps`` frames per second.

    Frames are read sequentially from the start of the video and every
    ``frame_interval``-th frame is kept, where ``frame_interval`` is the
    container's reported frame rate divided by ``fps`` (truncated, minimum 1).

    Args:
        video_path: Path of the video file passed to ``cv2.VideoCapture``.
        fps: Target sampling rate in frames per second. Must be positive.
        max_frames: Maximum number of frames to return.

    Returns:
        A list of ``PIL.Image`` objects converted from BGR to RGB. The list is
        empty when the video cannot be opened or yields no frames.

    Raises:
        ValueError: If ``fps`` is not positive.
    """
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        video_fps = cap.get(cv2.CAP_PROP_FPS)
        # Some containers report 0 (or NaN) FPS, and low-frame-rate clips can
        # report less than the requested rate. Either case used to produce an
        # interval of 0 and a ZeroDivisionError in the modulo below, so fall
        # back to keeping every frame.
        if video_fps > 0:
            frame_interval = max(1, int(video_fps / fps))
        else:
            frame_interval = 1

        count = 0
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            if count % frame_interval == 0:
                # OpenCV decodes to BGR; PIL expects RGB channel order.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(Image.fromarray(frame))
            count += 1
    finally:
        # Always free the decoder handle, even if conversion raises.
        cap.release()
    return frames

In [3]:
# Load the DFN2B CLIP ViT-B/16 checkpoint from the Hugging Face hub together
# with the image transform that create_model_from_pretrained returns for it.
model, preprocess = create_model_from_pretrained('hf-hub:apple/DFN2B-CLIP-ViT-B-16')
tokenizer = get_tokenizer('ViT-B-16')

# Use the GPU when one is present, otherwise stay on CPU instead of failing.
model.to('cuda' if torch.cuda.is_available() else 'cpu')
# open_clip's README notes that factory models are in train mode by default;
# switch to eval so any dropout / stochastic-depth / BatchNorm layers behave
# deterministically during feature extraction.
model.eval()
def extract_clip_features(frames):
    """Encode frames with the module-level CLIP ``model`` into L2-normalised embeddings.

    Each frame is passed through the module-level ``preprocess`` transform,
    the results are batched, moved to whatever device ``model`` lives on, and
    encoded with ``model.encode_image`` under ``torch.no_grad()``.

    Args:
        frames: Iterable of images accepted by ``preprocess`` (PIL images as
            produced by ``extract_frames``).

    Returns:
        ``numpy.ndarray`` of shape ``(num_frames, embed_dim)`` whose rows have
        unit L2 norm. ``num_frames`` is the number of frames supplied, which
        may be fewer than the sampling cap for short videos.

    Raises:
        ValueError: If ``frames`` is empty (there is nothing to batch).
    """
    batch = [preprocess(img) for img in frames]
    if not batch:
        raise ValueError("extract_clip_features() requires at least one frame")

    # Follow the model's device rather than hard-coding 'cuda', so the function
    # works wherever the model was placed.
    device = next(model.parameters()).device
    images = torch.stack(batch).to(device)

    with torch.no_grad():
        features = model.encode_image(images)
        # Normalise each embedding to unit length.
        features = features / features.norm(dim=-1, keepdim=True)

    return features.cpu().numpy()


In [4]:
#distilbert/distilgpt2
# Collect every MSVD clip; sort so the processing order is the same on each run.
video_paths = sorted(glob.glob(os.path.join('../Dataset/MSVD/raw/', '*.avi')))

output_path = 'data/MSVD/features/MSVD_visual_clip.hdf5'
# h5py does not create missing parent directories.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Videos that produced no frames (unreadable / corrupt files).
skipped_videos = []

# One dataset per video, keyed by the file name without extension.
# NOTE: mode 'w' truncates any existing feature file at this path.
with h5py.File(output_path, 'w') as f:
    for video_path in tqdm(video_paths, desc="Extracting features"):
        video_name = os.path.splitext(os.path.basename(video_path))[0]
        frames = extract_frames(video_path)
        if not frames:
            # Skip instead of letting one bad file abort the whole run and
            # leave a half-written HDF5 file behind.
            skipped_videos.append(video_path)
            continue
        features = extract_clip_features(frames)

        f.create_dataset(video_name, data=features)

if skipped_videos:
    print(f"Skipped {len(skipped_videos)} video(s) with no decodable frames:")
    for skipped_path in skipped_videos:
        print(f"  {skipped_path}")

Extracting features: 100%|██████████| 1970/1970 [21:32<00:00,  1.52it/s]
