In [None]:
import os
import re

import numpy as np
import torch
from PIL import Image
from transformers import ViTFeatureExtractor, ViTModel

In [None]:
# The preprocessor and the backbone are both loaded from one pretrained
# checkpoint, so the identifier is spelled out a single time.
VIT_CHECKPOINT = 'google/vit-base-patch16-224-in21k'
feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)
model = ViTModel.from_pretrained(VIT_CHECKPOINT)

In [None]:
# Input root: the extraction loop expects one sub-folder of frames per video.
frames_dir = '../frames/'
# Output root: one .npy feature file per video is written here.
features_dir = './features/'

# Ensure the output location is present; a pre-existing directory is fine.
os.makedirs(name=features_dir, exist_ok=True)

In [None]:
def extract_features(image_path):
    """Extract a single feature vector from one image file.

    The image is converted to RGB, preprocessed with the module-level
    ``feature_extractor`` and passed through the module-level ``model``
    with gradient tracking disabled.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A NumPy array holding the final-layer hidden state at token
        position 0 (the [CLS] position for ViT), with the batch axis removed.
    """
    # Open inside a context manager so the file handle is released right
    # away instead of whenever the image object is garbage collected.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    inputs = feature_extractor(images=image, return_tensors="pt")
    # Keep the inputs on the same device as the model so the function keeps
    # working if the model is moved to a GPU.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)

    # Exactly one image is fed in, so dropping axis 0 removes the batch axis.
    return outputs.last_hidden_state[:, 0, :].squeeze(0).cpu().numpy()

In [None]:
# File extensions (lower-case) that are treated as frames.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')  # Adjust based on your frame file types


def _natural_key(name):
    """Sort key that compares digit runs numerically (frame_2 before frame_10)."""
    # re.split with a capturing group alternates text / digits / text / ...,
    # so odd positions are always digit runs and element types never clash.
    parts = re.split(r'([0-9]+)', name)
    key = [int(part) if index % 2 else part.lower() for index, part in enumerate(parts)]
    # The raw name breaks ties (e.g. "01" vs "1") so the order is deterministic.
    return key, name


# Loop over each video folder. os.listdir returns entries in arbitrary order,
# so everything is sorted to make runs reproducible.
for video_folder in sorted(os.listdir(frames_dir), key=_natural_key):
    video_folder_path = os.path.join(frames_dir, video_folder)
    if not os.path.isdir(video_folder_path):
        continue

    features = []
    # Loop over the frames in natural order so the rows of the saved array
    # follow the frame numbering in the file names.
    for frame_file in sorted(os.listdir(video_folder_path), key=_natural_key):
        # Case-insensitive match so files such as "IMG_001.JPG" are not dropped.
        if not frame_file.lower().endswith(IMAGE_EXTENSIONS):
            continue
        frame_path = os.path.join(video_folder_path, frame_file)
        features.append(extract_features(frame_path))

    # np.stack raises ValueError on an empty list; skip folders without frames
    # (e.g. ".ipynb_checkpoints") instead of aborting the whole run.
    if not features:
        print(f"Skipping {video_folder}: no image frames found")
        continue

    # Convert the list of per-frame features to one array, one row per frame.
    features_array = np.stack(features)

    # Save the features array as a .npy file
    save_path = os.path.join(features_dir, f"{video_folder}_features.npy")
    np.save(save_path, features_array)
    print(f"Saved features for {video_folder} to {save_path}")