# ResNet

In [2]:
pip install opencv-python-headless numpy matplotlib tqdm scikit-image


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [16]:
pip install torch torchvision


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Data Loading

In [17]:
import cv2
import numpy as np 
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import random

from skimage.feature import hog
from skimage import data, exposure

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image

In [21]:
data_root_path = "data/UCF-101"

# Every class is read later as a sub-directory of the data root, so keep only
# non-hidden directories. This drops stray files such as macOS '.DS_Store' as
# well as hidden folders (e.g. '.ipynb_checkpoints') that are not classes.
CLASS_NAMES = sorted(
    entry
    for entry in os.listdir(data_root_path)
    if not entry.startswith('.')
    and os.path.isdir(os.path.join(data_root_path, entry))
)
NUM_CLASSES = len(CLASS_NAMES)
print(f"Number of classes: {NUM_CLASSES}")

Number of classes: 5


In [19]:
## Function to extract frames from a video file
def extract_frames_from_video(video_path, frame_rate=5):
    """Sample frames from a video at roughly `frame_rate` frames per second.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        frame_rate: Target number of frames to keep per second of video.

    Returns:
        A list with every ``frame_interval``-th frame read from the video, in
        order. The list is empty when the file cannot be opened (an error
        message is printed in that case).
    """
    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return frames

        fps = cap.get(cv2.CAP_PROP_FPS)
        # Clamp to at least 1: when the reported fps is 0 or lower than
        # `frame_rate`, int(fps / frame_rate) is 0 and the modulo below would
        # raise ZeroDivisionError. With an interval of 1 every frame is kept.
        frame_interval = max(1, int(fps / frame_rate))

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                frames.append(frame)
            frame_count += 1
    finally:
        # Always free the capture handle, including on early return or error.
        cap.release()
    return frames

## Function to extract frame data for all videos in the dataset
def extract_dataset_frames(data_root_path, frame_rate=5):
    """Collect sampled frames for every class listed in ``CLASS_NAMES``.

    For each class name, the '.avi' files inside ``data_root_path/<class>``
    are passed to ``extract_frames_from_video`` and the resulting frames are
    concatenated into a single list for that class.

    Args:
        data_root_path: Directory containing one sub-directory per class.
        frame_rate: Forwarded to ``extract_frames_from_video``.

    Returns:
        Dict mapping class name -> flat list of frames from all its videos.
    """
    frames_by_class = {}
    for label in tqdm(CLASS_NAMES, desc="Processing classes"):
        label_dir = os.path.join(data_root_path, label)
        avi_names = list(filter(lambda entry: entry.endswith('.avi'), os.listdir(label_dir)))
        collected = []
        frames_by_class[label] = collected
        for avi_name in tqdm(avi_names, desc=f"Processing videos in {label}", leave=False):
            # Frames of all videos in a class end up in one flat list.
            collected.extend(
                extract_frames_from_video(os.path.join(label_dir, avi_name), frame_rate)
            )
    return frames_by_class

def split_dataset(dataset, train_ratio=0.8, seed=None):
    """Randomly split each class's items into a train and a test portion.

    The input lists are left untouched: each class is shuffled on a copy.

    Args:
        dataset: Dict mapping class name -> sequence of items.
        train_ratio: Fraction of each class's items placed in the train set.
        seed: Optional seed for a reproducible shuffle. When None, the global
            ``random`` module state is used.

    Returns:
        Tuple ``(train_set, test_set)`` of dicts with the same keys as
        ``dataset``.
    """
    # NOTE(review): the split is per item. When items are frames pooled from
    # several videos (as extract_dataset_frames produces), frames of the same
    # video can land in both sets — confirm this is acceptable for evaluation.
    rng = random.Random(seed) if seed is not None else random
    train_set = {}
    test_set = {}
    for class_name, items in dataset.items():
        # Shuffle a copy so the caller's lists are not reordered in place.
        shuffled = list(items)
        rng.shuffle(shuffled)
        split_index = int(len(shuffled) * train_ratio)
        train_set[class_name] = shuffled[:split_index]
        test_set[class_name] = shuffled[split_index:]
    return train_set, test_set


In [22]:
# Extract sampled frames for every class under the data root.
frames_dataset = extract_dataset_frames(data_root_path, frame_rate=5)
# NOTE: split_dataset returns a (train_set, test_set) tuple, so after this line
# `frames_dataset` is a 2-tuple of dicts rather than a single dict.
frames_dataset = split_dataset(frames_dataset)

Processing classes: 100%|██████████| 5/5 [00:17<00:00,  3.52s/it]


## ResNet Computation

In [23]:
# Build a torchvision ResNet-50 using the weights selected by ResNet50_Weights.DEFAULT.
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /Users/gaspardjuillet/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth


100%|██████████| 97.8M/97.8M [00:51<00:00, 2.00MB/s]


In [None]:
# Switch the model to evaluation mode.
model.eval()
# Preprocessing transforms associated with the DEFAULT ResNet-50 weights.
preprocess = models.ResNet50_Weights.DEFAULT.transforms()

## Nearest Centroid

In [None]:
def compute_class_hog_averages(hog_features_dataset):
    """Compute one mean feature vector (centroid) per class.

    Args:
        hog_features_dataset: Dict mapping class name -> iterable of
            ``(feature_vector, hog_image)`` pairs. Only the feature vector of
            each pair is used.

    Returns:
        Dict mapping class name -> element-wise mean of that class's feature
        vectors (``np.mean`` over axis 0).
    """
    return {
        label: np.mean([descriptor for descriptor, _ in pairs], axis=0)
        for label, pairs in hog_features_dataset.items()
    }

# NOTE(review): `hog_features_dataset` is not defined in any cell visible above;
# on a fresh kernel this line raises NameError unless it is created elsewhere — TODO confirm.
class_hog_averages = compute_class_hog_averages(hog_features_dataset)

In [None]:
def 