# Histogrammes des Orientations du Gradient

In [3]:
pip install opencv-python-headless numpy matplotlib tqdm scikit-image

Collecting scikit-image
  Downloading scikit_image-0.26.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (15 kB)
Collecting scipy>=1.11.4 (from scikit-image)
  Using cached scipy-1.17.0-cp311-cp311-macosx_14_0_arm64.whl.metadata (62 kB)
Collecting networkx>=3.0 (from scikit-image)
  Using cached networkx-3.6.1-py3-none-any.whl.metadata (6.8 kB)
Collecting imageio!=2.35.0,>=2.33 (from scikit-image)
  Downloading imageio-2.37.2-py3-none-any.whl.metadata (9.7 kB)
Collecting tifffile>=2022.8.12 (from scikit-image)
  Downloading tifffile-2026.1.14-py3-none-any.whl.metadata (30 kB)
Collecting lazy-loader>=0.4 (from scikit-image)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Downloading scikit_image-0.26.0-cp311-cp311-macosx_11_0_arm64.whl (12.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.1/12.1 MB[0m [31m3.2 MB/s[0m  [33m0:00:03[0m eta [36m0:00:01[0m
[?25hDownloading imageio-2.37.2-py3-none-any.whl (317 kB)
Downloading lazy_loader-0.4-py3-none-

## Data Loading

In [1]:
import cv2
import numpy as np 
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import random

from skimage.feature import hog
from skimage import data, exposure

In [2]:
# Root folder of the dataset; every (non-hidden) sub-directory is treated as one class.
data_root_path = "data/UCF-101"

# Keep only real, non-hidden sub-directories: stray files (.DS_Store, archives, ...)
# and hidden folders (.ipynb_checkpoints) must not be counted as classes, because
# the class names are later joined to the root and passed to os.listdir().
CLASS_NAMES = sorted(
    entry
    for entry in os.listdir(data_root_path)
    if not entry.startswith(".")
    and os.path.isdir(os.path.join(data_root_path, entry))
)
NUM_CLASSES = len(CLASS_NAMES)
print(f"Number of classes: {NUM_CLASSES}")

Number of classes: 5


In [None]:
## Function to extract frames from a video file
def extract_all_frames_from_video(video_path, frame_rate=3):
    """Sample frames from a video at roughly ``frame_rate`` frames per second.

    One frame is kept every ``int(fps / frame_rate)`` decoded frames, where
    ``fps`` is the value reported by the capture (``cv2.CAP_PROP_FPS``).

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        frame_rate: Target number of frames to keep per second. Must be positive.
            Values above the video's fps keep every frame.

    Returns:
        List of the kept frames in reading order; empty if the video cannot
        be opened (an error message is printed in that case).

    Raises:
        ValueError: If ``frame_rate`` is not positive.
    """
    if frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate}")

    frames = []
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Error opening video file: {video_path}")
            return frames

        fps = cap.get(cv2.CAP_PROP_FPS)
        # Clamp to 1: when frame_rate exceeds fps, int(fps / frame_rate) is 0 and
        # the modulo below would raise ZeroDivisionError. If the reported fps is
        # not a positive number, fall back to keeping every frame.
        frame_interval = max(1, int(fps / frame_rate)) if fps > 0 else 1

        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                frames.append(frame)
            frame_count += 1
    finally:
        # Release the capture on every exit path (early return, error, normal end).
        cap.release()
    return frames

## Function to extract frame data for all videos in the dataset
def extract_dataset_all_frames(data_root_path, frame_rate=100):
    """Extract sampled frames for every ``.avi`` video of every class.

    Classes are taken from the module-level ``CLASS_NAMES``; each class is
    expected to be a sub-directory of ``data_root_path``.

    Args:
        data_root_path: Dataset root containing one folder per class.
        frame_rate: Sampling rate forwarded unchanged to
            ``extract_all_frames_from_video`` for each video.

    Returns:
        Dict mapping each class name to a flat list of frames; frames of all
        videos of the class are concatenated in sorted file-name order.
    """
    dataset_frames = {}
    for class_name in tqdm(CLASS_NAMES, desc="Processing classes"):
        class_path = os.path.join(data_root_path, class_name)
        # sorted(): os.listdir() order is arbitrary; a fixed order keeps the frame
        # lists (and any seeded split made from them) identical between runs.
        video_files = sorted(f for f in os.listdir(class_path) if f.endswith('.avi'))
        class_frames = []
        for video_file in tqdm(video_files, desc=f"Processing videos in {class_name}", leave=False):
            video_path = os.path.join(class_path, video_file)
            class_frames.extend(extract_all_frames_from_video(video_path, frame_rate))
        dataset_frames[class_name] = class_frames
    return dataset_frames

def split_dataset(dataset, reference_ratio=0.75, seed=None):
    """Randomly split each class's items into a reference set and a test set.

    The input lists are left untouched: each class list is copied before
    shuffling, so the caller's dataset keeps its original order.

    Args:
        dataset: Dict mapping class name to a list of items.
        reference_ratio: Fraction of each class assigned to the first
            (reference/train) set; the split index is
            ``int(len(items) * reference_ratio)``.
        seed: Optional seed for a private RNG, making the split reproducible.
            When ``None`` the global ``random`` module state is used.

    Returns:
        Tuple ``(train_set, test_set)`` of dicts with the same keys as ``dataset``.
    """
    rng = random if seed is None else random.Random(seed)
    train_set = {}
    test_set = {}
    for class_name, items in dataset.items():
        # Shuffle a copy: random.shuffle works in place and would otherwise
        # reorder the caller's lists as a hidden side effect.
        shuffled = list(items)
        rng.shuffle(shuffled)
        split_index = int(len(shuffled) * reference_ratio)
        train_set[class_name] = shuffled[:split_index]
        test_set[class_name] = shuffled[split_index:]
    return train_set, test_set


In [None]:
## Using the videos in the data root, compute per-class stats: number of frames, video length, frame size, etc.
def compute_video_stats(data_root_path, frame_rate=5):
    """Compute per-class statistics over the ``.avi`` videos of the dataset.

    Each video is sampled with ``extract_all_frames_from_video`` at
    ``frame_rate`` frames per second. Only the frame counts and frame shapes
    are retained, not the frames themselves.

    Args:
        data_root_path: Dataset root containing one folder per class
            (classes are taken from the module-level ``CLASS_NAMES``).
        frame_rate: Sampling rate used for extraction and to convert sampled
            frame counts into an estimated duration (``count / frame_rate``).

    Returns:
        Dict mapping class name to a dict with the keys ``num_videos``,
        ``total_frames``, ``avg/min/max_frames_per_video``,
        ``avg/min/max_frame_size`` (NumPy arrays, one value per shape axis) and
        ``avg/min/max_video_length_sec``. Aggregates that are undefined because
        a class has no videos (or no frames) are ``None``.
    """
    stats = {}
    for class_name in tqdm(CLASS_NAMES, desc="Computing video stats for classes"):
        class_path = os.path.join(data_root_path, class_name)
        video_files = [f for f in os.listdir(class_path) if f.endswith('.avi')]

        frame_counts = []  # sampled frame count of each video
        frame_shapes = []  # shape tuple of every sampled frame
        for video_file in tqdm(video_files, desc=f"Processing videos in {class_name}", leave=False):
            video_path = os.path.join(class_path, video_file)
            video_frames = extract_all_frames_from_video(video_path, frame_rate=frame_rate)
            frame_counts.append(len(video_frames))
            frame_shapes.extend(frame.shape for frame in video_frames)

        num_videos = len(video_files)
        total_frames = sum(frame_counts)
        # Duration estimate: frames were sampled at ~frame_rate per second.
        lengths_sec = [count / frame_rate for count in frame_counts]
        has_videos = num_videos > 0
        has_frames = total_frames > 0

        stats[class_name] = {
            "num_videos": num_videos,
            "total_frames": total_frames,
            "avg_frames_per_video": total_frames / num_videos if has_videos else None,
            "min_frames_per_video": min(frame_counts) if has_videos else None,
            "max_frames_per_video": max(frame_counts) if has_videos else None,
            "avg_frame_size": np.mean(frame_shapes, axis=0) if has_frames else None,
            "min_frame_size": np.min(frame_shapes, axis=0) if has_frames else None,
            "max_frame_size": np.max(frame_shapes, axis=0) if has_frames else None,
            "avg_video_length_sec": np.mean(lengths_sec) if has_videos else None,
            "min_video_length_sec": min(lengths_sec) if has_videos else None,
            "max_video_length_sec": max(lengths_sec) if has_videos else None,
        }
    return stats

# Run the statistics pass over the dataset root; as the last expression of the
# cell, the returned per-class dict is rendered as the cell output.
compute_video_stats(data_root_path)

Computing video stats for classes: 100%|██████████| 5/5 [00:16<00:00,  3.39s/it]


{'Biking': {'num_videos': 134,
  'total_frames': 6109,
  'avg_frames_per_video': 45.58955223880597,
  'min_frames_per_video': 21,
  'max_frames_per_video': 120,
  'avg_frame_size': array([240., 320.,   3.]),
  'min_frame_size': array([240, 320,   3]),
  'max_frame_size': array([240, 320,   3]),
  'avg_video_length_sec': np.float64(9.117910447761194),
  'min_video_length_sec': 4.2,
  'max_video_length_sec': 24.0},
 'HandstandPushups': {'num_videos': 128,
  'total_frames': 3007,
  'avg_frames_per_video': 23.4921875,
  'min_frames_per_video': 10,
  'max_frames_per_video': 51,
  'avg_frame_size': array([240., 320.,   3.]),
  'min_frame_size': array([240, 320,   3]),
  'max_frame_size': array([240, 320,   3]),
  'avg_video_length_sec': np.float64(4.6984375),
  'min_video_length_sec': 2.0,
  'max_video_length_sec': 10.2},
 'PlayingCello': {'num_videos': 164,
  'total_frames': 7597,
  'avg_frames_per_video': 46.323170731707314,
  'min_frames_per_video': 21,
  'max_frames_per_video': 83,
  'av

In [10]:
# Seed the global RNG so the shuffle performed inside split_dataset(), and
# therefore the reference/test split, is the same on every run of the notebook.
random.seed(42)

# Sample ~1 frame per second from every video, then split each class's frames
# 75% / 25% into reference and test sets.
full_frames_dataset = extract_dataset_all_frames(data_root_path, frame_rate=1)
reference_frame_dataset, test_frame_dataset = split_dataset(full_frames_dataset, reference_ratio=0.75)

Processing classes: 100%|██████████| 5/5 [00:16<00:00,  3.28s/it]


## HOG Computation

In [21]:

## HOG feature extraction
def extract_hog_features_for_image(
    image,
    orientations=8,
    pixels_per_cell=(16, 16),
    cells_per_block=(1, 1),
    visualize=True,
):
    """Compute the HOG descriptor of one image with ``skimage.feature.hog``.

    Args:
        image: Image array. A 3-D array is treated as multichannel with the
            channels on the last axis; any other rank is passed as single-channel.
        orientations: Number of orientation bins.
        pixels_per_cell: Cell size in pixels.
        cells_per_block: Number of cells per normalization block.
        visualize: When True (default) the HOG visualization image is also
            computed. Pass False to skip it when only the descriptor is needed.

    Returns:
        Tuple ``(fd, hog_image)``: the feature descriptor and the visualization
        image, or ``(fd, None)`` when ``visualize`` is False.
    """
    result = hog(
        image,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        visualize=visualize,
        # Only declare a channel axis when the image actually has one.
        channel_axis=-1 if np.ndim(image) == 3 else None,
    )
    if visualize:
        fd, hog_image = result
        return fd, hog_image
    # Without visualization hog() returns the descriptor alone; keep the
    # two-element return shape so callers can always unpack.
    return result, None

## HOG Dataset feature extraction
def extract_hog_features_for_dataset(dataset_frames):
    """Compute HOG features for every frame of every class.

    Args:
        dataset_frames: Dict mapping class name to a list of frames.

    Returns:
        Dict mapping class name to a list of ``(fd, hog_image)`` tuples, one
        per frame and in the same order, as returned by
        ``extract_hog_features_for_image``.
    """
    dataset_hog_features = {}
    for class_name, frames in tqdm(dataset_frames.items(), desc="Extracting HOG features"):
        class_features = []
        # leave=False: drop the per-class bar once done, like the other nested bars.
        for frame in tqdm(frames, desc=f"Processing frames in {class_name}", leave=False):
            fd, hog_image = extract_hog_features_for_image(frame)
            class_features.append((fd, hog_image))
        dataset_hog_features[class_name] = class_features
    return dataset_hog_features

In [22]:
# HOG features of the reference (training) frames, used to build the class centroids.
hog_reference_features_dataset = extract_hog_features_for_dataset(
    dataset_frames=reference_frame_dataset
)
# HOG features of the held-out test frames, used for evaluation.
hog_test_features_dataset = extract_hog_features_for_dataset(
    dataset_frames=test_frame_dataset
)

Processing frames in Biking):  36%|███▌      | 1646/4581 [00:49<01:29, 32.95it/s]
Extracting HOG features:   0%|          | 0/5 [00:49<?, ?it/s]


KeyboardInterrupt: 

## Nearest Centroid

In [None]:
def compute_class_hog_averages(hog_features_dataset):
    """Average the HOG descriptors of each class into one centroid vector.

    Args:
        hog_features_dataset: Dict mapping class name to a list of
            ``(fd, hog_image)`` tuples; only ``fd`` is used.

    Returns:
        Dict mapping class name to the element-wise mean of its descriptors.
    """
    return {
        label: np.mean([descriptor for descriptor, _ in samples], axis=0)
        for label, samples in hog_features_dataset.items()
    }

# Centroids are built from the reference (training) features only; the test
# features are kept aside for evaluation.
class_hog_averages = compute_class_hog_averages(hog_reference_features_dataset)

In [None]:
def measure_distance(fd1, fd2):
    """Return ``np.linalg.norm`` of ``fd1 - fd2`` (Euclidean distance for 1-D vectors)."""
    difference = fd1 - fd2
    return np.linalg.norm(difference)

def classify_frame_hog(fd, class_hog_averages):
    """Return the class whose average descriptor is closest to ``fd``.

    Distances come from ``measure_distance``. On a tie the class seen first in
    ``class_hog_averages`` wins; ``None`` is returned when no class yields a
    distance strictly below infinity (e.g. an empty mapping).
    """
    best_label, best_distance = None, float('inf')
    for label, centroid in class_hog_averages.items():
        candidate = measure_distance(fd, centroid)
        if not candidate < best_distance:
            continue
        best_label, best_distance = label, candidate
    return best_label

## Evaluation: compute precision, recall and F1, and create the confusion matrix
def evaluate_classification(hog_test_features_dataset, class_hog_averages):
    """Classify every test descriptor and print the evaluation metrics.

    Args:
        hog_test_features_dataset: Dict mapping the true class name to a list
            of ``(fd, hog_image)`` tuples; only ``fd`` is used.
        class_hog_averages: Dict mapping class name to its average descriptor,
            passed to ``classify_frame_hog``.

    Returns:
        The confusion matrix, with rows and columns ordered as the
        module-level ``CLASS_NAMES``. The classification report and the matrix
        are also printed.
    """
    # Imported here, as in the original cell: scikit-learn is only needed for evaluation.
    from sklearn.metrics import classification_report, confusion_matrix

    y_true = []
    y_pred = []
    for class_name, features in hog_test_features_dataset.items():
        for fd, hog_image in features:
            y_true.append(class_name)
            y_pred.append(classify_frame_hog(fd, class_hog_averages))

    # labels= pins the label set to CLASS_NAMES so target_names always lines up,
    # even when a class is missing from y_true / y_pred; zero_division=0 reports
    # 0 for classes that are never predicted.
    print(
        classification_report(
            y_true,
            y_pred,
            labels=CLASS_NAMES,
            target_names=CLASS_NAMES,
            zero_division=0,
        )
    )
    cm = confusion_matrix(y_true, y_pred, labels=CLASS_NAMES)
    print("Confusion Matrix:")
    print(cm)
    return cm

