## **0. Environment Setup**




In [7]:
# Root folder of the dataset, relative to the notebook's working directory.
# The loader cell further down treats every sub-directory of this folder as
# one class and reads the video files found inside it.
DATASET_PATH = "../data/"

In [8]:
import os
import cv2
import numpy as np
import pickle
import sys
import random

# Add project root to path to import from src.
# Guarded so that re-running this cell does not keep appending the same
# directory to sys.path (keeps the cell idempotent).
PROJECT_ROOT = os.path.abspath('..')
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

from src.preprocessing import extract_pose_vector

I0000 00:00:1766794324.401228 12365986 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1 Pro
I0000 00:00:1766794324.410465 12365986 gl_context.cc:369] GL version: 2.1 (2.1 Metal - 90.5), renderer: Apple M1 Pro


In [9]:
# Show the top-level entries of the dataset folder as a quick sanity check
# before loading. Uses os.listdir instead of the `!ls` shell escape so the
# cell is plain Python and also works on platforms without `ls`.
print("DATASET_PATH content:")
print(*sorted(os.listdir(DATASET_PATH)), sep="\n")


W0000 00:00:1766794324.431387 12389385 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766794324.447659 12389389 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766794324.495865 12389376 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.
W0000 00:00:1766794324.509886 12389381 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


DATASET_PATH content:
[34m1[m[m         [34m3[m[m         [34m5[m[m         [34m7[m[m         README.md
[34m2[m[m         [34m4[m[m         [34m6[m[m         [34m8[m[m


In [10]:
from torchvision import transforms
from PIL import Image

# torchvision augmentation pipeline; the steps run in the order listed.
# NOTE(review): none of the visible cells call `augmentations` (the extraction
# loop does its own OpenCV flip / rotation / brightness) - confirm it is still needed.
augmentation_steps = [
    # Mirror left/right with probability 0.5
    transforms.RandomHorizontalFlip(p=0.5),
    # Random rotation, degrees=15
    transforms.RandomRotation(degrees=15),
    # Random brightness change, brightness=0.2
    transforms.ColorJitter(brightness=0.2),
    # Random crop (scale range 0.7-1.0), resized to 224x224
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.7, 1.0)),
]
augmentations = transforms.Compose(augmentation_steps)

In [11]:
# Build the dataset: walk DATASET_PATH/<class>/<video>, take every 2nd frame,
# apply a random augmentation, and turn the frame into a feature vector with
# extract_pose_vector. Frames for which it returns None are skipped.
#
# NOTE(review): every stored sample is augmented (no original frame is kept),
# so any split made afterwards evaluates on augmented frames as well.
X = []
y = []

video_ext = (".mp4", ".avi", ".mov", ".mkv")

# Private, seeded generator: the random augmentations (and therefore the
# dataset and every metric computed from it) are identical on each
# Restart & Run All, and the global `random` state is left untouched.
AUGMENT_SEED = 42
aug_rng = random.Random(AUGMENT_SEED)


def _augment_frame(frame, rng):
    """Return a randomly perturbed copy of `frame`; the input is not modified.

    Draws from `rng`, in this order: a coin flip for a horizontal mirror,
    a rotation angle in [-15, 15] degrees around the image centre, and a
    brightness gain in [0.7, 1.3].
    """
    out = frame.copy()

    # Mirror roughly half of the frames to prevent a bias to one side.
    if rng.random() > 0.5:
        out = cv2.flip(out, 1)

    # Rotate around the centre, keeping the original frame size.
    angle = rng.uniform(-15, 15)
    h, w = out.shape[:2]
    M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
    out = cv2.warpAffine(out, M, (w, h))

    # Multiplicative brightness change (alpha is the gain, no offset).
    brightness = rng.uniform(0.7, 1.3)
    return cv2.convertScaleAbs(out, alpha=brightness, beta=0)


for class_name in sorted(os.listdir(DATASET_PATH)):
    class_path = os.path.join(DATASET_PATH, class_name)
    if not os.path.isdir(class_path):
        continue

    print(f"→ Class {class_name}")

    for fname in sorted(os.listdir(class_path)):
        if not fname.lower().endswith(video_ext):
            continue

        video_path = os.path.join(class_path, fname)
        print(f"    Video: {fname}")

        cap = cv2.VideoCapture(video_path)
        try:
            # An unreadable file would otherwise contribute zero frames
            # without any hint in the log.
            if not cap.isOpened():
                print(f"    [skip] could not open {video_path}")
                continue

            frame_id = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Take every 2nd frame
                if frame_id % 2 == 0:
                    vec = extract_pose_vector(_augment_frame(frame, aug_rng))
                    if vec is not None:
                        X.append(vec)
                        y.append(class_name)

                frame_id += 1
        finally:
            # Release the capture even if augmentation or extraction raises.
            cap.release()

X = np.array(X)
y = np.array(y)

print("Total poses:", len(X))
print("Classes:", np.unique(y))


→ Class 1
    Video: 1_1.mov


W0000 00:00:1766794324.846409 12389379 landmark_projection_calculator.cc:186] Using NORM_RECT without IMAGE_DIMENSIONS is only supported for the square ROI. Provide IMAGE_DIMENSIONS or use PROJECTION_MATRIX.


    Video: 1_2.mov
    Video: 1_3.mov
    Video: 1_4.MOV
    Video: 1_5.mp4
→ Class 2
    Video: 2_1.mov
    Video: 2_2.mov
    Video: 2_3.mov
    Video: 2_4.MOV
    Video: 2_5.mp4
→ Class 3
    Video: 3_1.mov
    Video: 3_2.mov
    Video: 3_3.mov
    Video: 3_4.MOV
    Video: 3_5.mp4
→ Class 4
    Video: 4_1.mov
    Video: 4_2.mov
    Video: 4_3.mov
    Video: 4_4.MOV
    Video: 4_5.mp4
→ Class 5
    Video: 5_1.mov
    Video: 5_2.mov
    Video: 5_3.mov
    Video: 5_4.MOV
    Video: 5_5.mp4
→ Class 6
    Video: 6_1.mov
    Video: 6_2.mov
    Video: 6_3.mov
    Video: 6_4.MOV
    Video: 6_5.mp4
→ Class 7
    Video: 7_1.mov
    Video: 7_2.mov
    Video: 7_3.mov
    Video: 7_4.mov
    Video: 7_5.mp4
→ Class 8
    Video: 8_1.mov
    Video: 8_2.mov
    Video: 8_3.mov
    Video: 8_4.mov
    Video: 8_5.mov
Total poses: 4251
Classes: ['1' '2' '3' '4' '5' '6' '7' '8']


In [12]:
# Sanity check: a classifier needs at least two distinct labels.
found_classes = np.unique(y)
if found_classes.size < 2:
    raise ValueError(f"Only one class found: {found_classes}. Check folder structure and data.")

# Stratified 80/20 split with a fixed seed.
# NOTE(review): the split is per sample, and samples come from frames of the
# same videos, so frames of one video can land in both train and test. The
# reported scores may be optimistic - consider splitting by video.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Standardise features; statistics are learned on the training part only.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# PCA fitted on the training part, keeping enough components for 98% of the variance.
pca = PCA(n_components=0.98)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)
print(f"PCA: {X_train_scaled.shape[1]} -> {X_train_pca.shape[1]} components (98% variance)")


PCA: 126 -> 24 components (98% variance)


In [13]:
# RBF-kernel SVM on the PCA features.
# Best parameters found: C=200, gamma=0.05
# random_state is fixed because probability=True fits its probability
# calibration with an internal, randomised cross-validation; without a seed
# the predict_proba outputs of the saved model differ from run to run.
clf = SVC(C=200, gamma=0.05, kernel='rbf', probability=True, random_state=42)
clf.fit(X_train_pca, y_train)

# Evaluate on the held-out part of the split.
y_pred = clf.predict(X_test_pca)
print("Quality report:\n")
print(classification_report(y_test, y_pred))
print("Confusion matrix:")
print(confusion_matrix(y_test, y_pred))


Quality report:

              precision    recall  f1-score   support

           1       0.97      0.95      0.96       146
           2       1.00      0.98      0.99        90
           3       0.97      0.92      0.94       129
           4       1.00      0.95      0.97        60
           5       0.96      0.96      0.96       117
           6       0.94      0.98      0.96       137
           7       0.96      0.98      0.97       134
           8       0.86      1.00      0.93        38

    accuracy                           0.96       851
   macro avg       0.96      0.96      0.96       851
weighted avg       0.96      0.96      0.96       851

Confusion matrix:
[[138   0   2   0   0   5   0   1]
 [  0  88   0   0   0   1   0   1]
 [  0   0 119   0   5   1   4   0]
 [  0   0   0  57   0   0   0   3]
 [  1   0   2   0 112   1   1   0]
 [  3   0   0   0   0 134   0   0]
 [  1   0   0   0   0   1 131   1]
 [  0   0   0   0   0   0   0  38]]


In [14]:
# Bundle the fitted classifier together with the preprocessing objects it
# was trained with (scaler and PCA) and the sorted list of class labels.
model = {
    "clf": clf,
    "scaler": scaler,
    "pca": pca,
    "classes": sorted(list(np.unique(y)))
}

model_path = "../models/model.pkl"
# Create the target folder if it is missing, so saving does not fail on a
# fresh checkout after the whole (slow) pipeline has already run.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, "wb") as f:
    pickle.dump(model, f)

print("Model saved at:", model_path)


Model saved at: ../models/model.pkl
