In [None]:
"""
====================================================
Classical Machine Learning Pipeline for Video Actions
====================================================

This script implements and compares three classical
machine learning algorithms for video-based activity
recognition using hand-crafted features.

Algorithms Implemented
----------------------------------------------------
1. Support Vector Machine (Linear + RBF)
2. Random Forest Classifier
3. k-Nearest Neighbors (k-NN)

Features are extracted using:
- feature_extraction.py (per-video feature extraction)
- data_loader.py (train/val/test splits)

Author: Student_2024AB05275
"""

# ==================================================
# STANDARD LIBRARY IMPORTS
# ==================================================
from typing import Dict
import warnings

# ==================================================
# THIRD-PARTY IMPORTS
# ==================================================
import numpy as np
import matplotlib.pyplot as plt

from sklearn.base import BaseEstimator
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    ConfusionMatrixDisplay,
)

from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

# ==================================================
# PROJECT IMPORTS
# ==================================================
from feature_extraction import extract_video_features
from data_loader import load_split_data

# ==================================================
# GLOBAL CONFIGURATION
# ==================================================
# Seed handed to estimators that accept one (the Random Forest below).
RANDOM_STATE = 42
# Number of cross-validation folds used by every GridSearchCV in this script.
CV_FOLDS = 5
# Frame cap for consistency & speed; forwarded to extract_video_features().
MAX_FRAMES = 50

# Silence every warning category for the whole process.
# NOTE(review): this also hides sklearn's undefined-metric and convergence
# warnings -- confirm that a blanket filter is really intended.
warnings.filterwarnings(action="ignore")


# ==================================================
# UTILITY FUNCTIONS
# ==================================================
def extract_features_for_split(video_paths):
    """
    Extract one feature vector per video and stack them into a matrix.

    Each path is converted to ``str`` and handed to
    extract_video_features() together with the module-level
    MAX_FRAMES cap. The returned vectors are stacked row-wise.

    Args:
        video_paths (Iterable[str]): paths to video files. Any
            iterable is accepted; it is materialised once so that
            generators and arrays work as well as lists.

    Returns:
        np.ndarray: Feature matrix of shape (N, D), one row per video
        (assumes every extracted vector has the same length --
        np.vstack raises otherwise).

    Raises:
        ValueError: If ``video_paths`` contains no entries.
    """
    paths = list(video_paths)

    # Fail early with an actionable message instead of letting
    # np.vstack([]) raise "need at least one array to concatenate".
    if not paths:
        raise ValueError(
            "extract_features_for_split() received no video paths; "
            "check that the dataset split is not empty."
        )

    total = len(paths)
    features = []

    for idx, video_path in enumerate(paths, start=1):
        print(f"[INFO] Processing video {idx}/{total}")
        feature_vector = extract_video_features(
            str(video_path),
            max_frames=MAX_FRAMES
        )
        features.append(feature_vector)

    return np.vstack(features)


def evaluate_model(
    model: BaseEstimator,
    X_test: np.ndarray,
    y_test: np.ndarray,
) -> Dict[str, float]:
    """
    Score a fitted classifier on held-out data.

    Predictions are computed once with ``model.predict`` and then
    compared against ``y_test``.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features passed to ``model.predict``.
        y_test: Ground-truth labels for ``X_test``.

    Returns:
        dict: ``accuracy`` plus macro-averaged ``precision``,
        ``recall`` and ``f1_score``, in that key order.
    """
    predictions = model.predict(X_test)

    scores = {"accuracy": accuracy_score(y_test, predictions)}

    # The remaining metrics share the same call shape (macro average).
    macro_scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1_score", f1_score),
    )
    for key, scorer in macro_scorers:
        scores[key] = scorer(y_test, predictions, average="macro")

    return scores


def plot_confusion(model, X_test, y_test, title: str) -> None:
    """
    Plot the confusion matrix of a fitted classifier on a test set.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Features passed to ``model.predict``.
        y_test: Ground-truth labels for ``X_test``.
        title (str): Title drawn above the matrix.

    Returns:
        None. The figure is shown and then closed.
    """
    y_pred = model.predict(X_test)

    # Sorted union of true and predicted labels. Passing it to both the
    # matrix and the display keeps rows/columns and tick labels in sync,
    # so the axes show the real class labels rather than 0..n-1 positions.
    labels = np.union1d(y_test, y_pred)

    cm = confusion_matrix(y_test, y_pred, labels=labels)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels)
    disp.plot()

    # Title the display's own axes instead of relying on pyplot's
    # implicit "current axes".
    disp.ax_.set_title(title)
    plt.show()
    plt.close()


# ==================================================
# DATA LOADING & FEATURE EXTRACTION
# ==================================================
print("\n📥 Loading dataset splits...")

# load_split_data() returns a 2-tuple per split, unpacked here as
# (videos, labels). Presumably the first element is a list of video
# paths -- verify against data_loader.py.
train_videos, y_train = load_split_data(split_name="train")
val_videos, y_val = load_split_data(split_name="val")
test_videos, y_test = load_split_data(split_name="test")

print("\n🎯 Extracting features... (this may take time on first run)")

# One feature matrix per split.
# NOTE(review): X_val / y_val are not used by the model sections below
# (hyper-parameters are tuned by cross-validation on the training split).
X_train = extract_features_for_split(train_videos)
X_val = extract_features_for_split(val_videos)
X_test = extract_features_for_split(test_videos)

print(f"\nTrain feature shape: {X_train.shape}")
print(f"Val feature shape  : {X_val.shape}")
print(f"Test feature shape : {X_test.shape}")


# ==================================================
# 1️⃣ SUPPORT VECTOR MACHINE (LINEAR + RBF)
# ==================================================
print("\n🚀 Training Support Vector Machine...")

# The scaler is part of the pipeline, so during grid search it is
# re-fitted on the training portion of every CV fold.
svm_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC())
])

# Two sub-grids: gamma is only searched for the RBF kernel.
svm_param_grid = [
    {"svm__kernel": ["linear"], "svm__C": [0.1, 1, 10]},
    {
        "svm__kernel": ["rbf"],
        "svm__C": [0.1, 1, 10],
        "svm__gamma": [0.01, 0.1, 1],
    },
]

# Exhaustive search scored by accuracy, using all available cores.
svm_grid = GridSearchCV(
    svm_pipeline,
    svm_param_grid,
    cv=CV_FOLDS,
    scoring="accuracy",
    n_jobs=-1,
)

svm_grid.fit(X_train, y_train)
best_svm = svm_grid.best_estimator_

# Final evaluation of the selected pipeline on the test split.
svm_metrics = evaluate_model(best_svm, X_test, y_test)
plot_confusion(best_svm, X_test, y_test, "SVM Confusion Matrix")


# ==================================================
# 2️⃣ RANDOM FOREST CLASSIFIER
# ==================================================
print("\n🌲 Training Random Forest...")

# The forest is fitted on the unscaled features (no pipeline/scaler here);
# the fixed seed makes the fitted trees reproducible.
rf = RandomForestClassifier(random_state=RANDOM_STATE)

rf_param_grid = {
    "n_estimators": [100, 200],
    "max_depth": [10, 20, None],
    "min_samples_split": [2, 5],
}

# Exhaustive search scored by accuracy, using all available cores.
rf_grid = GridSearchCV(
    rf,
    rf_param_grid,
    cv=CV_FOLDS,
    scoring="accuracy",
    n_jobs=-1,
)

rf_grid.fit(X_train, y_train)
best_rf = rf_grid.best_estimator_

# Final evaluation of the selected forest on the test split.
rf_metrics = evaluate_model(best_rf, X_test, y_test)
plot_confusion(best_rf, X_test, y_test, "Random Forest Confusion Matrix")


# ==================================================
# 3️⃣ K-NEAREST NEIGHBORS
# ==================================================
print("\n📏 Training k-Nearest Neighbors...")

# k-NN is distance based, so features are standardised inside the
# pipeline before the neighbour search.
knn_pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("knn", KNeighborsClassifier())
])

knn_param_grid = {
    "knn__n_neighbors": [3, 5, 7, 9],
    "knn__metric": ["euclidean", "manhattan"],
}

# Exhaustive search scored by accuracy, using all available cores.
knn_grid = GridSearchCV(
    knn_pipeline,
    knn_param_grid,
    cv=CV_FOLDS,
    scoring="accuracy",
    n_jobs=-1,
)

knn_grid.fit(X_train, y_train)
best_knn = knn_grid.best_estimator_

# Final evaluation of the selected pipeline on the test split.
knn_metrics = evaluate_model(best_knn, X_test, y_test)
plot_confusion(best_knn, X_test, y_test, "k-NN Confusion Matrix")


# ==================================================
# 📊 COMPARATIVE ANALYSIS
# ==================================================
print("\n📊 Comparative Model Analysis")

# Model name -> metrics dict as returned by evaluate_model().
results = {
    "SVM": svm_metrics,
    "Random Forest": rf_metrics,
    "k-NN": knn_metrics,
}

# Metric order is taken from the first model's dict; every model is
# then indexed with those same keys.
metrics_names = list(next(iter(results.values())).keys())
model_names = list(results.keys())

# Rows = models, columns = metrics.
metrics_matrix = np.array(
    [[results[m][metric] for metric in metrics_names] for m in model_names]
)

# --------------------------
# Bar Plot Comparison
# --------------------------
x = np.arange(len(metrics_names))
# 0.25 for the three models above; shrinks automatically if more models
# are added so that a group of bars never spills into the next metric.
width = min(0.25, 0.8 / len(model_names))

plt.figure(figsize=(10, 6))
for i, model in enumerate(model_names):
    plt.bar(
        x + i * width,
        metrics_matrix[i],
        width,
        label=model,
    )

# Centre each tick under its group of bars (equals x + width for 3 models).
plt.xticks(x + width * (len(model_names) - 1) / 2, metrics_names)
plt.ylabel("Score")
plt.title("Classical ML Model Performance Comparison")
plt.legend()
plt.grid(axis="y")
plt.show()
plt.close()


# ==================================================
# 🏆 FINAL DYNAMIC SUMMARY
# ==================================================
# (name, metrics) pair of the model with the highest macro F1-score.
# On ties, max() keeps the first such entry in `results` order.
best_model = max(
    results.items(),
    key=lambda item: item[1]["f1_score"],
)

print("\n🏆 Final Summary")
for model, metrics in results.items():
    print(f"\n{model}")
    for k, v in metrics.items():
        # Left-align metric names in a 10-character column, 4 decimals.
        print(f"  {k:<10}: {v:.4f}")

print(
    "\n✅ Best overall model based on macro F1-score: "
    f"{best_model[0]}"
)



📥 Loading dataset splits...
[INFO] Opening video: /Users/chocalingamlakshmanan/Desktop/Video-analytics-assignment/Student_2024ab05275_Video_Classification/dataset/class_1_Basketball/v_Basketball_g13_c04.avi
[INFO] Frame limit reached: 50
[INFO] Frames processed: 50
[INFO] Final feature vector length: 399
[INFO] Opening video: /Users/chocalingamlakshmanan/Desktop/Video-analytics-assignment/Student_2024ab05275_Video_Classification/dataset/class_1_Basketball/v_Basketball_g15_c05.avi
[INFO] Frame limit reached: 50
[INFO] Frames processed: 50
[INFO] Final feature vector length: 399
[INFO] Opening video: /Users/chocalingamlakshmanan/Desktop/Video-analytics-assignment/Student_2024ab05275_Video_Classification/dataset/class_1_Basketball/v_Basketball_g19_c05.avi
[INFO] Frame limit reached: 50
[INFO] Frames processed: 50
[INFO] Final feature vector length: 399
[INFO] Opening video: /Users/chocalingamlakshmanan/Desktop/Video-analytics-assignment/Student_2024ab05275_Video_Classification/dataset

error: OpenCV(4.12.0) :-1: error: (-5:Bad argument) in function 'VideoCapture'
> Overload resolution failed:
>  - Expected 'filename' to be a str or path-like object
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)
>  - Argument 'index' is required to be an integer
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)
>  - VideoCapture() missing required argument 'apiPreference' (pos 2)
