# HCF + PCA + SVM on Degraded Test Set
Pipeline: color histogram + HOG/LBP/GLCM/Gabor, then PCA, then SVM. Evaluation on degraded test set (mixed scenarios).

In [23]:
import os
import shutil
import subprocess
import time
import math
import random
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import cv2
import torch
from torch.utils.data import Dataset, DataLoader, random_split
import torchvision.transforms as T
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from skimage.feature import hog, local_binary_pattern, graycomatrix, graycoprops
from skimage.filters import gabor
from skimage.color import rgb2gray
import joblib

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

SIZE = 32
VARIETY = False
BATCH_SIZE = 100
AUG_RATIO = 0.20
AUG_DIST = {"A": 0.4, "B": 0.4, "C": 0.2}
HIST_BINS = 32
VARIANCE_TARGETS = [0.80, 0.90, 0.95]
CV_FOLDS = 3
TUNING_SUBSET_RATIO = 0.20
C_VALUES = [10, 100]
GAMMA_VALUES = [0.01, 0.001]

print("Configuration:")
print(f"  Image size: {SIZE}x{SIZE}")
print(f"  Augmentation ratio: {AUG_RATIO*100}%")
print(f"  Augmentation dist: {AUG_DIST}")
print(f"  Histogram bins: {HIST_BINS}")
print(f"  Variance targets: {VARIANCE_TARGETS}")
print(f"  CV folds: {CV_FOLDS}")
print(f"  Tuning subset ratio: {TUNING_SUBSET_RATIO}")
print(f"  C values: {C_VALUES}")
print(f"  Gamma values: {GAMMA_VALUES}")

Configuration:
  Image size: 32x32
  Augmentation ratio: 20.0%
  Augmentation dist: {'A': 0.4, 'B': 0.4, 'C': 0.2}
  Histogram bins: 32
  Variance targets: [0.8, 0.9, 0.95]
  CV folds: 3
  Tuning subset ratio: 0.2
  C values: [10, 100]
  Gamma values: [0.01, 0.001]


In [24]:
ROOT_DIR = "dataset/fruit360"
TRAIN_DIR = os.path.join(ROOT_DIR, "Training")
TEST_DIR = os.path.join(ROOT_DIR, "Test")

GITHUB_REPO = "https://github.com/fruits-360/fruits-360-100x100"
CLONE_DIR = "dataset/fruits-360-100x100"

def download_dataset():
    os.makedirs("dataset", exist_ok=True)
    subprocess.run(["git", "clone", GITHUB_REPO, CLONE_DIR], check=True)
    os.makedirs(ROOT_DIR, exist_ok=True)
    shutil.move(os.path.join(CLONE_DIR, "Training"), TRAIN_DIR)
    shutil.move(os.path.join(CLONE_DIR, "Test"), TEST_DIR)
    shutil.rmtree(CLONE_DIR, ignore_errors=True)

if not os.path.exists(ROOT_DIR):
    download_dataset()

assert os.path.exists(TRAIN_DIR), f"{TRAIN_DIR} not found"
assert os.path.exists(TEST_DIR), f"{TEST_DIR} not found"
print(f"Dataset ready: {ROOT_DIR}")

Dataset ready: dataset/fruit360


In [25]:
class Fruit360FolderDataset(Dataset):
    def __init__(self, root_dir, transform=None, variety=False):
        self.root_dir = root_dir
        self.transform = transform
        self.variety = variety
        self.samples = []
        for class_name in sorted(os.listdir(root_dir)):
            class_dir = os.path.join(root_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            label = class_name if self.variety else class_name.split()[0]
            for img_name in os.listdir(class_dir):
                if img_name.lower().endswith((".jpg", ".png")):
                    self.samples.append((os.path.join(class_dir, img_name), label))
        unique_labels = sorted({lbl for _, lbl in self.samples})
        self.label_to_idx = {lbl: i for i, lbl in enumerate(unique_labels)}
        self.idx_to_label = {i: lbl for lbl, i in self.label_to_idx.items()}
        print(f"{os.path.basename(root_dir)}: {len(self.samples)} images, {len(unique_labels)} classes")

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label_str = self.samples[idx]
        img = Image.open(img_path).convert("RGB")
        if self.transform:
            img = self.transform(img)
        return img, self.label_to_idx[label_str]

In [26]:
transform = T.Compose([
    T.Resize((SIZE, SIZE)),
    T.ToTensor(),
])

train_full = Fruit360FolderDataset(TRAIN_DIR, transform=transform, variety=VARIETY)
test_dataset = Fruit360FolderDataset(TEST_DIR, transform=transform, variety=VARIETY)

train_size = int(0.7 * len(train_full))
val_size = len(train_full) - train_size

train_dataset, val_dataset = random_split(
    train_full,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(RANDOM_STATE),
)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

print(f"Train {len(train_dataset)}, Val {len(val_dataset)}, Test {len(test_dataset)}")
print(f"Classes: {len(train_full.label_to_idx)}")

Training: 130344 images, 79 classes
Test: 43442 images, 79 classes
Train 91240, Val 39104, Test 43442
Classes: 79


In [27]:
def extract_numpy(loader):
    X_list, y_list = [], []
    for imgs, labels in loader:
        X_list.append(imgs.numpy())
        y_list.append(labels.numpy())
    X = np.concatenate(X_list, axis=0)
    y = np.concatenate(y_list, axis=0)
    return X, y

X_train, y_train = extract_numpy(train_loader)
X_val, y_val = extract_numpy(val_loader)
X_test, y_test = extract_numpy(test_loader)

print(f"X_train {X_train.shape}, y_train {y_train.shape}")
print(f"X_val {X_val.shape}, y_val {y_val.shape}")
print(f"X_test {X_test.shape}, y_test {y_test.shape}")

X_train (91240, 3, 32, 32), y_train (91240,)
X_val (39104, 3, 32, 32), y_val (39104,)
X_test (43442, 3, 32, 32), y_test (43442,)


## Augmentation: Scenario Functions

In [28]:
def clamp_01(x):
    return torch.clamp(x, 0.0, 1.0)

def add_color_patches(x, num_patches, color, alpha_range=(0.4, 0.7), size_range=(0.05, 0.15)):
    _, H, W = x.shape
    out = x.clone()
    for _ in range(num_patches):
        s = np.random.uniform(size_range[0], size_range[1])
        patch_area = s * H * W / 4
        r = np.random.uniform(0.5, 1.5)
        patch_h = int(math.sqrt(patch_area / r))
        patch_w = int(math.sqrt(patch_area * r))
        patch_h = max(1, min(H, patch_h))
        patch_w = max(1, min(W, patch_w))
        top = np.random.randint(0, H - patch_h + 1)
        left = np.random.randint(0, W - patch_w + 1)
        bottom = top + patch_h
        right = left + patch_w
        alpha = np.random.uniform(alpha_range[0], alpha_range[1])
        patch = out[:, top:bottom, left:right]
        blended = alpha * color + (1 - alpha) * patch
        out[:, top:bottom, left:right] = blended
    return clamp_01(out)

def add_occlusion_patch(x, area_ratio=0.1, color=torch.tensor([0.5, 0.5, 0.5]).view(3, 1, 1), alpha=0.5):
    _, H, W = x.shape
    out = x.clone()
    patch_area = area_ratio * H * W
    r = np.random.uniform(0.5, 1.5)
    patch_h = int(math.sqrt(patch_area / r))
    patch_w = int(math.sqrt(patch_area * r))
    patch_h = max(1, min(H, patch_h))
    patch_w = max(1, min(W, patch_w))
    top = np.random.randint(0, H - patch_h + 1)
    left = np.random.randint(0, W - patch_w + 1)
    bottom = top + patch_h
    right = left + patch_w
    patch = out[:, top:bottom, left:right]
    blended = alpha * color + (1 - alpha) * patch
    out[:, top:bottom, left:right] = blended
    return clamp_01(out)

color_dirt = torch.tensor([0.3, 0.25, 0.2]).view(3, 1, 1)
color_bruise = torch.tensor([0.25, 0.2, 0.15]).view(3, 1, 1)

def noise_mild(x):
    return clamp_01(x + torch.randn_like(x) * 0.025)

def dark_mild(x):
    return clamp_01(x * 0.65)

def overexposed_mild(x):
    return clamp_01(x * 1.35)

def dirty_mild(x):
    return add_color_patches(x, num_patches=2, color=color_dirt, alpha_range=(0.5, 0.8), size_range=(0.03, 0.08))

def bruised_mild(x):
    return add_color_patches(x, num_patches=1, color=color_bruise, alpha_range=(0.4, 0.7), size_range=(0.03, 0.08))

def occlusion_small(x):
    return add_occlusion_patch(x, area_ratio=0.10, alpha=0.5)

blur_medium = T.GaussianBlur(kernel_size=5, sigma=1.0)

def scenario_A(x):
    x = blur_medium(x)
    x = noise_mild(x)
    if np.random.rand() < 0.7:
        x = dirty_mild(x)
    return x

def scenario_B(x):
    if np.random.rand() < 0.5:
        x = dark_mild(x)
    else:
        x = overexposed_mild(x)
    x = noise_mild(x)
    return x

def scenario_C(x):
    x = occlusion_small(x)
    if np.random.rand() < 0.5:
        x = bruised_mild(x)
    else:
        x = dirty_mild(x)
    return x

scenario_map = {
    "A": scenario_A,
    "B": scenario_B,
    "C": scenario_C,
}

print("Augmentation functions defined")

Augmentation functions defined


## Data Augmentation on Training Set

In [29]:
np.random.seed(RANDOM_STATE)

n_augment = int(len(X_train) * AUG_RATIO)
aug_indices = np.random.choice(len(X_train), n_augment, replace=False)

scenario_counts = {"A": 0, "B": 0, "C": 0}

for aug_idx in aug_indices:
    r = np.random.rand()
    if r < AUG_DIST["A"]:
        scenario_name = "A"
    elif r < AUG_DIST["A"] + AUG_DIST["B"]:
        scenario_name = "B"
    else:
        scenario_name = "C"
    scenario_counts[scenario_name] += 1
    img_tensor = torch.from_numpy(X_train[aug_idx]).float()
    aug_img = scenario_map[scenario_name](img_tensor)
    X_train[aug_idx] = aug_img.numpy()

print(f"Augmented {n_augment} images ({AUG_RATIO*100}% of training set)")
print(f"Scenario distribution: {scenario_counts}")

Augmented 18248 images (20.0% of training set)
Scenario distribution: {'A': 7293, 'B': 7358, 'C': 3597}


## Feature Extraction (Color Hist + HOG/LBP/GLCM/Gabor)

In [30]:
def color_hist_features(X, bins=HIST_BINS, img_shape=(3, SIZE, SIZE)):
    n_samples = X.shape[0]
    feats = np.zeros((n_samples, 3 * bins), dtype=np.float32)
    bin_edges = np.linspace(0.0, 1.0, bins + 1)
    for i in range(n_samples):
        img = X[i].reshape(img_shape)
        img = np.transpose(img, (1, 2, 0))
        img = np.clip(img, 0.0, 1.0)
        img_hsv = (img * 255.0).astype(np.uint8)
        img_hsv = cv2.cvtColor(img_hsv, cv2.COLOR_RGB2HSV)
        h, s, v = cv2.split(img_hsv)
        hists = []
        for channel in (h, s, v):
            ch_norm = channel.astype(np.float32) / 255.0
            hist, _ = np.histogram(ch_norm.ravel(), bins=bin_edges, density=True)
            hists.append(hist)
        feats[i] = np.concatenate(hists)
    return feats

def _prepare_img(Xi, img_shape):
    img = Xi.reshape(img_shape)
    img = np.transpose(img, (1, 2, 0))
    img = np.clip(img, 0.0, 1.0)
    return img

def _to_gray(img):
    return rgb2gray(img)

def hog_features(img_gray, pixels_per_cell=(8, 8), cells_per_block=(2, 2), orientations=9):
    return hog(
        img_gray,
        orientations=orientations,
        pixels_per_cell=pixels_per_cell,
        cells_per_block=cells_per_block,
        block_norm="L2-Hys",
        transform_sqrt=True,
        feature_vector=True,
    )

def lbp_features(img_gray, P=8, R=1):
    img_u8 = np.clip(img_gray * 255.0, 0, 255).astype(np.uint8)
    lbp = local_binary_pattern(img_u8, P=P, R=R, method="uniform")
    n_bins = P + 2
    hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, n_bins + 1), density=True)
    return hist

def glcm_features(img_gray, distances=(1, 2), angles=(0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)):
    img_u8 = np.clip(img_gray * 255.0, 0, 255).astype(np.uint8)
    glcm = graycomatrix(
        img_u8,
        distances=distances,
        angles=angles,
        levels=256,
        symmetric=True,
        normed=True,
    )
    props = ["contrast", "dissimilarity", "homogeneity", "energy", "correlation", "ASM"]
    feats = [graycoprops(glcm, p).ravel() for p in props]
    return np.concatenate(feats)

def gabor_features(img_gray, frequencies=(0.1, 0.2, 0.3), thetas=(0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)):
    feats = []
    for freq in frequencies:
        for theta in thetas:
            real, imag = gabor(img_gray, frequency=freq, theta=theta)
            mag = np.sqrt(real ** 2 + imag ** 2)
            feats.append(mag.mean())
            feats.append(mag.var())
    return np.array(feats, dtype=np.float32)

def compute_feature_blocks(
    X,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=None,
    lbp_params=None,
    glcm_params=None,
    gabor_params=None,
    feature_keys=None,
 ):
    hog_params = hog_params or {}
    lbp_params = lbp_params or {}
    glcm_params = glcm_params or {}
    gabor_params = gabor_params or {}
    if feature_keys is None:
        feature_keys = {"color_hist", "hog", "lbp", "glcm", "gabor"}
    else:
        feature_keys = set(feature_keys)

    blocks = {}
    if "color_hist" in feature_keys:
        blocks["color_hist"] = color_hist_features(X, bins=color_bins, img_shape=img_shape)

    need_gray = any(k in feature_keys for k in ("hog", "lbp", "glcm", "gabor"))
    if need_gray:
        hog_list, lbp_list, glcm_list, gabor_list = [], [], [], []
        for i in range(X.shape[0]):
            img = _prepare_img(X[i], img_shape)
            gray = _to_gray(img)
            if "hog" in feature_keys:
                hog_list.append(hog_features(gray, **hog_params))
            if "lbp" in feature_keys:
                lbp_list.append(lbp_features(gray, **lbp_params))
            if "glcm" in feature_keys:
                glcm_list.append(glcm_features(gray, **glcm_params))
            if "gabor" in feature_keys:
                gabor_list.append(gabor_features(gray, **gabor_params))
        if "hog" in feature_keys:
            blocks["hog"] = np.vstack(hog_list).astype(np.float32)
        if "lbp" in feature_keys:
            blocks["lbp"] = np.vstack(lbp_list).astype(np.float32)
        if "glcm" in feature_keys:
            blocks["glcm"] = np.vstack(glcm_list).astype(np.float32)
        if "gabor" in feature_keys:
            blocks["gabor"] = np.vstack(gabor_list).astype(np.float32)
    return blocks

def concat_feature_blocks(blocks, keys):
    return np.concatenate([blocks[k] for k in keys], axis=1)

FEATURE_KEYS = ["color_hist", "hog", "lbp", "glcm", "gabor"]
HOG_PARAMS = {"pixels_per_cell": (8, 8), "cells_per_block": (2, 2), "orientations": 9}
LBP_PARAMS = {"P": 8, "R": 1}
GLCM_PARAMS = {"distances": (1, 2), "angles": (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)}
GABOR_PARAMS = {"frequencies": (0.1, 0.2, 0.3), "thetas": (0, np.pi / 4, np.pi / 2, 3 * np.pi / 4)}

In [31]:
feature_sets = {
    "hsv+lbp": ["color_hist", "lbp"],
    "hsv+glcm": ["color_hist", "glcm"],
    "hsv+gabor": ["color_hist", "gabor"],
    "hsv+hog": ["color_hist", "hog"],
    "hsv+hog+lbp": ["color_hist", "hog", "lbp"],
}

scenarios_mixed = {
    "clean": lambda x: x,
    "scenario_A": scenario_A,
    "scenario_B": scenario_B,
    "scenario_C": scenario_C,
}
probs_distribution = [0.60, 0.15, 0.15, 0.10]

def evaluate_mixed_scenarios(test_loader, scenario_fns, probs, feature_keys, scaler, pca_model, clf, verbose=True):
    np.random.seed(RANDOM_STATE)
    torch.manual_seed(RANDOM_STATE)
    scenario_names = list(scenario_fns.keys())
    all_preds = []
    all_labels = []
    scenario_counts = {name: 0 for name in scenario_names}
    start = time.time()

    for imgs, labels in test_loader:
        imgs_batch = []
        for img in imgs:
            r = np.random.rand()
            if r < probs[0]:
                scenario = scenario_names[0]
            elif r < probs[0] + probs[1]:
                scenario = scenario_names[1]
            elif r < probs[0] + probs[1] + probs[2]:
                scenario = scenario_names[2]
            else:
                scenario = scenario_names[3]
            scenario_counts[scenario] += 1
            x = scenario_fns[scenario](img)
            imgs_batch.append(x.unsqueeze(0))

        imgs_batch = torch.cat(imgs_batch, dim=0)
        X = imgs_batch.numpy()
        blocks = compute_feature_blocks(
            X,
            img_shape=(3, SIZE, SIZE),
            color_bins=HIST_BINS,
            hog_params=HOG_PARAMS,
            lbp_params=LBP_PARAMS,
            glcm_params=GLCM_PARAMS,
            gabor_params=GABOR_PARAMS,
            feature_keys=feature_keys,
        )
        X_feat = concat_feature_blocks(blocks, feature_keys)
        X_sc = scaler.transform(X_feat)
        X_pca = pca_model.transform(X_sc)
        preds = clf.predict(X_pca)

        all_preds.extend(preds)
        all_labels.extend(labels.numpy())

    all_preds = np.array(all_preds)
    all_labels = np.array(all_labels)
    acc = (all_preds == all_labels).mean()
    elapsed = time.time() - start

    if verbose:
        print(f"Accuracy on mixed realistic distribution: {acc:.4f}")
        print(f"Time: {elapsed:.2f}s")
        print(f"Scenario distribution (actual): {scenario_counts}")

    return acc, elapsed, scenario_counts

results_all = []

In [33]:
print("=" * 70)
print("FEATURE COMBINATION 1: HSV + LBP")
print("=" * 70)

feature_keys = ["color_hist", "lbp"]
print(f"Feature keys: {feature_keys}")

start_total = time.time()

# Step 1: Extract features for train, val, test
print("\nExtracting features for training set...")
blocks_train = compute_feature_blocks(
    X_train,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_train_feat = concat_feature_blocks(blocks_train, feature_keys)
print(f"Train features shape: {X_train_feat.shape}")

print("Extracting features for validation set...")
blocks_val = compute_feature_blocks(
    X_val,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_val_feat = concat_feature_blocks(blocks_val, feature_keys)
print(f"Validation features shape: {X_val_feat.shape}")

print("Extracting features for test set...")
blocks_test = compute_feature_blocks(
    X_test,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_test_feat = concat_feature_blocks(blocks_test, feature_keys)
print(f"Test features shape: {X_test_feat.shape}")

# Step 2: StandardScaler
print("\nApplying StandardScaler...")
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train_feat)
X_val_sc = scaler.transform(X_val_feat)
X_test_sc = scaler.transform(X_test_feat)

# Step 3: Parameter tuning on subset (variance target, C, gamma)
print("\nStarting parameter tuning on subset...")
tuning_subset_size = int(len(X_train_sc) * TUNING_SUBSET_RATIO)
X_train_subset = X_train_sc[:tuning_subset_size]
y_train_subset = y_train[:tuning_subset_size]

best_score = -np.inf
best_params = {}

for target_var in VARIANCE_TARGETS:
    # Compute number of components for this variance target
    pca_temp = PCA()
    pca_temp.fit(X_train_subset)
    cumsum = np.cumsum(pca_temp.explained_variance_ratio_)
    n_comp = int(np.argmax(cumsum >= target_var) + 1)
    n_comp = max(1, min(n_comp, len(cumsum)))
    
    X_train_subset_pca = pca_temp.transform(X_train_subset)[:, :n_comp]
    
    for C in C_VALUES:
        for gamma in GAMMA_VALUES:
            clf = SVC(kernel='rbf', C=C, gamma=gamma, random_state=RANDOM_STATE, verbose=0)
            scores = cross_val_score(clf, X_train_subset_pca, y_train_subset, cv=CV_FOLDS, scoring='accuracy')
            mean_score = scores.mean()
            
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'variance_target': target_var, 'C': C, 'gamma': gamma, 'n_components': n_comp}

print(f"\nBest parameters: {best_params}")
print(f"Best CV score on subset: {best_score:.4f}")

# Step 4: Fit final PCA with best n_components
pca_final = PCA(n_components=best_params['n_components'])
X_train_pca = pca_final.fit_transform(X_train_sc)
X_val_pca = pca_final.transform(X_val_sc)
X_test_pca = pca_final.transform(X_test_sc)

# Step 5: Train final SVM with best C and gamma
print("\nTraining final SVM model...")
clf_final = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], random_state=RANDOM_STATE, verbose=0)
clf_final.fit(X_train_pca, y_train)

# Step 6: Evaluate on validation set
val_acc = clf_final.score(X_val_pca, y_val)
print(f"Validation accuracy: {val_acc:.4f}")

# Step 7: Evaluate on mixed degraded test set
print("\nEvaluating on mixed degraded test set...")
test_acc_mixed, test_time, scenario_counts = evaluate_mixed_scenarios(
    test_loader, scenarios_mixed, probs_distribution, feature_keys, scaler, pca_final, clf_final, verbose=True
)

# Step 8: Compile results
elapsed_total = time.time() - start_total
results_dict = {
    'feature_combination': 'HSV+LBP',
    'variance_target': best_params['variance_target'],
    'n_components': best_params['n_components'],
    'C': best_params['C'],
    'gamma': best_params['gamma'],
    'best_cv_score': best_score,
    'val_acc': val_acc,
    'test_acc_mixed': test_acc_mixed,
    'total_time': elapsed_total,
}

results_all.append(results_dict)
print(f"\nTotal execution time: {elapsed_total:.2f}s")
print(f"Results stored: {results_dict}")


FEATURE COMBINATION 1: HSV + LBP
Feature keys: ['color_hist', 'lbp']

Extracting features for training set...
Train features shape: (91240, 106)
Extracting features for validation set...
Validation features shape: (39104, 106)
Extracting features for test set...
Test features shape: (43442, 106)

Applying StandardScaler...

Starting parameter tuning on subset...

Best parameters: {'variance_target': 0.95, 'C': 100, 'gamma': 0.01, 'n_components': 47}
Best CV score on subset: 0.9653

Training final SVM model...
Validation accuracy: 0.9999

Evaluating on mixed degraded test set...
Accuracy on mixed realistic distribution: 0.9719
Time: 200.73s
Scenario distribution (actual): {'clean': 26070, 'scenario_A': 6650, 'scenario_B': 6421, 'scenario_C': 4301}

Total execution time: 737.05s
Results stored: {'feature_combination': 'HSV+LBP', 'variance_target': 0.95, 'n_components': 47, 'C': 100, 'gamma': 0.01, 'best_cv_score': np.float64(0.9652564602468656), 'val_acc': 0.9999488543371522, 'test_acc_m

In [34]:
print("\n" + "=" * 70)
print("FEATURE COMBINATION 2: HSV + GLCM")
print("=" * 70)

feature_keys = ["color_hist", "glcm"]
print(f"Feature keys: {feature_keys}")

start_total = time.time()

# Step 1: Extract features for train, val, test
print("\nExtracting features for training set...")
blocks_train = compute_feature_blocks(
    X_train,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_train_feat = concat_feature_blocks(blocks_train, feature_keys)
print(f"Train features shape: {X_train_feat.shape}")

print("Extracting features for validation set...")
blocks_val = compute_feature_blocks(
    X_val,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_val_feat = concat_feature_blocks(blocks_val, feature_keys)
print(f"Validation features shape: {X_val_feat.shape}")

print("Extracting features for test set...")
blocks_test = compute_feature_blocks(
    X_test,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_test_feat = concat_feature_blocks(blocks_test, feature_keys)
print(f"Test features shape: {X_test_feat.shape}")

# Step 2: StandardScaler
print("\nApplying StandardScaler...")
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train_feat)
X_val_sc = scaler.transform(X_val_feat)
X_test_sc = scaler.transform(X_test_feat)

# Step 3: Parameter tuning on subset (variance target, C, gamma)
print("\nStarting parameter tuning on subset...")
tuning_subset_size = int(len(X_train_sc) * TUNING_SUBSET_RATIO)
X_train_subset = X_train_sc[:tuning_subset_size]
y_train_subset = y_train[:tuning_subset_size]

best_score = -np.inf
best_params = {}

for target_var in VARIANCE_TARGETS:
    # Compute number of components for this variance target
    pca_temp = PCA()
    pca_temp.fit(X_train_subset)
    cumsum = np.cumsum(pca_temp.explained_variance_ratio_)
    n_comp = int(np.argmax(cumsum >= target_var) + 1)
    n_comp = max(1, min(n_comp, len(cumsum)))
    
    X_train_subset_pca = pca_temp.transform(X_train_subset)[:, :n_comp]
    
    for C in C_VALUES:
        for gamma in GAMMA_VALUES:
            clf = SVC(kernel='rbf', C=C, gamma=gamma, random_state=RANDOM_STATE, verbose=0)
            scores = cross_val_score(clf, X_train_subset_pca, y_train_subset, cv=CV_FOLDS, scoring='accuracy')
            mean_score = scores.mean()
            
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'variance_target': target_var, 'C': C, 'gamma': gamma, 'n_components': n_comp}

print(f"\nBest parameters: {best_params}")
print(f"Best CV score on subset: {best_score:.4f}")

# Step 4: Fit final PCA with best n_components
pca_final = PCA(n_components=best_params['n_components'])
X_train_pca = pca_final.fit_transform(X_train_sc)
X_val_pca = pca_final.transform(X_val_sc)
X_test_pca = pca_final.transform(X_test_sc)

# Step 5: Train final SVM with best C and gamma
print("\nTraining final SVM model...")
clf_final = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], random_state=RANDOM_STATE, verbose=0)
clf_final.fit(X_train_pca, y_train)

# Step 6: Evaluate on validation set
val_acc = clf_final.score(X_val_pca, y_val)
print(f"Validation accuracy: {val_acc:.4f}")

# Step 7: Evaluate on mixed degraded test set
print("\nEvaluating on mixed degraded test set...")
test_acc_mixed, test_time, scenario_counts = evaluate_mixed_scenarios(
    test_loader, scenarios_mixed, probs_distribution, feature_keys, scaler, pca_final, clf_final, verbose=True
)

# Step 8: Compile results
elapsed_total = time.time() - start_total
results_dict = {
    'feature_combination': 'HSV+GLCM',
    'variance_target': best_params['variance_target'],
    'n_components': best_params['n_components'],
    'C': best_params['C'],
    'gamma': best_params['gamma'],
    'best_cv_score': best_score,
    'val_acc': val_acc,
    'test_acc_mixed': test_acc_mixed,
    'total_time': elapsed_total,
}

results_all.append(results_dict)
print(f"\nTotal execution time: {elapsed_total:.2f}s")
print(f"Results stored: {results_dict}")



FEATURE COMBINATION 2: HSV + GLCM
Feature keys: ['color_hist', 'glcm']

Extracting features for training set...
Train features shape: (91240, 144)
Extracting features for validation set...
Validation features shape: (39104, 144)
Extracting features for test set...
Test features shape: (43442, 144)

Applying StandardScaler...

Starting parameter tuning on subset...

Best parameters: {'variance_target': 0.95, 'C': 100, 'gamma': 0.01, 'n_components': 43}
Best CV score on subset: 0.9640

Training final SVM model...
Validation accuracy: 1.0000

Evaluating on mixed degraded test set...
Accuracy on mixed realistic distribution: 0.9742
Time: 1184.66s
Scenario distribution (actual): {'clean': 26070, 'scenario_A': 6650, 'scenario_B': 6421, 'scenario_C': 4301}

Total execution time: 5912.42s
Results stored: {'feature_combination': 'HSV+GLCM', 'variance_target': 0.95, 'n_components': 43, 'C': 100, 'gamma': 0.01, 'best_cv_score': np.float64(0.9639961082045839), 'val_acc': 0.9999744271685761, 'test

In [35]:
print("\n" + "=" * 70)
print("FEATURE COMBINATION 3: HSV + GABOR")
print("=" * 70)

feature_keys = ["color_hist", "gabor"]
print(f"Feature keys: {feature_keys}")

start_total = time.time()

# Step 1: Extract features for train, val, test
print("\nExtracting features for training set...")
blocks_train = compute_feature_blocks(
    X_train,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_train_feat = concat_feature_blocks(blocks_train, feature_keys)
print(f"Train features shape: {X_train_feat.shape}")

print("Extracting features for validation set...")
blocks_val = compute_feature_blocks(
    X_val,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_val_feat = concat_feature_blocks(blocks_val, feature_keys)
print(f"Validation features shape: {X_val_feat.shape}")

print("Extracting features for test set...")
blocks_test = compute_feature_blocks(
    X_test,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_test_feat = concat_feature_blocks(blocks_test, feature_keys)
print(f"Test features shape: {X_test_feat.shape}")

# Step 2: StandardScaler
print("\nApplying StandardScaler...")
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train_feat)
X_val_sc = scaler.transform(X_val_feat)
X_test_sc = scaler.transform(X_test_feat)

# Step 3: Parameter tuning on subset (variance target, C, gamma)
print("\nStarting parameter tuning on subset...")
tuning_subset_size = int(len(X_train_sc) * TUNING_SUBSET_RATIO)
X_train_subset = X_train_sc[:tuning_subset_size]
y_train_subset = y_train[:tuning_subset_size]

best_score = -np.inf
best_params = {}

for target_var in VARIANCE_TARGETS:
    # Compute number of components for this variance target
    pca_temp = PCA()
    pca_temp.fit(X_train_subset)
    cumsum = np.cumsum(pca_temp.explained_variance_ratio_)
    n_comp = int(np.argmax(cumsum >= target_var) + 1)
    n_comp = max(1, min(n_comp, len(cumsum)))
    
    X_train_subset_pca = pca_temp.transform(X_train_subset)[:, :n_comp]
    
    for C in C_VALUES:
        for gamma in GAMMA_VALUES:
            clf = SVC(kernel='rbf', C=C, gamma=gamma, random_state=RANDOM_STATE, verbose=0)
            scores = cross_val_score(clf, X_train_subset_pca, y_train_subset, cv=CV_FOLDS, scoring='accuracy')
            mean_score = scores.mean()
            
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'variance_target': target_var, 'C': C, 'gamma': gamma, 'n_components': n_comp}

print(f"\nBest parameters: {best_params}")
print(f"Best CV score on subset: {best_score:.4f}")

# Step 4: Fit final PCA with best n_components
pca_final = PCA(n_components=best_params['n_components'])
X_train_pca = pca_final.fit_transform(X_train_sc)
X_val_pca = pca_final.transform(X_val_sc)
X_test_pca = pca_final.transform(X_test_sc)

# Step 5: Train final SVM with best C and gamma
print("\nTraining final SVM model...")
clf_final = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], random_state=RANDOM_STATE, verbose=0)
clf_final.fit(X_train_pca, y_train)

# Step 6: Evaluate on validation set
val_acc = clf_final.score(X_val_pca, y_val)
print(f"Validation accuracy: {val_acc:.4f}")

# Step 7: Evaluate on mixed degraded test set
print("\nEvaluating on mixed degraded test set...")
test_acc_mixed, test_time, scenario_counts = evaluate_mixed_scenarios(
    test_loader, scenarios_mixed, probs_distribution, feature_keys, scaler, pca_final, clf_final, verbose=True
)

# Step 8: Compile results
elapsed_total = time.time() - start_total
results_dict = {
    'feature_combination': 'HSV+GABOR',
    'variance_target': best_params['variance_target'],
    'n_components': best_params['n_components'],
    'C': best_params['C'],
    'gamma': best_params['gamma'],
    'best_cv_score': best_score,
    'val_acc': val_acc,
    'test_acc_mixed': test_acc_mixed,
    'total_time': elapsed_total,
}

results_all.append(results_dict)
print(f"\nTotal execution time: {elapsed_total:.2f}s")
print(f"Results stored: {results_dict}")



FEATURE COMBINATION 3: HSV + GABOR
Feature keys: ['color_hist', 'gabor']

Extracting features for training set...
Train features shape: (91240, 120)
Extracting features for validation set...
Validation features shape: (39104, 120)
Extracting features for test set...
Test features shape: (43442, 120)

Applying StandardScaler...

Starting parameter tuning on subset...

Best parameters: {'variance_target': 0.95, 'C': 100, 'gamma': 0.01, 'n_components': 50}
Best CV score on subset: 0.9708

Training final SVM model...
Validation accuracy: 1.0000

Evaluating on mixed degraded test set...
Accuracy on mixed realistic distribution: 0.9795
Time: 2640.63s
Scenario distribution (actual): {'clean': 26070, 'scenario_A': 6650, 'scenario_B': 6421, 'scenario_C': 4301}

Total execution time: 13062.78s
Results stored: {'feature_combination': 'HSV+GABOR', 'variance_target': 0.95, 'n_components': 50, 'C': 100, 'gamma': 0.01, 'best_cv_score': np.float64(0.9708461409704755), 'val_acc': 0.9999744271685761, '

In [36]:
print("\n" + "=" * 70)
print("FEATURE COMBINATION 4: HSV + HOG")
print("=" * 70)

feature_keys = ["color_hist", "hog"]
print(f"Feature keys: {feature_keys}")

start_total = time.time()

# Step 1: Extract features for train, val, test
print("\nExtracting features for training set...")
blocks_train = compute_feature_blocks(
    X_train,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_train_feat = concat_feature_blocks(blocks_train, feature_keys)
print(f"Train features shape: {X_train_feat.shape}")

print("Extracting features for validation set...")
blocks_val = compute_feature_blocks(
    X_val,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_val_feat = concat_feature_blocks(blocks_val, feature_keys)
print(f"Validation features shape: {X_val_feat.shape}")

print("Extracting features for test set...")
blocks_test = compute_feature_blocks(
    X_test,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_test_feat = concat_feature_blocks(blocks_test, feature_keys)
print(f"Test features shape: {X_test_feat.shape}")

# Step 2: StandardScaler
print("\nApplying StandardScaler...")
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train_feat)
X_val_sc = scaler.transform(X_val_feat)
X_test_sc = scaler.transform(X_test_feat)

# Step 3: Parameter tuning on subset (variance target, C, gamma)
print("\nStarting parameter tuning on subset...")
tuning_subset_size = int(len(X_train_sc) * TUNING_SUBSET_RATIO)
X_train_subset = X_train_sc[:tuning_subset_size]
y_train_subset = y_train[:tuning_subset_size]

best_score = -np.inf
best_params = {}

for target_var in VARIANCE_TARGETS:
    # Compute number of components for this variance target
    pca_temp = PCA()
    pca_temp.fit(X_train_subset)
    cumsum = np.cumsum(pca_temp.explained_variance_ratio_)
    n_comp = int(np.argmax(cumsum >= target_var) + 1)
    n_comp = max(1, min(n_comp, len(cumsum)))
    
    X_train_subset_pca = pca_temp.transform(X_train_subset)[:, :n_comp]
    
    for C in C_VALUES:
        for gamma in GAMMA_VALUES:
            clf = SVC(kernel='rbf', C=C, gamma=gamma, random_state=RANDOM_STATE, verbose=0)
            scores = cross_val_score(clf, X_train_subset_pca, y_train_subset, cv=CV_FOLDS, scoring='accuracy')
            mean_score = scores.mean()
            
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'variance_target': target_var, 'C': C, 'gamma': gamma, 'n_components': n_comp}

print(f"\nBest parameters: {best_params}")
print(f"Best CV score on subset: {best_score:.4f}")

# Step 4: Fit final PCA with best n_components
pca_final = PCA(n_components=best_params['n_components'])
X_train_pca = pca_final.fit_transform(X_train_sc)
X_val_pca = pca_final.transform(X_val_sc)
X_test_pca = pca_final.transform(X_test_sc)

# Step 5: Train final SVM with best C and gamma
print("\nTraining final SVM model...")
clf_final = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], random_state=RANDOM_STATE, verbose=0)
clf_final.fit(X_train_pca, y_train)

# Step 6: Evaluate on validation set
val_acc = clf_final.score(X_val_pca, y_val)
print(f"Validation accuracy: {val_acc:.4f}")

# Step 7: Evaluate on mixed degraded test set
print("\nEvaluating on mixed degraded test set...")
test_acc_mixed, test_time, scenario_counts = evaluate_mixed_scenarios(
    test_loader, scenarios_mixed, probs_distribution, feature_keys, scaler, pca_final, clf_final, verbose=True
)

# Step 8: Compile results
elapsed_total = time.time() - start_total
results_dict = {
    'feature_combination': 'HSV+HOG',
    'variance_target': best_params['variance_target'],
    'n_components': best_params['n_components'],
    'C': best_params['C'],
    'gamma': best_params['gamma'],
    'best_cv_score': best_score,
    'val_acc': val_acc,
    'test_acc_mixed': test_acc_mixed,
    'total_time': elapsed_total,
}

results_all.append(results_dict)
print(f"\nTotal execution time: {elapsed_total:.2f}s")
print(f"Results stored: {results_dict}")



FEATURE COMBINATION 4: HSV + HOG
Feature keys: ['color_hist', 'hog']

Extracting features for training set...
Train features shape: (91240, 420)
Extracting features for validation set...
Validation features shape: (39104, 420)
Extracting features for test set...
Test features shape: (43442, 420)

Applying StandardScaler...

Starting parameter tuning on subset...

Best parameters: {'variance_target': 0.95, 'C': 100, 'gamma': 0.001, 'n_components': 152}
Best CV score on subset: 0.9685

Training final SVM model...
Validation accuracy: 0.9999

Evaluating on mixed degraded test set...
Accuracy on mixed realistic distribution: 0.9611
Time: 429.65s
Scenario distribution (actual): {'clean': 26070, 'scenario_A': 6650, 'scenario_B': 6421, 'scenario_C': 4301}

Total execution time: 2147.26s
Results stored: {'feature_combination': 'HSV+HOG', 'variance_target': 0.95, 'n_components': 152, 'C': 100, 'gamma': 0.001, 'best_cv_score': np.float64(0.968544464802358), 'val_acc': 0.9998977086743044, 'test_

In [37]:
print("\n" + "=" * 70)
print("FEATURE COMBINATION 5: HSV + HOG + LBP")
print("=" * 70)

feature_keys = ["color_hist", "hog", "lbp"]
print(f"Feature keys: {feature_keys}")

start_total = time.time()

# Step 1: Extract features for train, val, test
print("\nExtracting features for training set...")
blocks_train = compute_feature_blocks(
    X_train,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_train_feat = concat_feature_blocks(blocks_train, feature_keys)
print(f"Train features shape: {X_train_feat.shape}")

print("Extracting features for validation set...")
blocks_val = compute_feature_blocks(
    X_val,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_val_feat = concat_feature_blocks(blocks_val, feature_keys)
print(f"Validation features shape: {X_val_feat.shape}")

print("Extracting features for test set...")
blocks_test = compute_feature_blocks(
    X_test,
    img_shape=(3, SIZE, SIZE),
    color_bins=HIST_BINS,
    hog_params=HOG_PARAMS,
    lbp_params=LBP_PARAMS,
    glcm_params=GLCM_PARAMS,
    gabor_params=GABOR_PARAMS,
    feature_keys=feature_keys,
)
X_test_feat = concat_feature_blocks(blocks_test, feature_keys)
print(f"Test features shape: {X_test_feat.shape}")

# Step 2: StandardScaler
print("\nApplying StandardScaler...")
scaler = StandardScaler()
X_train_sc = scaler.fit_transform(X_train_feat)
X_val_sc = scaler.transform(X_val_feat)
X_test_sc = scaler.transform(X_test_feat)

# Step 3: Parameter tuning on subset (variance target, C, gamma)
print("\nStarting parameter tuning on subset...")
tuning_subset_size = int(len(X_train_sc) * TUNING_SUBSET_RATIO)
X_train_subset = X_train_sc[:tuning_subset_size]
y_train_subset = y_train[:tuning_subset_size]

best_score = -np.inf
best_params = {}

for target_var in VARIANCE_TARGETS:
    # Compute number of components for this variance target
    pca_temp = PCA()
    pca_temp.fit(X_train_subset)
    cumsum = np.cumsum(pca_temp.explained_variance_ratio_)
    n_comp = int(np.argmax(cumsum >= target_var) + 1)
    n_comp = max(1, min(n_comp, len(cumsum)))
    
    X_train_subset_pca = pca_temp.transform(X_train_subset)[:, :n_comp]
    
    for C in C_VALUES:
        for gamma in GAMMA_VALUES:
            clf = SVC(kernel='rbf', C=C, gamma=gamma, random_state=RANDOM_STATE, verbose=0)
            scores = cross_val_score(clf, X_train_subset_pca, y_train_subset, cv=CV_FOLDS, scoring='accuracy')
            mean_score = scores.mean()
            
            if mean_score > best_score:
                best_score = mean_score
                best_params = {'variance_target': target_var, 'C': C, 'gamma': gamma, 'n_components': n_comp}

print(f"\nBest parameters: {best_params}")
print(f"Best CV score on subset: {best_score:.4f}")

# Step 4: Fit final PCA with best n_components
pca_final = PCA(n_components=best_params['n_components'])
X_train_pca = pca_final.fit_transform(X_train_sc)
X_val_pca = pca_final.transform(X_val_sc)
X_test_pca = pca_final.transform(X_test_sc)

# Step 5: Train final SVM with best C and gamma
print("\nTraining final SVM model...")
clf_final = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'], random_state=RANDOM_STATE, verbose=0)
clf_final.fit(X_train_pca, y_train)

# Step 6: Evaluate on validation set
val_acc = clf_final.score(X_val_pca, y_val)
print(f"Validation accuracy: {val_acc:.4f}")

# Step 7: Evaluate on mixed degraded test set
print("\nEvaluating on mixed degraded test set...")
test_acc_mixed, test_time, scenario_counts = evaluate_mixed_scenarios(
    test_loader, scenarios_mixed, probs_distribution, feature_keys, scaler, pca_final, clf_final, verbose=True
)

# Step 8: Compile results
elapsed_total = time.time() - start_total
results_dict = {
    'feature_combination': 'HSV+HOG+LBP',
    'variance_target': best_params['variance_target'],
    'n_components': best_params['n_components'],
    'C': best_params['C'],
    'gamma': best_params['gamma'],
    'best_cv_score': best_score,
    'val_acc': val_acc,
    'test_acc_mixed': test_acc_mixed,
    'total_time': elapsed_total,
}

results_all.append(results_dict)
print(f"\nTotal execution time: {elapsed_total:.2f}s")
print(f"Results stored: {results_dict}")



FEATURE COMBINATION 5: HSV + HOG + LBP
Feature keys: ['color_hist', 'hog', 'lbp']

Extracting features for training set...
Train features shape: (91240, 430)
Extracting features for validation set...
Validation features shape: (39104, 430)
Extracting features for test set...
Test features shape: (43442, 430)

Applying StandardScaler...

Starting parameter tuning on subset...

Best parameters: {'variance_target': 0.95, 'C': 100, 'gamma': 0.001, 'n_components': 155}
Best CV score on subset: 0.9683

Training final SVM model...
Validation accuracy: 0.9999

Evaluating on mixed degraded test set...
Accuracy on mixed realistic distribution: 0.9621
Time: 362.14s
Scenario distribution (actual): {'clean': 26070, 'scenario_A': 6650, 'scenario_B': 6421, 'scenario_C': 4301}

Total execution time: 2168.57s
Results stored: {'feature_combination': 'HSV+HOG+LBP', 'variance_target': 0.95, 'n_components': 155, 'C': 100, 'gamma': 0.001, 'best_cv_score': np.float64(0.9683253467880443), 'val_acc': 0.999897

In [38]:
print("\n" + "=" * 70)
print("FINAL COMPARISON TABLE")
print("=" * 70)

# Create DataFrame from all results
results_df = pd.DataFrame(results_all)

# Sort by test_acc_mixed (best performance first)
results_df_sorted = results_df.sort_values('test_acc_mixed', ascending=False).reset_index(drop=True)

# Display full results table
print("\nDetailed Results (sorted by Mixed Test Accuracy):")
print(results_df_sorted.to_string(index=True))

# Create summary display
print("\n" + "-" * 70)
print("SUMMARY STATISTICS")
print("-" * 70)

print("\nBest Overall Performance:")
best_idx = results_df_sorted['test_acc_mixed'].idxmax()
best_result = results_df_sorted.loc[best_idx]
print(f"  Feature Combination: {best_result['feature_combination']}")
print(f"  Mixed Test Accuracy: {best_result['test_acc_mixed']:.4f}")
print(f"  Validation Accuracy: {best_result['val_acc']:.4f}")
print(f"  Best CV Score: {best_result['best_cv_score']:.4f}")
print(f"  Parameters: Variance={best_result['variance_target']:.2f}, C={best_result['C']}, Gamma={best_result['gamma']}")
print(f"  PCA Components: {best_result['n_components']}")
print(f"  Execution Time: {best_result['total_time']:.2f}s")

print("\nRanking by Mixed Test Accuracy:")
for idx, row in results_df_sorted.iterrows():
    print(f"  {idx+1}. {row['feature_combination']:20s} - Test Acc: {row['test_acc_mixed']:.4f}, Val Acc: {row['val_acc']:.4f}")

print("\nAverage Execution Time: {:.2f}s".format(results_df['total_time'].mean()))
print("Total Execution Time: {:.2f}s".format(results_df['total_time'].sum()))

print("\n" + "=" * 70)



FINAL COMPARISON TABLE

Detailed Results (sorted by Mixed Test Accuracy):
  feature_combination  variance_target  n_components    C  gamma  best_cv_score   val_acc  test_acc_mixed    total_time
0           HSV+GABOR             0.95            50  100  0.010       0.970846  0.999974        0.979467  13062.781675
1            HSV+GLCM             0.95            43  100  0.010       0.963996  0.999974        0.974218   5912.421256
2             HSV+LBP             0.95            47  100  0.010       0.965256  0.999949        0.971894    737.053712
3         HSV+HOG+LBP             0.95           155  100  0.001       0.968325  0.999898        0.962064   2168.565353
4             HSV+HOG             0.95           152  100  0.001       0.968544  0.999898        0.961075   2147.261732

----------------------------------------------------------------------
SUMMARY STATISTICS
----------------------------------------------------------------------

Best Overall Performance:
  Feature Combin