In [1]:
from main import *
from bovw import *

In [2]:
import os
import random
import numpy as np

%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
# Single seed shared by every stochastic step in this notebook.
SEED = 42

# Seed Python's global generator first, then NumPy's legacy global generator.
for seed_rng in (random.seed, np.random.seed):
    seed_rng(SEED)

# NOTE(review): CPython reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only influences child processes that inherit the
# environment, not the hash randomisation of the running kernel -- confirm.
os.environ["PYTHONHASHSEED"] = f"{SEED}"

In [4]:
# Build the two splits from their image folders: `train` is used for the
# cross-validation experiments, `val` is kept as `data_test`.
data_train, data_test = (
    Dataset(ImageFolder="../data/places_reduced/train"),
    Dataset(ImageFolder="../data/places_reduced/val"),
)

# Displayed as the cell result: (number of train items, number of test items).
tuple(map(len, (data_train, data_test)))

(8700, 2200)

In [5]:
# Shuffle both splits in place with the globally seeded `random` module
# (train first, then test, so the generator is consumed in a fixed order).
# NOTE(review): the shuffle mutates the datasets, so re-running this cell
# without re-running the seeding and loading cells yields a different order.
for split in (data_train, data_test):
    random.shuffle(split)

In [6]:
# Experiment 1: sweep the SVM regularisation parameter C with a linear kernel.
# Each C value gets its own cross_validate_bovw run (n_splits=5) on data_train;
# the returned score object is stored in `linear_c_results`, keyed by C.
# NOTE(review): the captured output shows descriptor extraction repeating for
# every C value; avoiding that would require caching inside cross_validate_bovw.
linear_c_results = {}

# Bag-of-visual-words settings forwarded unchanged to every run of the sweep.
bovw_params = dict(
    detector_type="SIFT",
    codebook_size=512,
    detector_kwargs=dict(nfeatures=1000),
)

print("=" * 60, "EXPERIMENT 1: LINEAR KERNEL + C VALUES", "=" * 60, sep="\n")

# The classifier class does not change between iterations, so bind it once.
classifier_cls = SVC

for C in (0.01, 0.1, 1.0, 10.0, 100.0):
    print(f"\n=== Testing Linear SVM with C={C} ===")

    # Only C varies; the remaining SVC options are fixed for the whole sweep.
    classifier_params = dict(
        kernel="linear",
        C=C,
        class_weight="balanced",
        random_state=SEED,
    )

    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5,
    )
    linear_c_results[C] = scores

    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")

# Recap table: train vs. test accuracy (mean ± std) for every C tried above.
print("\n" + "=" * 60, "LINEAR KERNEL: C COMPARISON", "=" * 60, sep="\n")
for C, result in linear_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")

EXPERIMENT 1: LINEAR KERNEL + C VALUES

=== Testing Linear SVM with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4304/8700 [00:01<00:01, 2348.49it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2343.49it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [01:41<00:00, 20.28s/it]


C=0.01 -> Test Accuracy: 0.1743 ± 0.0766

=== Testing Linear SVM with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4412/8700 [00:01<00:01, 2738.76it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2690.84it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:28<00:00, 17.68s/it]


C=0.1 -> Test Accuracy: 0.3047 ± 0.0131

=== Testing Linear SVM with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4446/8700 [00:01<00:01, 2755.87it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2722.48it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:19<00:00, 15.96s/it]


C=1.0 -> Test Accuracy: 0.3343 ± 0.0039

=== Testing Linear SVM with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4439/8700 [00:01<00:01, 2768.13it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2723.18it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:17<00:00, 15.54s/it]


C=10.0 -> Test Accuracy: 0.2858 ± 0.0062

=== Testing Linear SVM with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4437/8700 [00:01<00:01, 2768.78it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2710.05it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:39<00:00, 19.92s/it]

C=100.0 -> Test Accuracy: 0.2628 ± 0.0171

LINEAR KERNEL: C COMPARISON
C=  0.01 -> Train: 0.1784 ± 0.0825 | Test: 0.1743 ± 0.0766
C=  0.10 -> Train: 0.3380 ± 0.0028 | Test: 0.3047 ± 0.0131
C=  1.00 -> Train: 0.4765 ± 0.0047 | Test: 0.3343 ± 0.0039
C= 10.00 -> Train: 0.6348 ± 0.0082 | Test: 0.2858 ± 0.0062
C=100.00 -> Train: 0.8069 ± 0.0065 | Test: 0.2628 ± 0.0171





In [7]:
# Experiment 2: sweep the SVM regularisation parameter C with an RBF kernel
# (gamma="scale"). Each C value gets its own cross_validate_bovw run
# (n_splits=5) on data_train; results are stored in `rbf_c_results`, keyed by C.
# NOTE(review): in the captured run the train accuracy reaches 1.0 for C >= 10
# while the test accuracy stays near 0.33 -- the larger C values overfit.
rbf_c_results = {}

# Bag-of-visual-words settings forwarded unchanged to every run of the sweep.
bovw_params = dict(
    detector_type="SIFT",
    codebook_size=512,
    detector_kwargs=dict(nfeatures=1000),
)

print("=" * 60, "EXPERIMENT 2: RBF KERNEL + C VALUES", "=" * 60, sep="\n")

# The classifier class does not change between iterations, so bind it once.
classifier_cls = SVC

for C in (0.01, 0.1, 1.0, 10.0, 100.0):
    print(f"\n=== Testing RBF SVM with C={C} ===")

    # Fixed SVC options for this sweep; C is slotted in right after the kernel.
    fixed_options = {
        "gamma": "scale",
        "class_weight": "balanced",
        "random_state": SEED,
    }
    classifier_params = {"kernel": "rbf", "C": C, **fixed_options}

    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5,
    )
    rbf_c_results[C] = scores

    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")

# Recap table: train vs. test accuracy (mean ± std) for every C tried above.
print("\n" + "=" * 60, "RBF KERNEL: C COMPARISON", "=" * 60, sep="\n")
for C, result in rbf_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")

EXPERIMENT 2: RBF KERNEL + C VALUES

=== Testing RBF SVM with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4438/8700 [00:01<00:01, 2749.86it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2730.88it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [01:58<00:00, 23.77s/it]


C=0.01 -> Test Accuracy: 0.1668 ± 0.0788

=== Testing RBF SVM with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4446/8700 [00:01<00:01, 2763.24it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2736.63it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:55<00:00, 23.05s/it]


C=0.1 -> Test Accuracy: 0.2971 ± 0.0111

=== Testing RBF SVM with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4441/8700 [00:01<00:01, 2753.67it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2721.14it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:51<00:00, 22.29s/it]


C=1.0 -> Test Accuracy: 0.3357 ± 0.0038

=== Testing RBF SVM with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4440/8700 [00:01<00:01, 2728.19it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2722.88it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:58<00:00, 23.80s/it]


C=10.0 -> Test Accuracy: 0.3298 ± 0.0107

=== Testing RBF SVM with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4411/8700 [00:01<00:01, 2742.49it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2714.15it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:58<00:00, 23.66s/it]

C=100.0 -> Test Accuracy: 0.3234 ± 0.0097

RBF KERNEL: C COMPARISON
C=  0.01 -> Train: 0.1885 ± 0.0986 | Test: 0.1668 ± 0.0788
C=  0.10 -> Train: 0.3583 ± 0.0029 | Test: 0.2971 ± 0.0111
C=  1.00 -> Train: 0.7732 ± 0.0055 | Test: 0.3357 ± 0.0038
C= 10.00 -> Train: 1.0000 ± 0.0001 | Test: 0.3298 ± 0.0107
C=100.00 -> Train: 1.0000 ± 0.0000 | Test: 0.3234 ± 0.0097





In [8]:
# Experiment 3: sweep the SVM regularisation parameter C with a degree-3
# polynomial kernel (gamma="scale"). Each C value gets its own
# cross_validate_bovw run (n_splits=5) on data_train; results are stored in
# `poly_c_results`, keyed by C.
poly_c_results = {}

# Bag-of-visual-words settings forwarded unchanged to every run of the sweep.
bovw_params = dict(
    detector_type="SIFT",
    codebook_size=512,
    detector_kwargs=dict(nfeatures=1000),
)

print("=" * 60, "EXPERIMENT 3: POLYNOMIAL KERNEL + C VALUES", "=" * 60, sep="\n")

# The classifier class does not change between iterations, so bind it once.
classifier_cls = SVC

for C in (0.01, 0.1, 1.0, 10.0, 100.0):
    print(f"\n=== Testing Poly SVM with C={C} ===")

    # Only C varies; the kernel is a cubic polynomial for the whole sweep.
    classifier_params = dict(
        kernel="poly",
        degree=3,
        C=C,
        gamma="scale",
        class_weight="balanced",
        random_state=SEED,
    )

    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5,
    )
    poly_c_results[C] = scores

    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")

# Recap table: train vs. test accuracy (mean ± std) for every C tried above.
print("\n" + "=" * 60, "POLYNOMIAL KERNEL: C COMPARISON", "=" * 60, sep="\n")
for C, result in poly_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")

EXPERIMENT 3: POLYNOMIAL KERNEL + C VALUES

=== Testing Poly SVM with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4438/8700 [00:01<00:01, 2765.44it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2733.25it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [01:32<00:00, 18.52s/it]


C=0.01 -> Test Accuracy: 0.2383 ± 0.0387

=== Testing Poly SVM with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  51%|█████▏    | 4463/8700 [00:01<00:01, 2761.11it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2743.37it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:26<00:00, 17.38s/it]


C=0.1 -> Test Accuracy: 0.3275 ± 0.0045

=== Testing Poly SVM with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4441/8700 [00:01<00:01, 2767.91it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2737.27it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:31<00:00, 18.30s/it]


C=1.0 -> Test Accuracy: 0.3208 ± 0.0135

=== Testing Poly SVM with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████     | 4453/8700 [00:01<00:01, 2754.59it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2733.29it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:30<00:00, 18.14s/it]


C=10.0 -> Test Accuracy: 0.3278 ± 0.0057

=== Testing Poly SVM with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  51%|█████▏    | 4462/8700 [00:01<00:01, 2760.70it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2737.87it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:31<00:00, 18.31s/it]

C=100.0 -> Test Accuracy: 0.3296 ± 0.0045

POLYNOMIAL KERNEL: C COMPARISON
C=  0.01 -> Train: 0.2718 ± 0.0494 | Test: 0.2383 ± 0.0387
C=  0.10 -> Train: 0.5068 ± 0.0046 | Test: 0.3275 ± 0.0045
C=  1.00 -> Train: 0.9966 ± 0.0007 | Test: 0.3208 ± 0.0135
C= 10.00 -> Train: 1.0000 ± 0.0000 | Test: 0.3278 ± 0.0057
C=100.00 -> Train: 1.0000 ± 0.0000 | Test: 0.3296 ± 0.0045





In [9]:
# Summary: list the mean ± std test accuracy of every (kernel, C) pair from the
# three sweeps, then report the configuration with the highest mean test
# accuracy. (Every C value is printed for each kernel, not only the best one.)

# kernel name -> (section title, results of that kernel's C sweep).
# The insertion order (linear, rbf, poly) fixes both the print order and the
# tie-breaking of the best-configuration search below.
kernel_results = {
    "linear": ("LINEAR KERNEL", linear_c_results),
    "rbf": ("RBF KERNEL", rbf_c_results),
    "poly": ("POLYNOMIAL KERNEL", poly_c_results),
}

print("=" * 80)
print("SUMMARY: ALL KERNELS COMPARISON")
print("=" * 80)

for section_title, c_results in kernel_results.values():
    print(f"\n{section_title}:")
    for C, result in c_results.items():
        print(f"  C={C:6.2f} -> Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")

# Find best configuration: a single pass over every (kernel, C) pair.
# The strict `>` keeps the first configuration seen when several tie.
# NOTE(review): in the captured run the top linear and RBF scores differ by
# less than one fold standard deviation, so the winner is not clearly separated.
best_acc = 0
best_config = None

for kernel_name, (_, c_results) in kernel_results.items():
    for C, result in c_results.items():
        if result.test.accuracy.mean > best_acc:
            best_acc = result.test.accuracy.mean
            best_config = (kernel_name, C, result)

print("\n" + "=" * 80)
print("BEST CONFIGURATION:")
print("=" * 80)
if best_config is not None:
    kernel, C, result = best_config
    print(f"Kernel: {kernel}")
    print(f"C: {C}")
    print(f"Test Accuracy: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")
    print(f"Train Accuracy: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f}")
    print(f"Test F1: {result.test.f1.mean:.4f} ± {result.test.f1.std:.4f}")
else:
    # Reached when the result dicts are empty or no mean accuracy exceeds 0;
    # say so explicitly instead of leaving the section blank.
    print("No configuration achieved a test accuracy above 0.")

SUMMARY: ALL KERNELS COMPARISON

LINEAR KERNEL:
  C=  0.01 -> Test: 0.1743 ± 0.0766
  C=  0.10 -> Test: 0.3047 ± 0.0131
  C=  1.00 -> Test: 0.3343 ± 0.0039
  C= 10.00 -> Test: 0.2858 ± 0.0062
  C=100.00 -> Test: 0.2628 ± 0.0171

RBF KERNEL:
  C=  0.01 -> Test: 0.1668 ± 0.0788
  C=  0.10 -> Test: 0.2971 ± 0.0111
  C=  1.00 -> Test: 0.3357 ± 0.0038
  C= 10.00 -> Test: 0.3298 ± 0.0107
  C=100.00 -> Test: 0.3234 ± 0.0097

POLYNOMIAL KERNEL:
  C=  0.01 -> Test: 0.2383 ± 0.0387
  C=  0.10 -> Test: 0.3275 ± 0.0045
  C=  1.00 -> Test: 0.3208 ± 0.0135
  C= 10.00 -> Test: 0.3278 ± 0.0057
  C=100.00 -> Test: 0.3296 ± 0.0045

BEST CONFIGURATION:
Kernel: rbf
C: 1.0
Test Accuracy: 0.3357 ± 0.0038
Train Accuracy: 0.7732 ± 0.0055
Test F1: 0.3276 ± 0.0034
