In [1]:
from main import *
from bovw import *

In [2]:
import os
import random
import numpy as np

%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)

In [4]:
data_train = Dataset(ImageFolder="../data/places_reduced/train")
data_test = Dataset(ImageFolder="../data/places_reduced/val")

len(data_train), len(data_test)

(8700, 2200)

In [5]:
random.shuffle(data_train)
random.shuffle(data_test)

In [6]:
classifier_results = {}

for penalty in ["l1", "l2", "elasticnet"]:
    bovw_params = {
        "detector_type": "SIFT",
        "codebook_size": 512,
        "detector_kwargs": {"nfeatures": 1000},
    }
    
    classifier_cls = LogisticRegression
    classifier_params = {
        "max_iter": 1000,
        "class_weight": "balanced",
        "solver": "saga"
    }
    
    scores = cross_validate_bovw(
        dataset=data_train,
            bovw_kwargs=bovw_params,
            classifier_cls=classifier_cls,
            classifier_kwargs=classifier_params,
            n_splits=5
    )
        
    classifier_results[penalty] = scores


print("\nSIFT SUMMARY")
for size in classifier_results.keys():
    print(f"codebook_size={size} -> Accuracy Train: {classifier_results[size].train.accuracy.mean:.4f} ± {classifier_results[size].train.accuracy.std:.4f}")
    print(f"codebook_size={size} -> Accuracy Test: {classifier_results[size].test.accuracy.mean:.4f} ± {classifier_results[size].test.accuracy.std:.4f}")
    print()

Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4300/8700 [00:01<00:01, 2335.78it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2321.18it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:42<00:00,  8.47s/it]
Phase [Setup]: Extracting the descriptors:  50%|████▉     | 4342/8700 [00:01<00:01, 2715.78it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2662.44it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:42<00:00,  8.41s/it]
Phase [Setup]: Extracting the descriptors:  50%|████▉     | 4320/8700 [00:01<00:01, 2678.37it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2662.23it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:41<00:00,  8.32s/it]


SIFT SUMMARY
codebook_size=l1 -> Accuracy Train: 0.4486 ± 0.0050
codebook_size=l1 -> Accuracy Test: 0.3348 ± 0.0047

codebook_size=l2 -> Accuracy Train: 0.4482 ± 0.0015
codebook_size=l2 -> Accuracy Test: 0.3236 ± 0.0100

codebook_size=elasticnet -> Accuracy Train: 0.4498 ± 0.0083
codebook_size=elasticnet -> Accuracy Test: 0.3321 ± 0.0109






In [7]:
# Experiment: L2 penalty with different C values
l2_c_results = {}

bovw_params = {
    "detector_type": "SIFT",
    "codebook_size": 512,
    "detector_kwargs": {"nfeatures": 1000},
}

for C in [0.01, 0.1, 1.0, 10.0, 100.0]:
    print(f"\n=== Testing L2 with C={C} ===")
    
    classifier_cls = LogisticRegression
    classifier_params = {
        "C": C,
        "penalty": "l2",
        "max_iter": 1000,
        "class_weight": "balanced",
        "random_state": SEED,
    }
    
    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5
    )
    
    l2_c_results[C] = scores
    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")


print("\n\n=== L2 PENALTY: C COMPARISON ===")
for C, result in l2_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")


=== Testing L2 with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  50%|████▉     | 4318/8700 [00:01<00:01, 2711.02it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2663.84it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:32<00:00,  6.59s/it]


C=0.01 -> Test Accuracy: 0.2637 ± 0.0082

=== Testing L2 with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4481/8700 [00:01<00:01, 2645.66it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2628.43it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:40<00:00,  8.10s/it]


C=0.1 -> Test Accuracy: 0.3012 ± 0.0093

=== Testing L2 with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4289/8700 [00:01<00:01, 2728.42it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2678.74it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:02<00:00, 12.49s/it]


C=1.0 -> Test Accuracy: 0.3321 ± 0.0135

=== Testing L2 with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4491/8700 [00:01<00:01, 2571.17it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2630.67it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:42<00:00, 20.46s/it]


C=10.0 -> Test Accuracy: 0.3016 ± 0.0080

=== Testing L2 with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4534/8700 [00:01<00:01, 2685.60it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2656.86it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [02:19<00:00, 27.84s/it]

C=100.0 -> Test Accuracy: 0.2808 ± 0.0056


=== L2 PENALTY: C COMPARISON ===
C=  0.01 -> Train: 0.2785 ± 0.0017 | Test: 0.2637 ± 0.0082
C=  0.10 -> Train: 0.3404 ± 0.0041 | Test: 0.3012 ± 0.0093
C=  1.00 -> Train: 0.4527 ± 0.0080 | Test: 0.3321 ± 0.0135
C= 10.00 -> Train: 0.5196 ± 0.0045 | Test: 0.3016 ± 0.0080
C=100.00 -> Train: 0.5332 ± 0.0086 | Test: 0.2808 ± 0.0056





In [8]:
# Experiment: L1 penalty with different C values
l1_c_results = {}

bovw_params = {
    "detector_type": "SIFT",
    "codebook_size": 512,
    "detector_kwargs": {"nfeatures": 1000},
}

for C in [0.01, 0.1, 1.0, 10.0, 100.0]:
    print(f"\n=== Testing L1 with C={C} ===")
    
    classifier_cls = LogisticRegression
    classifier_params = {
        "C": C,
        "penalty": "l1",
        "solver": "saga",
        "max_iter": 1000,
        "class_weight": "balanced",
        "random_state": SEED,
    }
    
    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5
    )
    
    l1_c_results[C] = scores
    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")


print("\n\n=== L1 PENALTY: C COMPARISON ===")
for C, result in l1_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")


=== Testing L1 with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4290/8700 [00:01<00:01, 2713.88it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2661.13it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [00:35<00:00,  7.16s/it]


C=0.01 -> Test Accuracy: 0.0919 ± 0.0002

=== Testing L1 with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  50%|████▉     | 4311/8700 [00:01<00:01, 2723.15it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2678.22it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [00:44<00:00,  8.86s/it]


C=0.1 -> Test Accuracy: 0.2052 ± 0.0095

=== Testing L1 with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4527/8700 [00:01<00:01, 2674.36it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2647.16it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:21<00:00, 16.35s/it]


C=1.0 -> Test Accuracy: 0.3181 ± 0.0073

=== Testing L1 with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4290/8700 [00:01<00:01, 2709.03it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2665.05it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [05:21<00:00, 64.20s/it]


C=10.0 -> Test Accuracy: 0.2929 ± 0.0160

=== Testing L1 with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  50%|████▉     | 4317/8700 [00:01<00:01, 2728.24it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2675.86it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [12:05<00:00, 145.10s/it]

C=100.0 -> Test Accuracy: 0.2712 ± 0.0074


=== L1 PENALTY: C COMPARISON ===
C=  0.01 -> Train: 0.0920 ± 0.0000 | Test: 0.0919 ± 0.0002
C=  0.10 -> Train: 0.2115 ± 0.0047 | Test: 0.2052 ± 0.0095
C=  1.00 -> Train: 0.4125 ± 0.0049 | Test: 0.3181 ± 0.0073
C= 10.00 -> Train: 0.5311 ± 0.0040 | Test: 0.2929 ± 0.0160
C=100.00 -> Train: 0.5351 ± 0.0055 | Test: 0.2712 ± 0.0074





In [9]:
# Experiment: ElasticNet penalty with different C values
elasticnet_c_results = {}

bovw_params = {
    "detector_type": "SIFT",
    "codebook_size": 512,
    "detector_kwargs": {"nfeatures": 1000},
}

for C in [0.01, 0.1, 1.0, 10.0, 100.0]:
    print(f"\n=== Testing ElasticNet with C={C} ===")
    
    classifier_cls = LogisticRegression
    classifier_params = {
        "C": C,
        "penalty": "elasticnet",
        "solver": "saga",
        "l1_ratio": 0.5,  # Equal mix of L1 and L2
        "max_iter": 1000,
        "class_weight": "balanced",
        "random_state": SEED,
    }
    
    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5
    )
    
    elasticnet_c_results[C] = scores
    print(f"C={C} -> Test Accuracy: {scores.test.accuracy.mean:.4f} ± {scores.test.accuracy.std:.4f}")


print("\n\n=== ELASTICNET PENALTY: C COMPARISON ===")
for C, result in elasticnet_c_results.items():
    print(f"C={C:6.2f} -> Train: {result.train.accuracy.mean:.4f} ± {result.train.accuracy.std:.4f} | Test: {result.test.accuracy.mean:.4f} ± {result.test.accuracy.std:.4f}")


=== Testing ElasticNet with C=0.01 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4281/8700 [00:01<00:01, 2635.72it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2656.05it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
100%|██████████| 5/5 [00:36<00:00,  7.28s/it]


C=0.01 -> Test Accuracy: 0.0919 ± 0.0002

=== Testing ElasticNet with C=0.1 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4483/8700 [00:01<00:01, 2637.68it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2643.79it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:46<00:00,  9.30s/it]


C=0.1 -> Test Accuracy: 0.2361 ± 0.0093

=== Testing ElasticNet with C=1.0 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4541/8700 [00:01<00:01, 2690.31it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2642.55it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [00:56<00:00, 11.37s/it]


C=1.0 -> Test Accuracy: 0.3261 ± 0.0041

=== Testing ElasticNet with C=10.0 ===


Phase [Setup]: Extracting the descriptors:  49%|████▉     | 4286/8700 [00:01<00:01, 2716.31it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2669.93it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [01:42<00:00, 20.53s/it]


C=10.0 -> Test Accuracy: 0.2959 ± 0.0123

=== Testing ElasticNet with C=100.0 ===


Phase [Setup]: Extracting the descriptors:  52%|█████▏    | 4534/8700 [00:01<00:01, 2682.96it/s]

Could not compute descriptors for image ../data/places_reduced/train/water_ice_snow/iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:03<00:00, 2659.79it/s]


Could not compute descriptors for image ../data/places_reduced/train/mountains_hills_desert_sky/sky_00001410.jpg of class 5.


100%|██████████| 5/5 [04:41<00:00, 56.27s/it]

C=100.0 -> Test Accuracy: 0.2766 ± 0.0065


=== ELASTICNET PENALTY: C COMPARISON ===
C=  0.01 -> Train: 0.0919 ± 0.0001 | Test: 0.0919 ± 0.0002
C=  0.10 -> Train: 0.2495 ± 0.0061 | Test: 0.2361 ± 0.0093
C=  1.00 -> Train: 0.4355 ± 0.0038 | Test: 0.3261 ± 0.0041
C= 10.00 -> Train: 0.5227 ± 0.0035 | Test: 0.2959 ± 0.0123
C=100.00 -> Train: 0.5330 ± 0.0032 | Test: 0.2766 ± 0.0065



