In [1]:
from main import *
from bovw import *
from plotting import *

In [2]:
import os
import random
import numpy as np

%matplotlib inline
from matplotlib import pyplot as plt

In [3]:
# Single seed shared by every random number generator used in this notebook.
SEED = 42

# NOTE: PYTHONHASHSEED is read when an interpreter starts, so this assignment
# reaches processes launched afterwards; it does not change the hash
# randomization of the kernel that is already running.
os.environ["PYTHONHASHSEED"] = f"{SEED}"

# Seed the global generators of the stdlib `random` module and of NumPy.
np.random.seed(SEED)
random.seed(SEED)

In [4]:
# Build the two dataset splits from their image folders.
# Note that `data_test` is read from the `val` folder.
data_train, data_test = (
    Dataset(ImageFolder="../data/places_reduced/train"),
    Dataset(ImageFolder="../data/places_reduced/val"),
)

# Display the number of items in each split as the cell output.
tuple(map(len, (data_train, data_test)))

(8700, 2200)

In [5]:
# Shuffle both splits in place, train first and then test, using the global
# `random` generator. Because the shuffle is in place and consumes global RNG
# state, re-running this cell without re-running the seeding cell gives a
# different order.
for split in (data_train, data_test):
    random.shuffle(split)

# `enable_cache` comes from the project's star imports; presumably it turns on
# caching of intermediate results for the pipeline -- TODO confirm.
enable_cache()

In [9]:
# Cross-validate the SIFT + Fisher-vector pipeline once per codebook size and
# keep each run's scores in `classifier_results`, keyed by that size.
codebook_sizes = [16, 32, 64, 128, 256]
classifier_results = {}

for codebook_size in codebook_sizes:
    # Only "codebook_size" varies between iterations. The configuration dicts
    # are rebuilt on every pass so each call receives fresh objects.
    bovw_params = {
        "detector_type": "SIFT",
        "encoding_method": "fisher",
        "codebook_size": codebook_size,
        "descriptor_normalization": "L2",
        # Necessary for Fisher Vectors in logistic regression
        "joint_descriptor_normalization": "Standard",
        "detector_kwargs": {"nfeatures": 1000},
        # Necessary for Fisher Vectors
        "dimensionality_reduction": "PCA",
        "dimensionality_reduction_kwargs": {"n_components": 64},
        "pyramid_levels": None,
    }

    classifier_cls = LogisticRegression
    classifier_params = {"max_iter": 1000, "class_weight": "balanced"}

    # 5-fold cross-validation on the training split only.
    scores = cross_validate_bovw(
        dataset=data_train,
        bovw_kwargs=bovw_params,
        classifier_cls=classifier_cls,
        classifier_kwargs=classifier_params,
        n_splits=5,
    )
    classifier_results[codebook_size] = scores


# Report mean ± std accuracy on the train and validation folds for every
# codebook size that finished.
print("\nSUMMARY")
for cb, cv_scores in classifier_results.items():
    train_acc = cv_scores.train.accuracy
    print(f"Cb Size={cb} -> Accuracy Train: {train_acc.mean:.4f} ± {train_acc.std:.4f}")
    val_acc = cv_scores.val.accuracy
    print(f"Cb Size={cb} -> Accuracy Val: {val_acc.mean:.4f} ± {val_acc.std:.4f}")
    print()

Phase [Setup]: Extracting the descriptors:   6%|▌         | 512/8700 [00:02<00:31, 262.47it/s]

Could not compute descriptors for image ../data/places_reduced/train\water_ice_snow\iceberg_00000023.jpg of class 9.


Phase [Setup]: Extracting the descriptors:  59%|█████▊    | 5108/8700 [00:25<00:12, 291.08it/s]

Could not compute descriptors for image ../data/places_reduced/train\mountains_hills_desert_sky\sky_00001410.jpg of class 5.


Phase [Setup]: Extracting the descriptors: 100%|██████████| 8700/8700 [00:38<00:00, 228.66it/s]
 40%|████      | 2/5 [09:29<14:14, 284.77s/it]


KeyboardInterrupt: 

In [None]:
# Plot the validation accuracy vs codebook size.
#
# The swept hyperparameter in the experiment cell is the Fisher-vector
# codebook size (the keys of `classifier_results`), so the labels below name
# that sweep rather than a fixed codebook size or pyramid levels.
x_values = list(classifier_results)
means = [classifier_results[cb].val.accuracy.mean for cb in x_values]
stds = [classifier_results[cb].val.accuracy.std for cb in x_values]

# `descriptor_name` and `hyperparam_name` are presumably used by
# `plot_cv_accuracy` for the title / axis label -- TODO confirm in plotting.py.
plot_cv_accuracy(
    x_values=x_values,
    means=means,
    stds=stds,
    descriptor_name="SIFT + Fisher Vectors (PCA n_components=64)",
    hyperparam_name="Codebook Size"
)