# Lab 1. Bag of Words (BoW)

### Import libraries

In [1]:
from typing import List

from bow import BoW
from dataset import Dataset
from image_classifier import ImageClassifier

### Define helper functions

In [2]:
def build_vocabulary(
    dataset: List[str],
    vocabulary: str = 'vocabulary',
    feature_type: str = 'SIFT',
    size: int = 200,       # tune this
    iterations: int = 40,  # tune this
) -> None:
    """Create a visual vocabulary from a set of images and save it to disk.

    Args:
        dataset: Paths to the training images.
        vocabulary: Relative path (without extension) of the file the vocabulary is saved to.
        feature_type: Feature extractor, one of { SIFT, SURF, KAZE }.
            Author's note: SIFT or SURF give better results.
        size: Number of words in the vocabulary. Author's note: typical values are 200, 500, ...
        iterations: Maximum number of K-means iterations.

    """
    vocabulary_builder = BoW()
    vocabulary_builder.build_vocabulary(
        dataset,
        feature_type=feature_type,
        vocabulary_size=size,
        iterations=iterations,
    )
    vocabulary_builder.save_vocabulary(vocabulary)

def train_classifier(
    dataset: List[str],
    vocabulary: str = 'vocabulary',
    classifier: str = 'classifier',
    iterations: int = 200,  # the only knob to tune here
) -> None:
    """Fit an SVM classifier on top of a saved vocabulary and save it to disk.

    Args:
        dataset: Paths to the training images.
        vocabulary: Relative path (without extension) of the vocabulary file to load.
        classifier: Relative path (without extension) of the file the classifier is saved to.
        iterations: Maximum number of SVM iterations.

    """
    # The classifier is built around a previously saved vocabulary.
    visual_words = BoW()
    visual_words.load_vocabulary(vocabulary)

    svm = ImageClassifier(visual_words)
    svm.train(dataset, iterations=iterations)
    svm.save(classifier)

def predict(
    dataset: List[str],
    dataset_name: str = "",
    vocabulary: str = 'vocabulary',
    classifier: str = 'classifier',
) -> None:
    """Run a saved classifier over a set of images.

    Args:
        dataset: Paths to the images.
        dataset_name: Descriptive name of the dataset, forwarded to the classifier's predict call.
        vocabulary: Relative path (without extension) of the vocabulary file to load.
        classifier: Relative path (without extension) of the classifier file to load.

    """
    # Both artifacts are read back from disk; nothing is trained here.
    visual_words = BoW()
    visual_words.load_vocabulary(vocabulary)

    svm = ImageClassifier(visual_words)
    svm.load(classifier)
    svm.predict(dataset, dataset_name=dataset_name)

### Load datasets

In [3]:
# Collect the '*.jpg' files of each split. The paths are relative, so the
# notebook must be run from the directory it lives in.
# NOTE(review): Dataset.load presumably returns a list of image paths (the
# helper functions above annotate their `dataset` argument as List[str]) — confirm.
training_set = Dataset.load('../dataset/training', '*.jpg')
validation_set = Dataset.load('../dataset/validation', '*.jpg')

### Build vocabulary and train an SVM classifier

In [4]:
# Build the vocabulary and then train the classifier on top of it.
# train_classifier loads the saved vocabulary, so it should be re-run whenever
# the vocabulary is rebuilt; otherwise the inference cells below load whatever
# 'classifier' file is left on disk from an earlier run.
build_vocabulary(training_set)
train_classifier(training_set)


BUILDING DICTIONARY

ComputingSIFTdescriptors...
100%|██████████| 2985/2985 [01:25<00:00, 34.09image/s]

Clustering descriptors into200words using K-means...


### Perform inference on the training set

In [6]:
# Run inference on the training images; the report label is passed by keyword.
predict(training_set, dataset_name="Training")



CLASSIFICATION RESULTS (TRAINING)

Confusion matrix

KNOWN/PREDICTED  Bedroom  Coast  Forest  Highway  Industrial  Inside city  Kitchen  Living room  Mountain  Office  Open country  Store  Street  Suburb  Tall building
Bedroom             93.0    0.0     2.0      0.0         3.0          2.0      0.0          9.0       0.0     1.0           0.0    2.0     3.0     0.0            1.0
Coast                0.0  227.0     2.0      0.0         1.0          0.0      0.0          0.0       7.0     0.0          21.0    0.0     0.0     2.0            0.0
Forest               0.0    0.0   226.0      0.0         0.0          0.0      0.0          0.0       1.0     0.0           1.0    0.0     0.0     0.0            0.0
Highway              0.0    4.0     1.0    146.0         2.0          1.0      0.0          0.0       2.0     0.0           0.0    1.0     1.0     0.0            2.0
Industrial           1.0    1.0     0.0      2.0       167.0          6.0      2.0          3.0       2.0     2.0  

### Perform inference on the validation set

In [7]:
# Run inference on the validation images; the report label is passed by keyword.
predict(validation_set, dataset_name="Validation")



CLASSIFICATION RESULTS (VALIDATION)

Confusion matrix

KNOWN/PREDICTED  Bedroom  Coast  Forest  Highway  Industrial  Inside city  Kitchen  Living room  Mountain  Office  Open country  Store  Street  Suburb  Tall building
Bedroom             20.0    2.0     2.0      3.0         6.0          5.0      4.0         38.0       0.0     5.0           1.0    9.0     2.0     0.0            3.0
Coast                2.0   57.0     2.0      5.0         3.0          0.0      1.0          1.0      16.0     0.0          12.0    0.0     0.0     0.0            1.0
Forest               0.0    0.0    87.0      0.0         0.0          0.0      0.0          0.0       9.0     0.0           3.0    0.0     0.0     1.0            0.0
Highway              0.0   18.0     0.0     53.0         6.0          2.0      2.0          0.0       3.0     0.0           5.0    1.0     4.0     1.0            5.0
Industrial           5.0    1.0     1.0      3.0        39.0         11.0      4.0          5.0       0.0     3.0