In [None]:
import os
import sys
import cv2
import time
import numpy as np

from image_dataset import ImageDatasetLoader, ImageFeaturesDataset

## 1. Raw image dataset loader

In [None]:
# Root directory of the raw BigCats image dataset.
# Set the BIGCATS_DATASET_DIR environment variable to point at a local copy;
# when it is unset or empty, the original author's path is used so existing
# setups keep working unchanged.
dir_dataset_bigcats = (
    os.environ.get("BIGCATS_DATASET_DIR")
    or "F:/ml_datasets/RUG/pattern_recognition/assignment_2/BigCats/"
)

In [None]:
# Create the loader for the raw BigCats images, rooted at the dataset directory.
# Nothing is read from it until load_dataset() is called in the next cell.
# NOTE(review): whether the constructor itself touches the disk is not visible here — confirm in image_dataset.py.
bigcats_raw_dataset_loader = ImageDatasetLoader(dir_dataset_bigcats)

In [None]:
# Run the loader's load step; the cells below read `.images` and `.labels` from it.
# NOTE(review): presumably this reads every image under the dataset directory into memory — confirm in image_dataset.py.
bigcats_raw_dataset_loader.load_dataset()

In [None]:
# Sanity check: display the shape of the loaded images.
bigcats_raw_dataset_loader.images.shape

In [None]:
# Sanity check: display the shape of the loaded labels.
# NOTE(review): its first dimension should presumably match the number of images shown above — confirm.
bigcats_raw_dataset_loader.labels.shape

## 2. Generate and save SIFT + Bag of Visual Words (BOVW) features (image data only)

In [None]:
from image_features import SIFTBagofVisualWordsFeatureExtractor

In [None]:
from learning_utils import get_encoded_labels, split_data

In [None]:
# Encode the raw labels; only the second returned value is kept, the first is discarded.
# NOTE(review): the first value is presumably the fitted encoder or class list — confirm in learning_utils.py.
# NOTE(review): assigning to `_` shadows IPython's "last output" variable for the rest of the kernel session.
_, encoded_labels = get_encoded_labels(bigcats_raw_dataset_loader.labels)

In [None]:
# Split the images and their encoded labels into train and test parts.
# NOTE(review): no split ratio or random seed is passed here, so both come from
# split_data's defaults — confirm the split is reproducible across runs, because
# the feature files and label files saved below must come from the same split.
train_x, test_x, train_y, test_y = split_data(bigcats_raw_dataset_loader.images, encoded_labels)

In [None]:
# Output directory (relative to the working directory) for the saved BOVW
# feature arrays and label arrays.
dir_bovw_features_data = "bovw_features_data"
# exist_ok=True makes the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(dir_bovw_features_data, exist_ok=True)

### 2.1 Generate BOVW with K-Means clustering

In [None]:
def _bovw_feature_file(split_name, extractor):
    """Return the .npy path for one split's BOVW histograms.

    The file is placed in ``dir_bovw_features_data`` and named
    ``<split_name>_<clustering_method>_<num_visual_words>.npy``, using the
    clustering method and vocabulary size stored on ``extractor``.
    """
    return os.path.join(
        dir_bovw_features_data,
        f"{split_name}_{extractor.clustering_method}_{extractor.num_visual_words}.npy"
    )


# Sweep the vocabulary size from 5 to 200 visual words in steps of 5. For each
# size, fit a K-Means BOVW extractor on the training images and save the
# resulting train and test histograms as separate .npy files.
# NOTE(review): the extractor is rebuilt and compute_descriptor_on_train_set()
# is called again in every iteration; if the descriptors do not depend on
# num_visual_words they could be computed once — confirm in image_features.py.
for num_words in range(5, 205, 5):
    bovw_feature_extractor = SIFTBagofVisualWordsFeatureExtractor(
        train_x, clustering_method="kmeans", num_visual_words=num_words
    )
    bovw_feature_extractor.init_sift()
    bovw_feature_extractor.compute_descriptor_on_train_set()
    bovw_feature_extractor.init_clustering()
    bovw_feature_extractor.fit_clustering_on_train_set()

    # The timings below cover only the histogram computation, not the
    # descriptor extraction or clustering above. perf_counter() is used because
    # it is monotonic and meant for measuring elapsed intervals.
    t1 = time.perf_counter()
    train_bovw = bovw_feature_extractor.get_train_image_histograms()
    t2 = time.perf_counter()
    print(f"Time taken for SIFT + BOVW features for train dataset : {t2 - t1} sec., num visual words : {num_words}")
    file_train = _bovw_feature_file("train", bovw_feature_extractor)
    np.save(file_train, train_bovw)

    t1 = time.perf_counter()
    test_bovw = bovw_feature_extractor.get_test_image_histograms(test_x)
    t2 = time.perf_counter()
    print(f"Time taken for SIFT + BOVW features for test dataset : {t2 - t1} sec., num visual words : {num_words}")
    file_test = _bovw_feature_file("test", bovw_feature_extractor)
    np.save(file_test, test_bovw)

## 3. Save image label arrays

In [None]:
# Store the training-split labels alongside the BOVW feature files.
file_train_labels = os.path.join(dir_bovw_features_data, "train_labels.npy")
np.save(file_train_labels, train_y)

In [None]:
# Store the test-split labels alongside the BOVW feature files.
file_test_labels = os.path.join(dir_bovw_features_data, "test_labels.npy")
np.save(file_test_labels, test_y)