In [None]:
import os
import sys
import cv2
import time
import numpy as np

In [None]:
from learning_utils import get_encoded_labels, split_data
from image_dataset import ImageDatasetLoader, ImageFeaturesDataset

## 1. Raw image dataset loader

In [None]:
# Root directory of the BigCats image dataset.
# Set the BIGCATS_DATASET_DIR environment variable to point at a local copy;
# the original hard-coded location is kept as the fallback so existing setups
# continue to work unchanged.
dir_dataset_bigcats = os.environ.get(
    "BIGCATS_DATASET_DIR",
    "F:/ml_datasets/RUG/pattern_recognition/assignment_2/BigCats/",
)

In [None]:
# Create the raw-image loader for the dataset directory configured above.
bigcats_raw_dataset_loader = ImageDatasetLoader(dir_dataset_bigcats)

In [None]:
# Trigger the actual dataset load. The cells below read the loader's
# `images` and `labels` attributes — presumably populated by this call;
# confirm against image_dataset.ImageDatasetLoader.
bigcats_raw_dataset_loader.load_dataset()

In [None]:
# Sanity check: display the shape of the loader's `images` attribute.
bigcats_raw_dataset_loader.images.shape

In [None]:
# Sanity check: display the shape of the loader's `labels` attribute.
bigcats_raw_dataset_loader.labels.shape

In [None]:
# Encode the raw labels; get_encoded_labels returns a pair and only the second
# element (the encoded labels) is kept.
# NOTE(review): the first element is discarded — presumably the fitted encoder;
# keep it if the class names need to be recovered later.
# NOTE(review): binding `_` in an IPython session shadows the built-in
# "last output" variable — consider a named throwaway instead.
_, encoded_labels = get_encoded_labels(bigcats_raw_dataset_loader.labels)

In [None]:
# Partition the images and their encoded labels into train and test subsets.
# NOTE(review): the split ratio and any random seed are decided inside
# split_data (not visible here) — confirm the split is reproducible, since the
# saved feature files and label files below all depend on it.
train_x, test_x, train_y, test_y = split_data(bigcats_raw_dataset_loader.images, encoded_labels)

## 2. Generate and save SIFT + Bag Of Visual Words [BOVW] features [can be used for images with arbitrary sizes]

In [None]:
from image_features import SIFTBagofVisualWordsFeatureExtractor

In [None]:
# Output directory (relative to the notebook's working directory) for the
# SIFT + BOVW feature arrays and their label arrays.
dir_sift_data = "sift_features"
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(dir_sift_data, exist_ok=True)

#### 2.1 Save BOVW features with K-Means clustering

In [None]:
# Sweep the visual-vocabulary size from 5 to 200 words in steps of 5.
# For each size: build a SIFT + BOVW extractor on the training images, fit the
# K-Means clustering, then save the train and test histograms under
# dir_sift_data as train_sift_<n>.npy and test_sift_<n>.npy.
#
# NOTE(review): a fresh extractor is built per vocabulary size, so the
# descriptor step runs again on every iteration. If the descriptors do not
# depend on num_visual_words they could be computed once outside the loop —
# confirm against image_features before hoisting.
for num_words in range(5, 205, 5):
    bovw_feature_extractor = SIFTBagofVisualWordsFeatureExtractor(
        train_x,
        clustering_method="kmeans",
        num_visual_words=num_words,
    )
    bovw_feature_extractor.init_sift()
    bovw_feature_extractor.compute_descriptor_on_train_set()
    bovw_feature_extractor.init_clustering()
    bovw_feature_extractor.fit_clustering_on_train_set()

    # Durations are measured with time.perf_counter(): it is monotonic, so the
    # reported value cannot be skewed by system clock adjustments the way a
    # time.time() difference can.
    t1 = time.perf_counter()
    train_bovw = bovw_feature_extractor.get_train_image_histograms()
    t2 = time.perf_counter()
    print(f"Time taken for SIFT + BOVW features for train dataset : {t2 - t1} sec., num visual words : {num_words}")
    file_train = os.path.join(
        dir_sift_data,
        f"train_sift_{bovw_feature_extractor.num_visual_words}.npy"
    )
    np.save(file_train, train_bovw)

    # Test histograms are produced by the same fitted extractor.
    t1 = time.perf_counter()
    test_bovw = bovw_feature_extractor.get_test_image_histograms(test_x)
    t2 = time.perf_counter()
    print(f"Time taken for SIFT + BOVW features for test dataset : {t2 - t1} sec., num visual words : {num_words}")
    file_test = os.path.join(
        dir_sift_data,
        f"test_sift_{bovw_feature_extractor.num_visual_words}.npy"
    )
    np.save(file_test, test_bovw)

#### 2.2 Save image label arrays

In [None]:
# Store the training labels next to the SIFT feature files so the feature
# directory can be used without re-running the split.
np.save(os.path.join(dir_sift_data, "train_labels.npy"), train_y)

In [None]:
# Store the test labels next to the SIFT feature files.
np.save(os.path.join(dir_sift_data, "test_labels.npy"), test_y)

## 3. Generate and save Fourier transform features [can be used for images with arbitrary sizes]

In [None]:
from image_features import FastFourierTransformFeatureExtractor

In [None]:
# Output directory (relative to the notebook's working directory) for the
# FFT feature arrays and their label arrays.
dir_fft_data = "fft_features"
# exist_ok=True creates the directory only when missing, without the
# check-then-create race of a separate os.path.isdir() test.
os.makedirs(dir_fft_data, exist_ok=True)

In [None]:
# Sanity check: display the shape of the training image collection.
train_x.shape

In [None]:
# Sanity check: display the shape of the test image collection.
test_x.shape

In [None]:
# Sanity check: display the shape of the first training image.
train_x[0].shape

#### 3.1 Save dimension-reduced FFT features

In [None]:
# Sweep square target sizes 30, 40, ..., 70. For each size, extract FFT
# features for every train and test image, flatten each result to a 1-D
# float32 vector, stack them into one array per split, print the resulting
# shapes, and save both arrays under dir_fft_data.
# NOTE(review): the float32 cast assumes get_features_using_fft returns
# real-valued features — confirm it does not return complex coefficients.
for num_dim in range(30, 80, 10):
    fft_feat_extractor = FastFourierTransformFeatureExtractor(target_dim=(num_dim, num_dim))

    # Train images are processed first, then test images, each in dataset order.
    all_train_feats = np.array([
        fft_feat_extractor.get_features_using_fft(image).astype(np.float32).reshape(-1)
        for image in train_x
    ])
    all_test_feats = np.array([
        fft_feat_extractor.get_features_using_fft(image).astype(np.float32).reshape(-1)
        for image in test_x
    ])

    print(all_train_feats.shape)
    print(all_test_feats.shape)

    train_feats_path = os.path.join(dir_fft_data, f"train_fft_{num_dim}.npy")
    test_feats_path = os.path.join(dir_fft_data, f"test_fft_{num_dim}.npy")
    np.save(train_feats_path, all_train_feats)
    np.save(test_feats_path, all_test_feats)

#### 3.2 Save image label arrays

In [None]:
# Store the training labels next to the FFT feature files so the feature
# directory can be used without re-running the split.
np.save(os.path.join(dir_fft_data, "train_labels.npy"), train_y)

In [None]:
# Store the test labels next to the FFT feature files.
np.save(os.path.join(dir_fft_data, "test_labels.npy"), test_y)