In [1]:
import os
import sys
import cv2
import time
import numpy as np

from image_dataset import ImageDatasetLoader, ImageFeaturesDataset

## 1. Raw image dataset loader

In [2]:
# Root folder of the BigCats images. Set the BIGCATS_DATASET_DIR environment
# variable to point the notebook at another copy of the dataset; when it is
# unset, the original location is used unchanged.
dir_dataset_bigcats = os.environ.get(
    "BIGCATS_DATASET_DIR",
    "F:/ml_datasets/RUG/pattern_recognition/assignment_2/BigCats/",
)

In [3]:
# Create the loader for the dataset root; nothing is read from disk in this
# cell -- the images are loaded by the explicit load_dataset() call that follows.
# NOTE(review): assumes the constructor itself does no loading -- confirm.
bigcats_raw_dataset_loader = ImageDatasetLoader(dir_dataset_bigcats)

In [4]:
# Load the dataset. The call reports the dataset directory, the image
# categories it found and the number of images per category; afterwards the
# loader's .images and .labels attributes are used by the cells below.
bigcats_raw_dataset_loader.load_dataset()

dataset directory : F:/ml_datasets/RUG/pattern_recognition/assignment_2/BigCats/
found the following image categories in the dataset directory
['Cheetah', 'Jaguar', 'Leopard', 'Lion', 'Tiger']
num images : 38, image category: Cheetah
num images : 30, image category: Jaguar
num images : 31, image category: Leopard
num images : 32, image category: Lion
num images : 39, image category: Tiger


In [5]:
# Sanity check: the images container should hold one entry per loaded image.
bigcats_raw_dataset_loader.images.shape

(170,)

In [6]:
# Sanity check: there must be exactly one label per image (same shape as .images).
bigcats_raw_dataset_loader.labels.shape

(170,)

## 2. SIFT + Bag of visual words feature extraction [to be used only for image data]

In [7]:
from image_features import SIFTFeatureExtractor

In [8]:
from learning_utils import get_encoded_labels, split_data

In [9]:
# Encode the raw labels; only the second returned value is used downstream.
# NOTE(review): `_` is also IPython's "last output" variable -- binding it here
# may shadow that feature for the rest of the session; consider a named throwaway.
_, encoded_labels = get_encoded_labels(bigcats_raw_dataset_loader.labels)

In [10]:
# Split the raw images and their encoded labels into train and test parts
# before any feature extraction takes place.
train_x, test_x, train_y, test_y = split_data(
    bigcats_raw_dataset_loader.images,
    encoded_labels,
)

In [11]:
# The extractor is constructed from the training images only; test images are
# passed in later, when their histograms are requested.
sift_extractor = SIFTFeatureExtractor(train_x)

In [12]:
# Prepare the extractor. The four stages run in this fixed order; going by the
# method names: set up SIFT, compute descriptors for the training images,
# set up k-means, then fit k-means on the training set.
# NOTE(review): no random seed is visible here -- confirm init_k_means() fixes
# one, otherwise the histograms and the scores further down vary between runs.
sift_extractor.init_sift()
sift_extractor.compute_descriptor_on_train_set()
sift_extractor.init_k_means()
sift_extractor.fit_k_means_on_train_set()

In [13]:
# Time the histogram computation for the training images.
# perf_counter() is monotonic and high-resolution, so the measured duration
# cannot be distorted by system clock adjustments during this long-running step
# (time.time() offers no such guarantee).
t1 = time.perf_counter()
train_hist = sift_extractor.get_train_image_histograms()
t2 = time.perf_counter()
print(f"Time taken for SIFT + Bag of visual words features for train dataset : {t2 - t1} sec.")

Time taken for SIFT + Bag of visual words features for train dataset : 622.7762327194214 sec.


In [14]:
# Time the histogram computation for the held-out test images.
# perf_counter() is monotonic and high-resolution, so the measured duration
# cannot be distorted by system clock adjustments during this long-running step
# (time.time() offers no such guarantee).
t1 = time.perf_counter()
test_hist = sift_extractor.get_test_image_histograms(test_x)
t2 = time.perf_counter()
print(f"Time taken for SIFT + Bag of visual words features for test dataset : {t2 - t1} sec.")

Time taken for SIFT + Bag of visual words features for test dataset : 186.43226027488708 sec.


## 3. Create a dataset with the extracted bag of visual words features

#### Convert NumPy arrays to pandas DataFrames

In [15]:
from learning_utils import get_logistic_regression_model, get_standard_scaler, get_tfidf_transformer, get_learning_pipeline, compute_classification_metrics

In [16]:
# Pair the training histograms with their encoded labels. convert_dataset() is
# called before .image_features / .labels are read by the experiment loop.
# NOTE(review): per the section heading this is the NumPy -> pandas
# conversion step -- confirm against ImageFeaturesDataset.
train_dataset = ImageFeaturesDataset(train_hist, train_y)
train_dataset.convert_dataset()

In [17]:
# Pair the test histograms with their encoded labels. convert_dataset() is
# called before .image_features / .labels are read by the experiment loop.
# NOTE(review): per the section heading this is the NumPy -> pandas
# conversion step -- confirm against ImageFeaturesDataset.
test_dataset = ImageFeaturesDataset(test_hist, test_y)
test_dataset.convert_dataset()

## 4. Experiment with various pipelines

In [18]:
# Iteration cap shared by every logistic-regression candidate. It is named once
# so that all pipelines are tuned from a single place instead of repeating a
# bare literal in each entry.
LOG_REG_MAX_ITER = 150

# Each entry is a list of (step name, estimator) pairs that is later handed to
# get_learning_pipeline(). The candidates differ only in their preprocessing
# step, and every entry gets its own model instance from a separate call.
list_pipelines = [
    [
        ("scaler", get_standard_scaler()),
        ("log_reg", get_logistic_regression_model(LOG_REG_MAX_ITER)),
    ],
    [
        ("tfidf_transformer", get_tfidf_transformer()),
        ("log_reg", get_logistic_regression_model(LOG_REG_MAX_ITER)),
    ],
]

In [19]:
# The features and labels are identical for every candidate pipeline, so they
# are bound once, outside the loop. Dedicated names are used on purpose: the
# original train_x / test_x / train_y / test_y still hold the raw image split
# consumed by the SIFT extractor, and overwriting them here would make the
# earlier cells produce different results when re-run.
train_features = train_dataset.image_features
test_features = test_dataset.image_features

# scikit-learn expects 1-D targets. Passing the label frame as-is makes every
# fit() emit a column-vector conversion warning, so the label column is
# flattened up front for both splits.
train_targets = train_dataset.labels.label.to_numpy().ravel()
test_targets = test_dataset.labels.label.to_numpy().ravel()

for pipeline in list_pipelines:
    # Build, show, fit and evaluate one candidate pipeline.
    learning_pipeline = get_learning_pipeline(pipeline)
    print("learning pipeline")
    print(learning_pipeline)
    learning_pipeline.fit(train_features, train_targets)
    test_pred = learning_pipeline.predict(test_features)

    compute_classification_metrics(test_targets, test_pred)
    print("")

learning pipeline
Pipeline(steps=[('scaler', StandardScaler()),
                ('log_reg', LogisticRegression(max_iter=150))])
----------------------
classification metrics
----------------------
accuracy : 0.5294
f1 score : 0.5312
confustion matrix
[[8 1 1 1 1]
 [0 2 2 0 0]
 [1 4 1 1 0]
 [0 0 0 3 2]
 [0 0 0 2 4]]

learning pipeline
Pipeline(steps=[('tfidf_transformer', TfidfTransformer()),
                ('log_reg', LogisticRegression(max_iter=150))])
----------------------
classification metrics
----------------------
accuracy : 0.5294
f1 score : 0.5057
confustion matrix
[[5 2 1 2 2]
 [0 3 1 0 0]
 [0 3 1 2 1]
 [0 0 0 3 2]
 [0 0 0 0 6]]



  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
