In [None]:
# useful libraries
import numpy as np
import pandas as pd
import cv2
from sklearn.cluster import KMeans
import pickle
from scipy.spatial.distance import cdist
import os
import glob
import torchvision.datasets
from torch.utils.data import DataLoader, Subset
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import matplotlib.pyplot as plt

In [None]:
# Ground-truth labels: one CSV for the training split, one for the validation
# split (the validation split is used as the test set in this notebook).
trainLabels = pd.read_csv('/kaggle/input/supervised-sets/train_labels.csv')
# Single leading slash, like every other path here: POSIX leaves the meaning of
# a leading '//' implementation-defined.
testLabels = pd.read_csv('/kaggle/input/supervised-sets/val_labels.csv')

# Distinct values found in the 'label' column of each split
trainClasses = trainLabels['label'].unique()
testClasses = testLabels['label'].unique()

In [None]:
# Locations of the preprocessed image folders (both live under the same root)
_PROCESSED_ROOT = '/kaggle/input/supervised-sets/processedData/processedData'
train_dir = _PROCESSED_ROOT + '/processed_train_set'
val_dir = _PROCESSED_ROOT + '/processed_val_set'  # test directory

In [None]:
# Statistics pass: build a resized but NOT yet normalized copy of the training
# set, so the normalization constants can be measured from the data itself.
size = 224

fake_transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Resize((size, size))
    ])


# Reuse train_dir rather than repeating the literal path, so the training
# location is defined in exactly one place.
fake_training_set = torchvision.datasets.ImageFolder(root=train_dir, transform=fake_transforms)
# "fake" loader used only to compute mean and std for normalization; the
# statistics do not depend on sample order, so shuffling is disabled to keep
# this pass deterministic.
fake_train_loader = DataLoader(fake_training_set, batch_size=64, shuffle=False, num_workers=4)

In [None]:
# compute the desired mean and standard deviation for the normalization
def tot_mean_std(loader):
    """Average the per-image mean and per-image standard deviation over a loader.

    Each batch is flattened to one row per image (all channels and pixels of an
    image end up in the same row), so a single scalar mean and a single scalar
    standard deviation are computed per image. The returned values are the
    averages of those per-image statistics over every image seen.

    Parameters
    ----------
    loader : iterable of (batch, labels)
        `batch` must be a tensor whose first dimension indexes the images;
        the labels are ignored.

    Returns
    -------
    (mean, std)
        Average per-image mean and average per-image standard deviation.
    """
    mean_total = 0
    std_total = 0
    n_images = 0

    for images, _ in loader:
        n_in_batch = images.size(0)
        # One row per image: [n_in_batch, everything else flattened]
        flat = images.view(n_in_batch, -1)
        per_image_mean = flat.mean(1)
        per_image_std = flat.std(1)
        mean_total = mean_total + per_image_mean.sum(0)
        std_total = std_total + per_image_std.sum(0)
        n_images = n_images + n_in_batch

    return mean_total / n_images, std_total / n_images

In [None]:
# creation of the datasets with resized, grayscale and normalized images
size = 224
# Normalization constants measured on the un-normalized training images
mean, std = tot_mean_std(fake_train_loader)

# Order matters: tensor conversion -> resize -> normalize -> single-channel
transforms = torchvision.transforms.Compose([
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Resize((size, size)),
        torchvision.transforms.Normalize(mean = mean, std = std),
        torchvision.transforms.Grayscale()
    ])

# Use the directory variables defined earlier instead of repeating literals.
# The test images live in val_dir (the notebook's declared test directory);
# the previously hard-coded 'processed_test_set' folder disagreed with it.
training_set = torchvision.datasets.ImageFolder(root=train_dir, transform=transforms)
test_set = torchvision.datasets.ImageFolder(root=val_dir, transform=transforms)

# training set reduction: get a fixed percentage from each class
def get_subset(data, percentage, seed=None):
    """Return a class-stratified random subset of an ImageFolder-style dataset.

    Parameters
    ----------
    data : dataset exposing a `samples` sequence of (item, label) pairs
        Only `data.samples` is read here; the dataset itself is wrapped in the
        returned `Subset`.
    percentage : float
        Fraction of every class to keep. The per-class count is truncated
        towards zero, so very small classes may contribute no samples.
    seed : int or None, optional
        When given, a private `RandomState(seed)` drives the shuffling so the
        same subset is selected on every run. When None (default), NumPy's
        global generator is used, exactly as before this parameter existed.

    Returns
    -------
    torch.utils.data.Subset
        View on `data` restricted to the selected indices, grouped by class in
        order of first appearance.
    """
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Group sample positions by their class label
    class_indices = {}
    for idx, (_, label) in enumerate(data.samples):
        class_indices.setdefault(label, []).append(idx)

    # Shuffle inside each class, then keep the leading fraction
    subset_indices = []
    for indices in class_indices.values():
        rng.shuffle(indices)
        n_subset = int(len(indices) * percentage)
        subset_indices.extend(indices[:n_subset])

    return Subset(data, subset_indices)

# grid search will be performed on 10% of the training set for computational reasons
subset_training = get_subset(data=training_set, percentage=0.1)

# "true" loader: shuffled mini-batches of 64 images taken from the reduced training set
train_loader = DataLoader(
    dataset=subset_training,
    batch_size=64,
    shuffle=True,
    num_workers=4,
)

In [None]:
# extract features with SIFT
def _to_uint8_image(image):
    """Convert a (C, H, W) tensor to an (H, W, C) uint8 array spanning 0-255."""
    image_np = image.numpy().transpose(1, 2, 0).astype(np.float32)
    lowest, highest = image_np.min(), image_np.max()
    if highest > lowest:
        # Min-max rescale so the full 8-bit range is used
        image_np = (image_np - lowest) / (highest - lowest) * 255.0
    else:
        # Constant image: nothing to stretch, and avoid dividing by zero
        image_np = np.zeros_like(image_np)
    return np.round(image_np).astype(np.uint8)


def sift_features(image_list):
    """Compute SIFT descriptors for every image in a batch.

    The images reaching this function have been normalized, so their values are
    small floats that may be negative. Casting those directly to uint8 would
    collapse almost every pixel to a handful of integer levels, so each image
    is first rescaled to the 0-255 range.

    Parameters
    ----------
    image_list : iterable of tensors shaped (C, H, W), or None entries
        Typically one batch produced by the data loader.

    Returns
    -------
    (descriptors, valid_indices)
        `descriptors` holds one descriptor array per image for which SIFT
        returned descriptors; `valid_indices` holds the positions of those
        images inside `image_list`, in the same order.
    """
    descriptors = []
    valid_indices = []
    sift = cv2.SIFT_create()
    for i, image in enumerate(image_list):
        if image is None:
            print("Image is none")
            continue
        image_np = _to_uint8_image(image)
        _, descriptor = sift.detectAndCompute(image_np, None)
        # detectAndCompute may return None as descriptors; such images are skipped
        if descriptor is not None:
            descriptors.append(descriptor)
            valid_indices.append(i)

    return descriptors, valid_indices

In [None]:
# create BOW dictionary with k-means applied on SIFT descriptors to compute centroids
# G is the number of words in the vocabulary
def bow_dictionary(descriptors, G, random_state=None):
    """Build a bag-of-words vocabulary by clustering descriptors with k-means.

    Parameters
    ----------
    descriptors : array-like accepted by `KMeans.fit`
        The pooled descriptors to cluster.
    G : int
        Number of clusters, i.e. number of words in the vocabulary.
    random_state : int, RandomState instance or None, optional
        Forwarded to `KMeans`; pass an int for a reproducible vocabulary.
        None (default) keeps the previous, unseeded behaviour.

    Returns
    -------
    ndarray
        The cluster centres found by k-means (`cluster_centers_`).

    Side effects
    ------------
    Pickles the centres to 'bow_dictionary.pkl' in the working directory, but
    only when that file does not exist yet. An existing file is never
    overwritten, so it may hold a vocabulary from an earlier run.
    """
    kmeans = KMeans(n_clusters=G, random_state=random_state)
    kmeans.fit(descriptors)

    bow_dict = kmeans.cluster_centers_

    if not os.path.isfile('bow_dictionary.pkl'):
        # Context manager guarantees the file is flushed and closed
        with open('bow_dictionary.pkl', 'wb') as handle:
            pickle.dump(bow_dict, handle)

    return bow_dict

In [None]:
# extract BoW features (histograms)
def bow_features(descriptors, centers, G):
    """Turn descriptor arrays into bag-of-words histograms.

    Parameters
    ----------
    descriptors : iterable of iterables of 2-D arrays (or None entries)
        Two nesting levels: outer groups, each holding one descriptor array
        per image.
    centers : 2-D array
        Vocabulary words; each descriptor is assigned to its nearest row.
    G : int
        Length of every histogram.

    Returns
    -------
    list of ndarray
        One length-G histogram per non-None descriptor array, in iteration
        order. None entries are reported with a message and skipped.
    """
    histograms = []
    # Walk both nesting levels as one flat stream of per-image arrays
    per_image_arrays = (array for group in descriptors for array in group)
    for image_descriptors in per_image_arrays:
        if image_descriptors is None:
            print("Null descriptor")
            continue
        histogram = np.zeros(G)
        # Index of the closest vocabulary word for every descriptor row
        nearest_word = cdist(image_descriptors, centers).argmin(axis=1)
        for word in nearest_word:
            histogram[word] += 1
        histograms.append(histogram)
    return histograms

In [None]:
# extract SIFT descriptors from the training images, one batch at a time
import sklearn
from sklearn.model_selection import GridSearchCV

all_train_descriptors = []  # every descriptor row, pooled across all images
training_descriptors = []   # one entry per batch: list of per-image descriptor arrays
training_labels = []        # labels of the images that produced descriptors
valid_indices_all = []      # within-batch positions of those images

for data_train, train_labels in train_loader:
    train_descriptors, valid_indices = sift_features(data_train)
    training_descriptors.append(train_descriptors)
    # keep only the labels of images for which descriptors were found
    training_labels.extend(train_labels[valid_indices])
    valid_indices_all.extend(valid_indices)
    # pool the individual descriptor rows of this batch
    all_train_descriptors.extend(
        row
        for descriptor in train_descriptors
        if descriptor is not None
        for row in descriptor
    )

In [None]:
# Vocabulary size: number of k-means clusters and length of every histogram
G = 380
# Cluster the pooled descriptors, then build one histogram per image
bow_dict_train = bow_dictionary(descriptors=all_train_descriptors, G=G)
train_features = bow_features(descriptors=training_descriptors, centers=bow_dict_train, G=G)

In [None]:
# GRID SEARCH: 5 folds cv
# Import SVC explicitly: a bare `import sklearn` does not guarantee that the
# `sklearn.svm` submodule has been loaded, so `sklearn.svm.SVC` can fail with
# an AttributeError.
from sklearn.svm import SVC

svm_model = SVC()
parameters = [{'C': [20, 25, 30, 35, 40, 45], 'kernel' : ['rbf', 'poly']}]
grid_search = GridSearchCV(estimator = svm_model, param_grid = parameters, scoring = 'accuracy', cv = 5)
# int() accepts both 0-d tensors and plain ints, so this cell can be re-run
# without failing on labels that were already converted.
training_labels = [int(label) for label in training_labels]
Y_train = training_labels
grid_search.fit(train_features, Y_train)

In [None]:
# Report the parameter combination selected by the grid search (scored by accuracy)
print("Optimal parameters: ", grid_search.best_params_)