# **Retrieval with k-NN**

In [1]:
import os
import sys

# Put the sibling "Scripts" directory first on the import path so that modules
# stored there can be imported by the cells below.
# The path is assembled with os.path.join instead of the literal "..\\Scripts":
# a backslash is a path separator only on Windows, so on other platforms the
# literal names a directory that does not exist and the imports fail.
sys.path.insert(0, os.path.join("..", "Scripts"))

In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## **Load dataset**

In [3]:
from ImageDatastore import ImageDatastore
from torch.utils.data import DataLoader
from torchvision import transforms

In [4]:
# Number of images per batch for the DataLoaders built in this notebook.
batch_size = 512

# Preprocessing pipeline, built step by step:
#   1. resize every image to 232x232 using bilinear interpolation;
#   2. normalise each of the three channels with the given mean / std
#      (the values match the commonly used ImageNet channel statistics).
resize_step = transforms.Resize(
    (232, 232), interpolation=transforms.InterpolationMode.BILINEAR
)
normalize_step = transforms.Normalize(
    mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
)
transform = transforms.Compose([resize_step, normalize_step])

In [5]:
# Datastore for the "train" split, read through the `transform` pipeline.
train_small = ImageDatastore("train", transform=transform)

# shuffle=False: batches are produced in dataset order.
train_small_loader = DataLoader(
    dataset=train_small,
    batch_size=batch_size,
    shuffle=False,
)

### **Load Unlabelled Data**

In [6]:
# Read the listing with header=None (no row is consumed as a header), then
# give the two columns explicit names.
unlabelled_csv_path = "../Dataset/train_unlabeled.csv"
train_unlabelled = pd.read_csv(unlabelled_csv_path, header=None)
train_unlabelled.columns = ["Image", "Label"]

# Preview the first rows.
train_unlabelled.head()

Unnamed: 0,Image,Label
0,train_059329.jpg,-1
1,train_059330.jpg,-1
2,train_059331.jpg,-1
3,train_059332.jpg,-1
4,train_059333.jpg,-1


In [7]:
# Split the unlabelled listing into shards of at most CHUNK_SIZE rows and
# write each shard to its own CSV file.
CHUNK_SIZE = 20_000

# Iterating over start offsets (rather than range(n_rows // CHUNK_SIZE + 1))
# produces exactly ceil(n_rows / CHUNK_SIZE) shards, so no empty trailing CSV
# is written when the row count is an exact multiple of CHUNK_SIZE. An empty
# frame produces no files at all.
shard_starts = range(0, len(train_unlabelled), CHUNK_SIZE)
for shard_idx, start in enumerate(shard_starts):
    shard = train_unlabelled.iloc[start : start + CHUNK_SIZE]
    shard_path = f"../Dataset/train_unlabeled_{shard_idx}.csv"
    # index=False / header=False: only the data rows are written, so the
    # shards are header-less like the source CSV (no index reset is needed).
    shard.to_csv(shard_path, index=False, header=False)
    print(shard_path)

../Dataset/train_unlabeled_0.csv
../Dataset/train_unlabeled_1.csv
../Dataset/train_unlabeled_2.csv
../Dataset/train_unlabeled_3.csv
../Dataset/train_unlabeled_4.csv
../Dataset/train_unlabeled_5.csv


In [8]:
# Datastore for the first shard ("train_unlabeled_0"), read through `transform`.
# NOTE(review): this rebinds `train_unlabelled`, which held the DataFrame of
# the full listing until now; the cells that read or split that DataFrame
# cannot be re-run after this one without reloading the CSV first.
train_unlabelled = ImageDatastore("train_unlabeled_0", transform=transform)

# shuffle=False: batches are produced in dataset order.
train_loader = DataLoader(
    dataset=train_unlabelled,
    batch_size=batch_size,
    shuffle=False,
)

## **Extracting Features**

In [9]:
from NeuralFeatureExtractor import MobileNetFeatureExtractor

In [10]:
# Directories that hold the cached feature arrays and the cached label arrays.
base_feature_path = "../Features/features/"
base_labels_path = "../Features/labels/"

# File names of the cached arrays (one for the "train" split, one for the
# unlabelled shard); each name is joined onto both directories above.
feature_type_unlabelled = "mobilenet_v3_classifier_unlabelled.npy"
feature_type = "mobilenet_v3_classifier.npy"

In [11]:
# Build the project's MobileNet feature extractor with its default arguments.
# Its compute_features() method is called further down whenever the cached
# feature files are not found on disk.
feature_extractor = MobileNetFeatureExtractor()


In [12]:
# Cache locations for the "train" split features and labels.
small_features_file = os.path.join(base_feature_path, feature_type)
small_labels_file = os.path.join(base_labels_path, feature_type)
small_cache_missing = not (
    os.path.exists(small_features_file) and os.path.exists(small_labels_file)
)

if small_cache_missing:
    # At least one cache file is absent: run the extractor over the loader.
    x_train_small, y_train_small = feature_extractor.compute_features(
        train_small_loader
    )
else:
    # Both cache files are present: reuse the arrays stored on disk.
    x_train_small = np.load(small_features_file)
    y_train_small = np.load(small_labels_file)

In [13]:
# Cache locations for the unlabelled-shard features and labels.
unlabelled_features_file = os.path.join(base_feature_path, feature_type_unlabelled)
unlabelled_labels_file = os.path.join(base_labels_path, feature_type_unlabelled)
unlabelled_cache_missing = not (
    os.path.exists(unlabelled_features_file)
    and os.path.exists(unlabelled_labels_file)
)

if unlabelled_cache_missing:
    # At least one cache file is absent: run the extractor over the loader.
    x_train_unlabelled, y_train_unlabelled = feature_extractor.compute_features(
        train_loader
    )
else:
    # Both cache files are present: reuse the arrays stored on disk.
    x_train_unlabelled = np.load(unlabelled_features_file)
    y_train_unlabelled = np.load(unlabelled_labels_file)

### **Save Features**

In [14]:
# Persist the "train" split features and labels so later runs can load them
# from disk instead of recomputing them.
# np.save does not create missing parent directories, so make sure both
# target directories exist first; otherwise a first run would fail here,
# after the feature extraction has already been done.
os.makedirs(base_feature_path, exist_ok=True)
os.makedirs(base_labels_path, exist_ok=True)

np.save(
    os.path.join(base_feature_path, feature_type),
    x_train_small,
    allow_pickle=False,
)
np.save(
    os.path.join(base_labels_path, feature_type),
    y_train_small,
    allow_pickle=False,
)

In [15]:
# Persist the unlabelled-shard features and labels so later runs can load
# them from disk instead of recomputing them.
# np.save does not create missing parent directories, so make sure both
# target directories exist first; otherwise a first run would fail here,
# after the feature extraction has already been done.
os.makedirs(base_feature_path, exist_ok=True)
os.makedirs(base_labels_path, exist_ok=True)

np.save(
    os.path.join(base_feature_path, feature_type_unlabelled),
    x_train_unlabelled,
    allow_pickle=False,
)
np.save(
    os.path.join(base_labels_path, feature_type_unlabelled),
    y_train_unlabelled,
    allow_pickle=False,
)

## **Retrieval with k-NN**

In [16]:
from ImageRetrieval import KNNRetrieval

In [17]:
# Build the project's k-NN retriever with k=3.
# NOTE(review): the three positional arguments are passed as empty strings;
# their meaning is defined in ImageRetrieval.KNNRetrieval and is not visible
# here — confirm they are unused when arrays are passed to retrieve_images().
retrieval = KNNRetrieval('', '', '', k=3)

In [18]:
# Query with the unlabelled features against the "train" split features/labels.
# NOTE(review): despite its name, `labels` is used further down as indices
# into y_train_small (y_train_small[labels[1]]), so it presumably holds
# neighbour indices rather than class labels — confirm against KNNRetrieval.
labels, distances = retrieval.retrieve_images(x_train_unlabelled, x_train_small, y_train_small)

In [22]:
# Inspect the retrieval result for the first query (index 0).
labels[0]

array([2760,  883,  569], dtype=int64)

In [20]:
# Inspect the distances returned for the second query (index 1).
distances[1]

array([0.4998598, 0.5460542, 0.5589403], dtype=float32)

In [21]:
# Use the second query's retrieved entries as indices into y_train_small to
# look up the corresponding "train" split labels.
y_train_small[labels[1]]

array([  0,  21, 165], dtype=int64)