# **Importing Libraries**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from keras import callbacks

from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# **Data Set Loading**

In [None]:
# Load the Oxford-IIIT Pet dataset as (image, label) pairs: the first 90% of
# the train split for fitting, the remaining 10% for validation, and the test
# split for the final evaluation.
# shuffle_files is off: nothing below needs a random read order, and a fixed
# order keeps the embedding/label alignment and the seeded cross-validation
# folds reproducible from one run to the next.
(ds_train, ds_val, ds_test), ds_info = tfds.load(
    'oxford_iiit_pet',
    split=['train[:90%]', 'train[90%:]', 'test'],
    shuffle_files=False,
    as_supervised=True,
    with_info=True,
)

Downloading and preparing dataset 773.52 MiB (download: 773.52 MiB, generated: 774.69 MiB, total: 1.51 GiB) to /root/tensorflow_datasets/oxford_iiit_pet/3.2.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

Generating splits...:   0%|          | 0/2 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/3680 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/oxford_iiit_pet/3.2.0.incompleteUMMHO9/oxford_iiit_pet-train.tfrecord*...:…

Generating test examples...:   0%|          | 0/3669 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/oxford_iiit_pet/3.2.0.incompleteUMMHO9/oxford_iiit_pet-test.tfrecord*...: …

Dataset oxford_iiit_pet downloaded and prepared to /root/tensorflow_datasets/oxford_iiit_pet/3.2.0. Subsequent calls will reuse this data.


# **KNN Classification**

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input

# Side length, in pixels, of the square images fed to the model.
IMAGE_SIZE = 224
# Number of examples grouped into one batch of the input pipeline.
BATCH_SIZE = 32
# Sentinel that lets tf.data pick buffer sizes / parallelism at runtime.
AUTOTUNE = tf.data.AUTOTUNE

In [None]:
def processing(image, label):
  """Turn one raw example into a ResNet50-ready image.

  Args:
    image: image tensor as yielded by the dataset; it is cast to float32 here.
    label: label of the example, passed through unchanged.

  Returns:
    A tuple (image, label) where image has been run through
    `preprocess_input` and then resized with padding to
    IMAGE_SIZE x IMAGE_SIZE.
  """
  image = tf.cast(image, tf.float32)
  image = preprocess_input(image)
  # Use the shared IMAGE_SIZE constant (instead of a literal) so the output
  # always matches the input_shape the ResNet50 model is built with.
  # resize_with_pad keeps the aspect ratio and zero-fills the remainder; since
  # it runs after preprocess_input, the padding is zero in preprocessed space.
  image = tf.image.resize_with_pad(
      image, IMAGE_SIZE, IMAGE_SIZE, method='bilinear', antialias=True)
  return image, label

In [None]:
def prepare_dataset(dataset):
  """Apply `processing` to every example and group the results into batches.

  Args:
    dataset: a tf.data.Dataset yielding (image, label) pairs.

  Returns:
    The dataset mapped through `processing` and batched with BATCH_SIZE.
  """
  # Preprocess several images concurrently; tf.data's map is deterministic by
  # default, so elements still come out in their original order.
  dataset = dataset.map(processing, num_parallel_calls=AUTOTUNE)
  dataset = dataset.batch(BATCH_SIZE)
  return dataset

In [None]:
# Finalise all three splits the same way: preprocess + batch, cache the
# result, and prefetch upcoming batches while the current one is consumed.
# cache() stores the elements produced by the first full pass, so later passes
# (e.g. label extraction) see them in the same order as that first pass.
ds_train, ds_val, ds_test = [
    prepare_dataset(split).cache().prefetch(buffer_size=AUTOTUNE)
    for split in (ds_train, ds_val, ds_test)
]

In [None]:
def extract_lables(dataset):
  """Collect every label of a dataset into one flat list.

  Args:
    dataset: iterable of (features, labels) elements. The labels entry may be
      a batch of labels or a single scalar label.

  Returns:
    A list holding one NumPy scalar per example, in iteration order.
  """
  labels = []
  for element in dataset:
    # Convert once per element instead of once per label. reshape(-1) flattens
    # a batch and also turns a scalar label into a length-1 array, so
    # unbatched datasets work as well.
    labels.extend(np.asarray(element[1]).reshape(-1))
  return labels

In [None]:
# ImageNet-pretrained ResNet50 without its classification head. With
# pooling='avg' the model returns one globally average-pooled feature vector
# per input image.
ResNet50Model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3),
    pooling='avg',
)


In [None]:
# Freeze the backbone: below it is only used through predict() to produce
# embeddings, never trained.
ResNet50Model.trainable = False

# **Obtaining Embeddings From ResNet50 Pretrained Model**

In [None]:
# Embed the training split with the frozen ResNet50 (one pooled feature
# vector per image). This is also the first full pass over ds_train.
embeddings = ResNet50Model.predict(ds_train)



In [None]:
# Embed the test split with the same frozen ResNet50.
test_embeddings = ResNet50Model.predict(ds_test)



In [None]:
# Training labels, gathered in a second pass over ds_train; they must line up
# row-for-row with `embeddings`.
labels_train = extract_lables(ds_train)

In [None]:
# Test labels, gathered in a second pass over ds_test; they must line up
# row-for-row with `test_embeddings`.
labels_test = extract_lables(ds_test)

In [None]:
# Baseline k-NN on the ResNet50 embeddings, using k=7 before any tuning.
init_knn = KNeighborsClassifier(n_neighbors=7)
init_knn.fit(X=embeddings, y=labels_train)

# **k-NN Accuracy Result Before Hyperparameter Tuning**

In [None]:
# Classify the test embeddings with the baseline model ...
predicted = init_knn.predict(X=test_embeddings)

# ... and report the fraction of test images whose label was predicted correctly.
accuracy = accuracy_score(y_true=labels_test, y_pred=predicted)
print(f"Accuracy: {accuracy}")

Accuracy: 0.850640501499046


# **Obtaining the Best "k" Value**

In [None]:
from sklearn.model_selection import GridSearchCV, KFold

# Candidate neighbour counts: every integer from 2 to 29.
parameter = {'n_neighbors': np.arange(2, 30)}
# Shuffled 5-fold split of the training embeddings with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Exhaustive search over the candidates, scoring each one on the 5 folds.
knn = KNeighborsClassifier()
knn_cv = GridSearchCV(estimator=knn, param_grid=parameter, cv=kf, verbose=1)
knn_cv.fit(X=embeddings, y=labels_train)
print(knn_cv.best_params_)

Fitting 5 folds for each of 28 candidates, totalling 140 fits
{'n_neighbors': 9}


In [None]:
# Build the final classifier from whatever the grid search selected instead of
# a value copied by hand from a previous run's output, so this cell stays
# correct if the search result changes.
best_knn = KNeighborsClassifier(**knn_cv.best_params_)
best_knn.fit(embeddings, labels_train)

# **k-NN Accuracy Result After Hyperparameter Tuning**

In [None]:
# Classify the test embeddings with the tuned model ...
predicted = best_knn.predict(X=test_embeddings)

# ... and report its accuracy for comparison with the untuned baseline.
accuracy = accuracy_score(y_true=labels_test, y_pred=predicted)
print(f"Accuracy: {accuracy}")

Accuracy: 0.8517307168165713
