# Classification of Cats & Dogs using KNN Algorithm for both Euclidean Distance and Manhattan Distance

# Acquiring the Dataset from Kaggle.com

This section of the program installs the Kaggle command-line package and sets up its configuration directory within Google Colab so that you can download datasets from Kaggle directly. To make sure that this section of the code works, please upload the "kaggle.json" file from the zip file into the "/content/" directory before running the code.

The downloaded data consists of a total of 25,000 images of cats and dogs. The training set contains 10,000 images each of cats and dogs (20,000 in total), and the test set contains 2,500 images each of cats and dogs (5,000 in total).

In [None]:
! pip install kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# Download the cat/dog dataset archive from Kaggle (the next cell unzips it
# by its relative file name).
! kaggle datasets download -d rotchymoricette/knn-machine-learning-data-set-catdog

In [None]:
# Extract the downloaded archive; later cells read images from
# /content/training_set and /content/test_set.
! unzip knn-machine-learning-data-set-catdog.zip

# Image Featuring

Importing the images using Matplotlib and converting them into NumPy arrays.

In [None]:
from skimage.feature import hog
import numpy as np
import cv2
import matplotlib.pyplot as plt
from matplotlib import image as mpimg
import os
import glob
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics

In [None]:
# Feature-extraction walkthrough, step 1: load a single training image,
# report its array shape and display it.
sample_path = '/content/training_set/dog/dog.3.jpg'
img = np.array(mpimg.imread(sample_path))
# Explicitly mark the array as writable.
img.setflags(write=True)
print('image: ', img.shape)
plt.imshow(img)
plt.show()

In [None]:
# Step 2: resize the sample image and show the result.
# cv2.resize takes the target size as (width, height); the printed shape
# shows the resulting array layout.
newImg = cv2.resize(img, dsize=(64, 128))
plt.imshow(newImg)
plt.show()
print(newImg.shape)

## Getting the HOG features of each image

In [None]:
# Step 3: compute the HOG descriptor of the resized sample image.
# With visualize=True, hog() returns both the feature vector and an image
# rendering of the gradient histograms.
# NOTE(review): `multichannel` is deprecated in newer scikit-image releases
# in favour of `channel_axis` -- confirm against the installed version.
features_arr, hog_features = hog(
  newImg,
  visualize=True,
  multichannel=True,
)
print(features_arr.shape)
print(features_arr)
print(hog_features.shape)

# Show the HOG rendering without axis ticks, in grayscale.
plt.axis("off")
plt.imshow(hog_features, cmap="gray")
plt.show()

# Creating training Sets from HOG features and labeling them

In [None]:
# Training data: one HOG feature vector per image, collected per class.
# Each image is read, resized with cv2.resize(..., (64, 32)) and passed to
# hog() with its default parameters.
# NOTE(review): the preview cell resizes to (64, 128) and passes
# multichannel=True, whereas this loop uses (64, 32) and no channel
# argument -- confirm that the difference is intended and that the installed
# scikit-image version accepts colour input without a channel argument.
dogs_train = []
cats_train = []
for pattern, bucket in (
  ("/content/training_set/dog/*.jpg", dogs_train),
  ("/content/training_set/cat/*.jpg", cats_train),
):
  for entry in glob.glob(pattern):
    img = np.array(mpimg.imread(entry))
    newImg = cv2.resize(img, (64, 32))
    features_arr = hog(newImg)
    bucket.append(features_arr)

# Cats first, then dogs: the labeling cell relies on this order.
train_data = cats_train + dogs_train
print(len(train_data))

20000


In [None]:
# Label the training samples.
# train_data is built as cats_train + dogs_train, so the labels follow the
# same order. The class boundary is taken from the actual list lengths
# instead of assuming exactly 10,000 cat images.
train_label_list = ['cat'] * len(cats_train) + ['dog'] * len(dogs_train)
print(train_label_list)
print(len(train_label_list))

# Creating test sets from HOG features and labeling them

In [None]:
# Test data: each sample is stored as {'data': HOG feature vector,
# 'label': class name}, so the label travels with its features.
# Images are preprocessed the same way as the training images
# (resize to (64, 32), hog() with default parameters).
dogs_test = []
cats_test = []
for pattern, animal, bucket in (
  ("/content/test_set/dog/*.jpg", 'dog', dogs_test),
  ("/content/test_set/cat/*.jpg", 'cat', cats_test),
):
  for entry in glob.glob(pattern):
    img = np.array(mpimg.imread(entry))
    newImg = cv2.resize(img, (64, 32))
    features_arr = hog(newImg)
    test_dict = {'data': features_arr, 'label': animal}
    bucket.append(test_dict)

# Dogs first, then cats.
test_data = dogs_test + cats_test
print(len(test_data))

5000


In [None]:
# Split the test samples into two parallel lists (same order as test_data):
# feature vectors for predict(), labels for scoring.
test_labels = [sample['label'] for sample in test_data]
test_features = [sample['data'] for sample in test_data]
print(len(test_features))
print(test_labels)

# **Model Training - Euclidean Distance KNN**

In [None]:
# Euclidean training and prediction
# Fit and evaluate a KNN classifier for K = 1..10. No `p` is passed to
# KNeighborsClassifier, so its default distance is used (Euclidean, per the
# scikit-learn documentation).
x_axis_k_points = []

# Euclidean-distance metrics, one entry per K
f1_eu = []
acc_eu = []
conf_matrix_eu = []

for k in range(1, 11):
  # train data
  knn_eu = KNeighborsClassifier(n_neighbors = k)
  knn_eu.fit(train_data, train_label_list)

  # KNN prediction (the expensive step: computed once per K)
  prediction = knn_eu.predict(test_features)

  # accuracy -- derived from the predictions above; knn_eu.score() would
  # run predict() on the whole test set a second time for the same number
  acc_euclidean = metrics.accuracy_score(test_labels, prediction)
  acc_eu.append(acc_euclidean)

  # confusion matrix of predictions
  conf_matrix_eu.append(metrics.confusion_matrix(test_labels, prediction))

  # f1 score, treating 'dog' as the positive class
  f1_eu.append(metrics.f1_score(test_labels, prediction, pos_label = 'dog'))

  x_axis_k_points.append(k)

print("F1 Scores of Euclidean Distance KNN:", f1_eu)
print("Accuracies of Euclidean Distance KNN:", acc_eu)

In [None]:
# Euclidean training and prediction with the "optimal" K value
# K = 100 follows the sqrt(N) rule of thumb with N = 10,000.
# NOTE(review): the combined training set printed earlier has 20,000
# samples (sqrt is about 141) -- confirm which N was intended.
conf_matrix_OptimalK_eu = []

# Train data
knn_OptimalK_eu = KNeighborsClassifier(n_neighbors = 100)
knn_OptimalK_eu.fit(train_data, train_label_list)

# KNN prediction
prediction_eu = knn_OptimalK_eu.predict(test_features)

# accuracy -- computed from prediction_eu rather than via score(), which
# would repeat the prediction over the whole test set
acc_optimalK_eu = metrics.accuracy_score(test_labels, prediction_eu)

# confusion matrix of predictions
conf_matrix_OptimalK_eu.append(metrics.confusion_matrix(test_labels, prediction_eu))

# f1 score -- must use this model's predictions (prediction_eu); the
# variable `prediction` still holds the K = 10 result of the sweep cell
f1_optimalK_eu = metrics.f1_score(test_labels, prediction_eu, pos_label = 'dog')

print("F1 Score of Euclidean Distance KNN with Optimal K:", f1_optimalK_eu)
print("Accuracy of Euclidean Distance KNN with Optimal K:", acc_optimalK_eu )


# **Model Training - Manhattan Distance KNN**

In [None]:
# Manhattan training and prediction
# Same sweep as the Euclidean cell (K = 1..10), but with p = 1, which
# selects the Manhattan distance in KNeighborsClassifier.

# metrics, one entry per K
f1_man = []
acc_man = []
conf_matrix_man = []

for k in range(1, 11):
  # train data
  knn_man = KNeighborsClassifier(n_neighbors = k, p = 1)
  knn_man.fit(train_data, train_label_list)

  # KNN prediction (the expensive step: computed once per K)
  prediction_man = knn_man.predict(test_features)

  # accuracy -- derived from the predictions above; knn_man.score() would
  # run predict() on the whole test set a second time for the same number
  acc_manhattan = metrics.accuracy_score(test_labels, prediction_man)
  acc_man.append(acc_manhattan)

  # confusion matrix of predictions
  conf_matrix_man.append(metrics.confusion_matrix(test_labels, prediction_man))

  # f1 score, treating 'dog' as the positive class
  f1_man.append(metrics.f1_score(test_labels, prediction_man, pos_label = 'dog'))

  # NOTE(review): x_axis_k_points was already filled with 1..10 by the
  # Euclidean cell, so after this loop it holds 20 entries (1..10 twice).
  # Kept as-is because its consumer is not visible here -- confirm intent.
  x_axis_k_points.append(k)

print("F1 Scores of Manhattan Distance KNN:", f1_man)
print("Accuracies of Manhattan Distance KNN:", acc_man)

In [None]:
# Manhattan training and prediction with the "optimal" K value
# K = 100 follows the sqrt(N) rule of thumb with N = 10,000.
# NOTE(review): the combined training set printed earlier has 20,000
# samples (sqrt is about 141) -- confirm which N was intended.
conf_matrix_OptimalK_man = []

# Train data
knn_OptimalK_man = KNeighborsClassifier(n_neighbors = 100, p = 1)
knn_OptimalK_man.fit(train_data, train_label_list)

# KNN prediction
prediction_man = knn_OptimalK_man.predict(test_features)

# accuracy -- computed from prediction_man rather than via score(), which
# would repeat the prediction over the whole test set
acc_optimalK_man = metrics.accuracy_score(test_labels, prediction_man)

# confusion matrix of predictions -- must use the Manhattan model's
# predictions, not prediction_eu from the Euclidean cell
conf_matrix_OptimalK_man.append(metrics.confusion_matrix(test_labels, prediction_man))

# f1 score -- likewise from prediction_man, not the stale `prediction`
# left over from the Euclidean sweep
f1_optimalK_man = metrics.f1_score(test_labels, prediction_man, pos_label = 'dog')

print("F1 Score of Manhattan Distance KNN with Optimal K:", f1_optimalK_man)
print("Accuracy of Manhattan Distance KNN with Optimal K:", acc_optimalK_man)

# **Printing Data**

In [None]:
# Report the Manhattan sweep: one line of scores plus the confusion matrix
# for each K, where K is the 1-based position in the result lists.
print("Manhattan Scores: ")
for k_value, (f1, acc, matrix) in enumerate(zip(f1_man, acc_man, conf_matrix_man), start=1):
  print('For K = ', k_value, ', F1 score = ', f1, ', Accuracy = ', acc, ', \nConfusion Matrix: \n', matrix)

Manhattan Scores: 
For K =  1 , F1 score =  0.6904149834741095 , Accuracy =  0.6628 , 
Confusion Matrix: 
 [[1434 1066]
 [ 620 1880]]
For K =  2 , F1 score =  0.6385247722728282 , Accuracy =  0.6746 , 
Confusion Matrix: 
 [[1936  564]
 [1063 1437]]
For K =  3 , F1 score =  0.7152677279305355 , Accuracy =  0.6852 , 
Confusion Matrix: 
 [[1449 1051]
 [ 523 1977]]
For K =  4 , F1 score =  0.7020457767875229 , Accuracy =  0.7058 , 
Confusion Matrix: 
 [[1796  704]
 [ 767 1733]]
For K =  5 , F1 score =  0.735734664764622 , Accuracy =  0.7036 , 
Confusion Matrix: 
 [[1455 1045]
 [ 437 2063]]
For K =  6 , F1 score =  0.7297662739038053 , Accuracy =  0.7202 , 
Confusion Matrix: 
 [[1712  788]
 [ 611 1889]]
For K =  7 , F1 score =  0.7468175388967467 , Accuracy =  0.7136 , 
Confusion Matrix: 
 [[1456 1044]
 [ 388 2112]]
For K =  8 , F1 score =  0.7394512771996216 , Accuracy =  0.7246 , 
Confusion Matrix: 
 [[1669  831]
 [ 546 1954]]
For K =  9 , F1 score =  0.7478962131837307 , Accuracy =  0.71

In [None]:
# Report the Euclidean sweep: one line of scores plus the confusion matrix
# for each K, where K is the 1-based position in the result lists.
print("Euclidean Scores: ")
for k_value, (f1, acc, matrix) in enumerate(zip(f1_eu, acc_eu, conf_matrix_eu), start=1):
  print('For K = ', k_value, ', F1 score = ', f1, ', Accuracy = ', acc, ', \nConfusion Matrix: \n', matrix)

Euclidean Scores: 
For K =  1 , F1 score =  0.6837137463531834 , Accuracy =  0.6314 , 
Confusion Matrix: 
 [[1165 1335]
 [ 508 1992]]
For K =  2 , F1 score =  0.6530944625407166 , Accuracy =  0.6592 , 
Confusion Matrix: 
 [[1692  808]
 [ 896 1604]]
For K =  3 , F1 score =  0.7165419783873649 , Accuracy =  0.659 , 
Confusion Matrix: 
 [[1140 1360]
 [ 345 2155]]
For K =  4 , F1 score =  0.7096774193548387 , Accuracy =  0.6832 , 
Confusion Matrix: 
 [[1480 1020]
 [ 564 1936]]
For K =  5 , F1 score =  0.7240307541305415 , Accuracy =  0.6626 , 
Confusion Matrix: 
 [[1100 1400]
 [ 287 2213]]
For K =  6 , F1 score =  0.7244560487380332 , Accuracy =  0.6834 , 
Confusion Matrix: 
 [[1336 1164]
 [ 419 2081]]
For K =  7 , F1 score =  0.7308745369624738 , Accuracy =  0.6658 , 
Confusion Matrix: 
 [[1060 1440]
 [ 231 2269]]
For K =  8 , F1 score =  0.7290856853135035 , Accuracy =  0.6794 , 
Confusion Matrix: 
 [[1240 1260]
 [ 343 2157]]
For K =  9 , F1 score =  0.7278824656659214 , Accuracy =  0.65