**Cell 1**

In [19]:
import tensorflow as tf
import matplotlib.pyplot as plt
import glob
import os
from sklearn.model_selection import train_test_split
import shutil
import numpy as np

**Cell 2**

In [2]:
print('There are {} images of cats in the dataset'.format(len(glob.glob('C:/Users/pmoun/ML_FINAL_KNN/PetImages/Cat/*jpg'))))
print('There are {} images of dogs in the dataset'.format(len(glob.glob('C:/Users/pmoun/ML_FINAL_KNN/PetImages/Dog/*jpg'))))
print('There are {} images of birds in the dataset'.format(len(glob.glob('C:/Users/pmoun/ML_FINAL_KNN/PetImages/Bird/birds/*jpg'))))

There are 0 images of cats in the dataset
There are 0 images of dogs in the dataset
There are 0 images of birds in the dataset


**Cell 3**

In [3]:
# Do not start and break thread when stepping and breaking

# The path to my cats dataset folder
dataset_path_cats = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages/Cat"

# The path to my dogs dataset folder
dataset_path_dogs = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages/Dog"

# The path to birts dataset folder

dataset_path_birds = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages/Bird"


# The path to my output folder
output_path = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages_Split"

# defining the train, validation, and test ratio
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1


**Cell 4**

In [4]:
for dataset_path, animal in [(dataset_path_cats, "cats"), (dataset_path_dogs, "dogs"), (dataset_path_birds, "birds")]:
    # getting the list of all image files in the dataset folder for the animal
    image_files = [os.path.join(dataset_path, f) for f in os.listdir(dataset_path) if f.endswith(".jpg")]

    # splitting the dataset into training, validation, and test sets for the animal
    train_files, testval_files = train_test_split(image_files, train_size=train_ratio, random_state=42)
    val_files, test_files = train_test_split(testval_files, train_size=val_ratio/(val_ratio+test_ratio), random_state=42)

    # creating the output directories for the animal
    os.makedirs(os.path.join(output_path, "train", animal), exist_ok=True)
    os.makedirs(os.path.join(output_path, "val", animal), exist_ok=True)
    os.makedirs(os.path.join(output_path, "test", animal), exist_ok=True)

    # copying the training set to the output folder
    for file in train_files:
        shutil.copy(file, os.path.join(output_path, "train", animal, os.path.basename(file)))

    # copying the validation set to the output folder
    for file in val_files:
        shutil.copy(file, os.path.join(output_path, "val", animal, os.path.basename(file)))

    # copying the test set to the output folder
    for file in test_files:
        shutil.copy(file, os.path.join(output_path, "test", animal, os.path.basename(file)))

**Cell 5**

In [5]:
from tensorflow.keras.utils import load_img, img_to_array
import numpy as np
from PIL import UnidentifiedImageError

train_dir = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages_Split/train"
test_dir = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages_Split/test"

train_data = []
train_labels = []
for label, animal in enumerate(['cats', 'dogs', 'birds']):
    for img_path in os.listdir(os.path.join(train_dir, animal)):
        try:
            img = load_img(os.path.join(train_dir, animal, img_path), target_size=(64, 64))
            img = img_to_array(img)
            img = img / 255.0  # normalizing the pixel values to be between 0 and 1
            train_data.append(img)
            train_labels.append(label)
        except (UnidentifiedImageError, OSError):
            print(f"Skipping {img_path} due to an error")


train_data = np.array(train_data)
train_labels = np.array(train_labels)

# load and preprocess the test data
test_data = []
test_labels = []
for label, animal in enumerate(['cats', 'dogs', 'birds']):
    for img_path in os.listdir(os.path.join(test_dir, animal)):
        try:
            img = load_img(os.path.join(test_dir, animal, img_path), target_size=(64, 64))
            img = img_to_array(img)
            img = img / 255.0  # normalizing pixel values to be between 0 and 1
            test_data.append(img)
            test_labels.append(label)
        except (UnidentifiedImageError, OSError):
            print(f"Skipping {img_path} due to an error")

test_data = np.array(test_data)
test_labels = np.array(test_labels)




**Cell 6**

In [16]:
val_dir = "C:/Users/Gaurav Kharel/Desktop/Spring 2023/Machine Learning/Final_Project_CNN/PetImages_Split/val"
# For Validation
val_data = []
val_labels = []
for label, animal in enumerate(['cats', 'dogs', 'birds']):
    for img_path in os.listdir(os.path.join(val_dir, animal)):
        try:
            img = load_img(os.path.join(val_dir, animal, img_path), target_size=(64, 64))
            img = img_to_array(img)
            img = img / 255.0  # normalizing pixel values to be between 0 and 1
            val_data.append(img)
            val_labels.append(label)
        except (UnidentifiedImageError, OSError):
            print(f"Skipping {img_path} due to an error")

val_data = np.array(val_data)
val_labels = np.array(val_labels)

**Cell 7**

In [6]:
def knn_predict(data, train, labels, k):
    """
    Predicts a label for an animal image using the KNN model classification and ecludian distance algorithm

    parameter: data -> numpy array with all images of the three animals that have not been classsified

    parameter: train -> numpy array of images already classified

    parameter: labels -> Holds the corresponding correct label for each image in the data parameter
    
    parameter: k -> Number of K nearest neighbors we will use for classification
    """
    
    data_labels = np.array([])
    print("Start")
    for i in data:
        distances = np.array([])

        for j in train:
            
            dist = np.linalg.norm(i - j)
            distances= np.append(distances, dist)

        
        indexes = []

        min_index = np.argpartition(distances, k)
        indexes.append(min_index[:k])

        potential_labels = indexes[0:k]

        unique_labels, counts = np.unique(potential_labels, return_counts=True)

        classified_label_idx = np.argmax(counts)

        classified_label = unique_labels[classified_label_idx]
      
        model_label = labels[classified_label]

        data_labels = np.append(data_labels, model_label)
        
        
    return data_labels

**Cell 8**

In [7]:
temp = knn_predict(test_data, train_data, train_labels, 66)

Start


**Cell 9**

In [8]:
def accuracy(data_labels, test_labels):
    err = 0
    for i in range(len(data_labels)):
        if data_labels[i] != test_labels[i]:
            err = err + 1
    
    numLabels = data_labels.shape[0]

    accuracy1 = (numLabels - err)/numLabels
    return accuracy1


**Cell 10**

In [9]:
print(temp)
acc = accuracy(temp, test_labels)
print(f"The Classification accuracy is {acc}")

[0. 0. 0. ... 0. 0. 0.]
The Classification accuracy is 0.3587425518800082


**Cell 11**

In [13]:
temp1 = knn_predict(test_data, train_data, train_labels, 200)
print(temp1)
acc1 = accuracy(temp1, test_labels)
print(f"The Classification accuracy is {acc1}")

Start
[0. 0. 0. ... 0. 0. 0.]
The Classification accuracy is 0.34970207520032875


**Cell 12**

In [None]:
temp2 = knn_predict(test_data, train_data, train_labels, 60)
print(temp2)
acc2 = accuracy(temp2, test_labels)
print(f"The Classification accuracy is {acc2}")

Start
[2. 1. 2. ... 2. 0. 2.]
The Classification accuracy is 0.5096493612394672


**Cell 13**

In [14]:
temp3 = knn_predict(test_data, train_data, train_labels, 2)
print(temp3)

Start
[2. 0. 1. ... 2. 0. 2.]
The Classification accuracy is 0.34970207520032875


**Cell 14**

In [17]:
acc4 = accuracy(temp3, test_labels)
print(f"The Classification accuracy is {acc4}")


The Classification accuracy is 0.6040682145058558


**Cell 15**

In [18]:
temp5 = knn_predict(val_data, train_data, train_labels, 2)
print(temp5)
acc5 = accuracy(temp5, val_labels)
print(f"The Classification accuracy is {acc5}")


Start
[2. 0. 0. ... 2. 2. 2.]
The Classification accuracy is 0.608740465883323
