## a and b. 

## Importing needed libraries

In [24]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from glob import glob

## Loading the dataset and preprocessing it 

In [1]:
# Dataset locations, relative to the notebook's working directory:
# the first folder is loaded as the training set, the second as the test set.
train_folder, test_folder = "train", "test1"

In [3]:
def load_images(folder_path, img_size=(64, 64)):
    """Load every ``*.jpg`` file of a folder as a normalized grayscale array.

    Each image is read in grayscale, resized to ``img_size`` and converted to
    float32 values scaled into [0, 1]. The label is derived from the file
    name (not from the directories leading to it): 1 if the name contains
    'dog', 0 if it contains 'cat', otherwise None (e.g. an unlabeled test set).

    Parameters
    ----------
    folder_path : str
        Directory searched (non-recursively) for ``*.jpg`` files.
    img_size : tuple of int, optional
        Target size handed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns
    -------
    images : numpy.ndarray
        float32 array of shape ``(n_images, height, width)``; it keeps that
        rank even when no image could be loaded.
    labels : numpy.ndarray
        One entry per returned image: 1 (dog), 0 (cat) or None (unknown).

    Warns
    -----
    UserWarning
        For every file that OpenCV could not decode; such files are skipped.
    """
    import warnings

    images = []
    labels = []
    # glob() gives no ordering guarantee; sorting keeps the sample order (and
    # therefore any seeded split performed later) reproducible across runs.
    for file_path in sorted(glob(os.path.join(folder_path, '*.jpg'))):
        img = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # imread signals a corrupt/unreadable file by returning None,
            # which would otherwise make cv2.resize raise.
            warnings.warn(f"Skipping unreadable image: {file_path}")
            continue

        img = cv2.resize(img, img_size)
        img = img.astype('float32') / 255.0

        # Look only at the file name so that a parent directory such as
        # "cats_vs_dogs/" cannot decide the label of every file.
        file_name = os.path.basename(file_path).lower()
        if 'dog' in file_name:
            label = 1
        elif 'cat' in file_name:
            label = 0
        else:
            label = None
        images.append(img)
        labels.append(label)

    if not images:
        # Keep the (n, height, width) layout for an empty result as well.
        width, height = img_size
        return np.empty((0, height, width), dtype='float32'), np.array(labels)

    return np.array(images), np.array(labels)

In [11]:
# Labelled training data: images together with their cat/dog labels.
train_images, train_labels = load_images(folder_path=train_folder)
# Second folder: only the images are kept, the returned labels are discarded.
test_images, _ = load_images(folder_path=test_folder)

In [12]:
# Sanity check: show the dimensions of everything that was just loaded.
for caption, array in (
    ("Train Images Shape:", train_images),
    ("Train Labels Shape:", train_labels),
    ("Test Images Shape:", test_images),
):
    print(caption, array.shape)

Train Images Shape: (25000, 64, 64)
Train Labels Shape: (25000,)
Test Images Shape: (12500, 64, 64)


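### The shapes above show that every image was resized to 64×64 and that there is exactly one label per training image, so loading and preprocessing worked correctly!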

## c.

### splitting the dataset into train and test set

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled images for evaluation; the fixed seed makes
# the split repeatable.
HOLDOUT_FRACTION = 0.2
SPLIT_SEED = 42
X_train, X_test, y_train, y_test = train_test_split(
    train_images,
    train_labels,
    test_size=HOLDOUT_FRACTION,
    random_state=SPLIT_SEED,
)

In [20]:
# Show the dimensions of the four arrays produced by the split.
print("Train Images Shape:", X_train.shape)
print("Train Labels Shape:", y_train.shape)
print("Test Images Shape:", X_test.shape)
# y_test holds labels, so it must not be captioned as images.
print("Test Labels Shape:", y_test.shape)

Train Images Shape: (20000, 64, 64)
Train Labels Shape: (20000,)
Test Images Shape: (5000, 64, 64)
Test Images Shape: (5000,)


## d.

### training a KNN model 

In [18]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Number of neighbours consulted for each prediction.
K_NEIGHBORS = 5
knn_classifier = KNeighborsClassifier(n_neighbors=K_NEIGHBORS)

In [22]:
# The classifier is given one flat feature vector per sample, so every
# 2-D image is unrolled into a single row before fitting and predicting.
X_train_flat = X_train.reshape(len(X_train), -1)
X_test_flat = X_test.reshape(len(X_test), -1)

knn_classifier.fit(X_train_flat, y_train)
test_predictions = knn_classifier.predict(X_test_flat)

### report 

In [28]:
# Label 0 is reported as 'Cat' and label 1 as 'Dog'.
class_names = ['Cat', 'Dog']
report = classification_report(
    y_test,
    test_predictions,
    target_names=class_names,
)
print("Classification Report:", report, sep="\n")

Classification Report:
              precision    recall  f1-score   support

         Cat       0.55      0.72      0.62      2515
         Dog       0.59      0.40      0.47      2485

    accuracy                           0.56      5000
   macro avg       0.57      0.56      0.55      5000
weighted avg       0.57      0.56      0.55      5000



## e. 

## evaluation 

In [30]:
from sklearn.model_selection import cross_validate
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score

### evaluating the knn model in terms of different metrics 

In [31]:
# Metrics collected on every fold: plain accuracy plus the macro-averaged
# (unweighted mean over both classes) precision, recall and F1.
scoring = {'accuracy': make_scorer(accuracy_score)}
scoring.update({
    metric_name: make_scorer(metric_fn, average='macro')
    for metric_name, metric_fn in (
        ('precision', precision_score),
        ('recall', recall_score),
        ('f1_score', f1_score),
    )
})

# 10-fold cross-validation over the complete labelled set, with each image
# flattened to one feature vector.
all_features = train_images.reshape(len(train_images), -1)
cv_results = cross_validate(
    knn_classifier,
    all_features,
    train_labels,
    cv=10,
    scoring=scoring,
)

In [34]:
def _mean_and_std(fold_scores):
    """Return the (mean, standard deviation) of the per-fold scores."""
    return np.mean(fold_scores), np.std(fold_scores)


# Collapse the per-fold results of each metric into a mean and a spread.
avg_accuracy, std_accuracy = _mean_and_std(cv_results['test_accuracy'])
avg_precision, std_precision = _mean_and_std(cv_results['test_precision'])
avg_recall, std_recall = _mean_and_std(cv_results['test_recall'])
avg_f1_score, std_f1_score = _mean_and_std(cv_results['test_f1_score'])

In [33]:
# Report every metric as "mean ± standard deviation" over the CV folds.
summary_rows = (
    ("Average Accuracy:", avg_accuracy, std_accuracy),
    ("Average Precision:", avg_precision, std_precision),
    ("Average Recall:", avg_recall, std_recall),
    ("Average F1-score:", avg_f1_score, std_f1_score),
)
for caption, mean_value, std_value in summary_rows:
    print(caption, mean_value, "±", std_value)

Average Accuracy: 0.55832 ± 0.009594665184361565
Average Precision: 0.5670058647050586 ± 0.010783270604225176
Average Recall: 0.55832 ± 0.00959466518436156
Average F1-score: 0.5434821376191263 ± 0.010408495244943944


## f.

### Based on the above results, we conclude that the KNN classifier is not suitable for image classification tasks, since it performs poorly on all of the above metrics (only a few points better than the 50% expected from guessing between two roughly balanced classes).

### Average Accuracy ≈ 55.8%: on average, the classifier predicts the class of around 55.83% of the test instances correctly. Not a good accuracy!

### Average Precision ≈ 56.7% (macro-averaged over both classes). Precision measures the proportion of true positive predictions among all positive predictions made by the classifier; a higher precision indicates fewer false positive predictions.


### Average Recall ≈ 55.8% (macro-averaged over both classes). Recall measures the proportion of true positive predictions among all actual positive instances in the dataset.

### Average F1-score ≈ 54.3% (macro-averaged over both classes). The F1-score is the harmonic mean of precision and recall; the average F1-score of the classifier is approximately 54.35%.

### To conclude, KNN (at least when applied directly to raw pixel intensities, as done here) is not suitable for image classification tasks.