In [1]:
import os
import cv2
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

ImportError: Matplotlib requires numpy>=1.20; you have 1.19.5

In [None]:
def manhattan_distance(x1, x2):
    """Return the L1 (Manhattan) distance between two equally shaped arrays.

    Both inputs are converted to float64 before subtracting. Without this,
    unsigned integer inputs (e.g. uint8 image pixels) wrap around on
    subtraction (0 - 1 == 255) and the distance is wrong.

    Args:
        x1: array-like of numbers.
        x2: array-like of numbers, broadcastable against ``x1``.

    Returns:
        The sum of absolute element-wise differences as a NumPy float scalar.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sum(np.abs(diff))

def euclidean_distance(x1, x2):
    """Return the L2 (Euclidean) distance between two equally shaped arrays.

    Both inputs are converted to float64 first. With small integer dtypes
    such as uint8, both the subtraction and the squaring overflow silently
    (20 ** 2 == 144 in uint8), which corrupts the distance.

    Args:
        x1: array-like of numbers.
        x2: array-like of numbers, broadcastable against ``x1``.

    Returns:
        The square root of the summed squared differences as a NumPy float
        scalar.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

class KNN:
    """Brute-force k-nearest-neighbours classifier with L1 or L2 distance.

    Two ways to predict:

    * ``predict(X, method)`` computes distances and votes with ``self.k``.
    * ``measure_distances(X, method)`` caches the distances once, after which
      ``predict_by_k(k)`` can be called repeatedly for different ``k``
      without recomputing them.

    Labels must be non-negative integers because voting uses ``np.bincount``.
    Vote ties go to the smallest label; neighbours at equal distance are
    taken in training order (stable sort).
    """

    def __init__(self, k=3):
        """Store the neighbour count ``k`` used by ``predict``."""
        self.k = k
        self.distance_array = None

    def fit(self, X, y):
        """Memorise the training samples ``X`` and their labels ``y``.

        Samples are stored as float64 so that distance arithmetic cannot
        overflow when the inputs are small integer types (e.g. uint8 pixels).
        Distances cached by an earlier ``measure_distances`` call are dropped
        because they refer to the previous training set.
        """
        self.X_train = np.asarray(X, dtype=np.float64)
        self.y_train = np.asarray(y)
        self.distance_array = None

    def predict(self, X, method="L1"):
        """Return the predicted label of every sample in ``X`` using ``self.k``.

        ``method`` is "L1" for Manhattan distance; any other value selects
        Euclidean distance.
        """
        y_pred = [self._predict(x, method=method) for x in X]
        return np.array(y_pred)

    def _predict(self, x, method="L1"):
        """Predict the label of a single sample ``x``."""
        return self._vote(self._distances(x, method), self.k)

    def _distances(self, x, method):
        """Return the distance from ``x`` to every training sample (1-D array)."""
        diff = self.X_train - np.asarray(x, dtype=np.float64)
        # Reduce over every axis except the sample axis so that flat vectors
        # and image-shaped samples are both supported.
        feature_axes = tuple(range(1, diff.ndim))
        if method == "L1":
            return np.abs(diff).sum(axis=feature_axes)
        return np.sqrt((diff ** 2).sum(axis=feature_axes))

    def _vote(self, distances, k):
        """Return the most common label among the ``k`` smallest distances."""
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        # Stable sort keeps equidistant neighbours in training order, which
        # makes the result reproducible.
        k_indices = np.argsort(distances, kind="stable")[:k]
        k_nearest_labels = self.y_train[k_indices]
        return np.argmax(np.bincount(k_nearest_labels))

    def measure_distances(self, X, method="L1"):
        """Compute and cache the distances from each sample in ``X`` to the training set.

        The result is stored in ``self.distance_array`` (one 1-D array per
        sample in ``X``) for later use by ``predict_by_k``.
        """
        self.distance_array = [self._distances(x, method) for x in tqdm(X)]

    def predict_by_k(self, k):
        """Predict labels from the cached distances using ``k`` neighbours.

        Raises:
            RuntimeError: if ``measure_distances`` has not been called since
                the last ``fit``.
            ValueError: if ``k`` is smaller than 1.
        """
        if self.distance_array is None:
            raise RuntimeError(
                "No cached distances: call measure_distances() after fit() "
                "before predict_by_k()."
            )
        y_pred = [self._vote(dst, k) for dst in self.distance_array]
        return np.array(y_pred)


In [None]:
# Sanity check on a tiny 2-D dataset: two well separated clusters.
X = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])
y = np.array([0, 0, 1, 1, 0, 1])

# Query points, each placed close to one of the clusters.
X_test = np.array([[1, 2.5], [2, 3], [6, 9], [7, 9], [1, 1], [10, 12]])

# Fit a 2-neighbour classifier on the toy data.
knn = KNN(k=2)
knn.fit(X, y)

# Classify the query points with Euclidean (L2) distance.
y_pred = knn.predict(X_test, method="L2")

In [None]:
# Show the labels stored in y_pred by the toy-example cell
print("Predicted labels:", y_pred)

In [None]:
# Same toy dataset, this time exercising the cached-distance workflow.
X = np.array([[1, 2], [1.5, 1.8], [5, 8], [8, 8], [1, 0.6], [9, 11]])
y = np.array([0, 0, 1, 1, 0, 1])
X_test = np.array([[1, 2.5], [2, 3], [6, 9], [7, 9], [1, 1], [10, 12]])

knn = KNN(k=2)
knn.fit(X, y)

# Compute the test-to-train distances once (default metric: L1) ...
knn.measure_distances(X_test)

# ... then reuse them to vote with different neighbour counts.
for n_neighbors in (2, 5):
    y_pred = knn.predict_by_k(n_neighbors)
    print("Predicted labels:", y_pred)

In [None]:
# Dataset root (relative to the notebook) and its train/test sub-folders.
DATA_DIR = 'CIFAR-10-images-master'
TRAIN_DATA_DIR, TEST_DATA_DIR = (
    os.path.join(DATA_DIR, split) for split in ('train', 'test')
)

In [None]:
# Side length (pixels) every image is resized to before flattening.
IMG_SIZE = 32

# One class per sub-directory of the training folder. os.listdir returns
# entries in arbitrary order, so sort them to keep the class-index mapping
# reproducible, and ignore stray files (e.g. .DS_Store) that are not class
# folders.
CATEGORIES = sorted(
    entry
    for entry in os.listdir(TRAIN_DATA_DIR)
    if os.path.isdir(os.path.join(TRAIN_DATA_DIR, entry))
)

print(CATEGORIES)

In [None]:
# Preview grid: one row per category, SAMPLES_PER_CLASS images per row.
SAMPLES_PER_CLASS = 10

# Size the grid from the actual number of categories instead of a fixed
# 10x10 layout; squeeze=False keeps `axes` two-dimensional in every case.
fig, axes = plt.subplots(
    max(len(CATEGORIES), 1), SAMPLES_PER_CLASS, figsize=(15, 15), squeeze=False
)
for ax in axes.ravel():
    ax.set_xticks([])
    ax.set_yticks([])

for row_axes, c in zip(axes, CATEGORIES):
    path = os.path.join(TRAIN_DATA_DIR, c)
    # Sorted so the same images are shown on every run.
    filenames = sorted(os.listdir(path))[:SAMPLES_PER_CLASS]
    row_axes[0].set_ylabel(c)
    for ax, img in zip(row_axes, filenames):
        img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread returns None for unreadable files; leave the cell blank.
            continue
        # The image is single-channel, so render it with a grey colormap
        # rather than matplotlib's default false-colour map.
        ax.imshow(img_array, cmap="gray")

plt.tight_layout()
plt.show()

In [None]:
# Load every training image as a flattened grayscale vector paired with its
# class index: training_data == [[pixels, class_num], ...].
training_data = []
training_data_X = []
training_data_Y = []
skipped_files = []  # paths that could not be read or resized

for class_num, c in enumerate(CATEGORIES):
    path = os.path.join(TRAIN_DATA_DIR, c)
    # Sorted so the loading order does not depend on the file system.
    for img in tqdm(sorted(os.listdir(path)), desc=c):
        img_path = os.path.join(path, img)
        img_array = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img_array is None:
            # cv2.imread signals an unreadable file by returning None.
            skipped_files.append(img_path)
            continue
        try:
            img_resized = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
        except cv2.error:
            # Best effort: skip images OpenCV cannot process, but keep a record.
            skipped_files.append(img_path)
            continue
        training_data.append([np.reshape(img_resized, IMG_SIZE*IMG_SIZE), class_num])

print(len(training_data))
if skipped_files:
    print(f"Skipped {len(skipped_files)} unreadable file(s), e.g. {skipped_files[0]}")

In [None]:
# First loaded sample: its flattened pixel vector ...
print(training_data[0][0])
# ... and its class index (position of the category in CATEGORIES)
print(training_data[0][1])

In [None]:
# Number of pixels in one flattened image (IMG_SIZE * IMG_SIZE)
print(len(training_data[0][0]))
# Each entry is a [pixels, class_num] pair, so this prints 2
print(len(training_data[0]))

In [None]:
# Start from empty feature / label lists before splitting training_data
training_data_X = []
training_data_Y = []

In [None]:
from numpy import random

In [None]:
# Shuffle in place with a fixed seed so the subset taken later, and therefore
# every accuracy figure below, is reproducible under Restart & Run All.
SEED = 42
random.default_rng(SEED).shuffle(training_data)

In [None]:
# Split the [pixels, class_num] pairs into parallel feature / label lists.
# The lists are rebuilt from scratch (not appended to) so that re-running
# this cell cannot duplicate the data.
training_data_X = [data[0] for data in training_data]
training_data_Y = [data[1] for data in training_data]

In [None]:
# Pixel vector of the first sample after shuffling
print(training_data_X[0])

In [None]:
# Print the class indices of (up to) the first 30000 shuffled samples
print(training_data_Y[0:30000])

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
from sklearn.model_selection import KFold

In [None]:
# Brute-force KNN is slow, so cross-validate on a subset of the shuffled data.
N_SAMPLES = 2000

# Cast the pixels to float64: the loaded images are uint8, and uint8
# subtraction wraps around (0 - 1 == 255), which corrupts L1/L2 distances.
X = np.array(training_data_X[:N_SAMPLES], dtype=np.float64)
y = np.array(training_data_Y[:N_SAMPLES])

In [None]:
# First row of the feature matrix used for cross-validation
print(X[0])

In [None]:
# 5-fold cross-validation of KNN with Manhattan (L1) distance.
# For each fold the test-to-train distances are computed once and then
# reused to score every k in x_axis1.
k_folds = 5
x_axis1 = list(range(1, 100))   # neighbour counts to evaluate
result1 = []                    # one list of accuracies (per k) for each fold

# The constructor's k is irrelevant here: predict_by_k receives k explicitly.
knn3 = KNN()
kf = KFold(n_splits=k_folds)

for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    knn3.fit(X_train, y_train)
    knn3.measure_distances(X_test, method="L1")

    accuracies1 = []
    for i in tqdm(x_axis1):
        y_pred = knn3.predict_by_k(i)
        accuracies1.append(accuracy_score(y_test, y_pred))
    result1.append(accuracies1)

In [None]:
# 5-fold cross-validation of KNN with Euclidean (L2) distance.
# Only odd neighbour counts are evaluated here (1, 3, ..., 99).
k_folds = 5
x_axis2 = list(range(1, 100, 2))   # neighbour counts to evaluate
result2 = []                       # one list of accuracies (per k) for each fold

# The constructor's k is irrelevant here: predict_by_k receives k explicitly.
knn4 = KNN()
kf = KFold(n_splits=k_folds)

for train_index, test_index in kf.split(X):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    knn4.fit(X_train, y_train)
    knn4.measure_distances(X_test, method="L2")

    accuracies2 = []
    for i in tqdm(x_axis2):
        y_pred = knn4.predict_by_k(i)
        accuracies2.append(accuracy_score(y_test, y_pred))
    result2.append(accuracies2)

In [None]:
# result1 holds one row per fold and one column per k; transposing groups
# the fold scores of each k so they can be averaged.
y_axis1 = [np.mean(fold_scores) for fold_scores in np.transpose(result1)]

In [None]:
# result2 holds one row per fold and one column per k; transposing groups
# the fold scores of each k so they can be averaged.
y_axis2 = [np.mean(fold_scores) for fold_scores in np.transpose(result2)]

In [None]:
# L1 results: the evaluated k values and the fold-averaged accuracy for each
print(x_axis1)
print(y_axis1)

In [None]:
# L2 results: the evaluated k values and the fold-averaged accuracy for each
print(x_axis2)
print(y_axis2)

In [None]:
# Mean cross-validation accuracy as a function of k for both metrics.
# The figure size has to be passed when the figure is created; assigning
# `plt.figsize = ...` only sets an unused attribute on the pyplot module.
fig, ax = plt.subplots(figsize=(15, 15))

x1 = np.array(x_axis1)
y1 = np.array(y_axis1)

x2 = np.array(x_axis2)
y2 = np.array(y_axis2)

ax.plot(x1, y1, label="L1", marker='.')
ax.plot(x2, y2, label="L2", marker='.')
ax.legend()
ax.set_ylim((0.05, 0.25))
ax.set_xlabel("cross validation on k")
ax.set_ylabel("cross validation accuracy")
plt.show()

In [None]:
# Report, for each metric, the best fold-averaged accuracy and the k that
# achieved it (the first such k if several tie).
summaries = (
    ("For L1:", x_axis1, y_axis1),
    ("For L2:", x_axis2, y_axis2),
)
for position, (heading, k_values, mean_scores) in enumerate(summaries):
    if position:
        print()  # blank line between the two reports
    print(heading)
    print("Max accuracy: ", np.max(mean_scores))
    print("Value of K: ", k_values[np.argmax(mean_scores)])