<a href="https://colab.research.google.com/github/RecaiEfeDik/Handwritten-Digit-Recognition-Using-KNN/blob/main/Handwritten_Digit_Recognition_Using_KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import threading
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn import preprocessing

In [2]:
# Load the CSV and convert it to a NumPy array.
# Column 0 is used as the label below; the remaining columns are the flattened pixels.
df = pd.read_csv('dataset.csv')
total_data = np.array(df)

# Optional standard scaling (currently disabled).
# NOTE: as written this would also scale the label column (column 0).
# total_data_scaled = preprocessing.StandardScaler().fit_transform(total_data)

# Hold out a small test split; the fixed random_state keeps the split reproducible.
train_split, test_split = train_test_split(total_data, test_size=0.001, random_state=42)

# Separate the label column from the pixel columns for both splits.
labels, train_data = train_split[:, 0], train_split[:, 1:]
test_labels, test_data = test_split[:, 0], test_split[:, 1:]

In [3]:
#Define different types of distance functions to be used

def manhattan_distance(x, y, distance_list):
  """Append the Manhattan (L1) distance between x and y to distance_list.

  The distance is the sum of the absolute element-wise differences.
  Nothing is returned; the result is delivered through distance_list.
  """
  delta = x - y
  l1_total = np.abs(delta).sum()
  distance_list.append(l1_total)

def euclidean_distance(x, y, distance_list):
  """Append the Euclidean (L2) distance between x and y to distance_list.

  The distance is the square root of the sum of squared element-wise
  differences. Nothing is returned; the result is delivered through
  distance_list.
  """
  delta = x - y
  squared_total = (delta * delta).sum()
  distance_list.append(np.sqrt(squared_total))

def cosine_distance(x, y, distance_list):
  """Append the cosine distance between x and y to distance_list.

  Cosine distance is 1 - (x . y) / (||x|| * ||y||), so it depends only on
  the angle between the vectors and not on their magnitudes. The previous
  implementation omitted the division by the norms, which is only correct
  when both inputs are already unit-length.

  Parameters:
    x, y: 1-D numeric vectors of equal length.
    distance_list: list that receives the computed distance.

  If either vector has zero norm its direction is undefined; the distance is
  then reported as 1.0 (zero similarity) instead of dividing by zero.
  Nothing is returned; the result is delivered through distance_list.
  """
  x = np.asarray(x, dtype=float)
  y = np.asarray(y, dtype=float)

  norm_product = np.linalg.norm(x) * np.linalg.norm(y)
  if norm_product == 0:
    # A zero vector has no direction: treat it as having no similarity.
    distance_list.append(1.0)
    return

  distance_list.append(1 - np.dot(x, y) / norm_product)

In [4]:
#Define KNN Classifier

def KNN_Classifier(labels, train_data, test_point, k, distance_function_type, prediction_list):
  """Classify one test point by majority vote among its k nearest training rows.

  Parameters:
    labels: array of training labels; labels[i] belongs to train_data[i].
    train_data: iterable of training feature vectors, one per row.
    test_point: feature vector to classify.
    k: number of nearest neighbours that take part in the vote (>= 1).
    distance_function_type: 'manhattan', 'euclidean' or 'cosine'.
    prediction_list: list that receives the predicted label.

  Returns:
    The predicted label (the same value that is appended to prediction_list).
    When several labels tie for the most votes, the smallest label wins.

  Raises:
    ValueError: if distance_function_type is not a supported name, if k < 1,
      or if train_data is empty.
  """
  distance_functions = {
    'manhattan': manhattan_distance,
    'euclidean': euclidean_distance,
    'cosine': cosine_distance,
  }
  if distance_function_type not in distance_functions:
    raise ValueError(
      "Unknown distance_function_type %r; expected one of %s"
      % (distance_function_type, sorted(distance_functions))
    )
  if k < 1:
    raise ValueError("k must be at least 1, got %r" % (k,))
  distance_function = distance_functions[distance_function_type]

  # Compute the distances sequentially. Appending from one thread per training
  # row would store results in thread completion order, which is not guaranteed
  # to match the row order that the label lookup below relies on.
  distances = []
  for train_point in train_data:
    distance_function(test_point, train_point, distances)
  if not distances:
    raise ValueError("train_data is empty")
  distances = np.array(distances)

  # Indices of the k training rows closest to the test point.
  nearest_indices = np.argsort(distances)[:k]
  k_nearest_labels = np.asarray(labels)[nearest_indices]

  # Majority vote. np.unique returns the candidates in ascending order, so
  # argmax picks the smallest label on ties, and it also works for labels that
  # are not non-negative integers.
  candidates, votes = np.unique(k_nearest_labels, return_counts=True)
  predicted_label = candidates[np.argmax(votes)]

  prediction_list.append(predicted_label)
  return predicted_label


In [15]:
# Classify every held-out sample using Manhattan distance with k=5.
# KNN_Classifier appends each predicted label to `predictions`.

predictions = []
n_test = len(test_data)

for i, test_point in enumerate(test_data):
  print("Progress: %" + str((i/n_test*100)) + "\n")
  KNN_Classifier(labels, train_data, test_point, 5, 'manhattan', predictions)


Progress: %0.0

Progress: %2.380952380952381

Progress: %4.761904761904762

Progress: %7.142857142857142

Progress: %9.523809523809524

Progress: %11.904761904761903

Progress: %14.285714285714285

Progress: %16.666666666666664

Progress: %19.047619047619047

Progress: %21.428571428571427

Progress: %23.809523809523807

Progress: %26.190476190476193

Progress: %28.57142857142857

Progress: %30.952380952380953

Progress: %33.33333333333333

Progress: %35.714285714285715

Progress: %38.095238095238095

Progress: %40.476190476190474

Progress: %42.857142857142854

Progress: %45.23809523809524

Progress: %47.61904761904761

Progress: %50.0

Progress: %52.38095238095239

Progress: %54.761904761904766

Progress: %57.14285714285714

Progress: %59.523809523809526

Progress: %61.904761904761905

Progress: %64.28571428571429

Progress: %66.66666666666666

Progress: %69.04761904761905

Progress: %71.42857142857143

Progress: %73.80952380952381

Progress: %76.19047619047619

Progress: %78.57142857

In [16]:
# Score the predictions against the held-out labels: overall accuracy,
# the confusion matrix, and the per-class precision/recall report.
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:\n", accuracy)

conf_matrix = confusion_matrix(test_labels, predictions)
print("\nConfusion Matrix:\n", conf_matrix)

report = classification_report(test_labels, predictions)
print("\nClassification Report:\n", report)


Accuracy:
 0.8095238095238095

Confusion Matrix:
 [[3 0 0 0 0 0 0 0 0 0]
 [0 5 0 0 0 0 0 0 0 0]
 [0 0 4 0 0 0 0 0 0 0]
 [0 1 0 3 0 0 0 0 1 0]
 [0 0 0 0 2 0 0 0 0 0]
 [0 0 0 0 0 1 0 0 0 0]
 [0 0 1 0 0 0 8 0 0 0]
 [1 0 0 0 0 0 0 3 0 0]
 [0 0 0 0 0 0 0 1 3 0]
 [0 1 2 0 0 0 0 0 0 2]]

Classification Report:
               precision    recall  f1-score   support

           0       0.75      1.00      0.86         3
           1       0.71      1.00      0.83         5
           2       0.57      1.00      0.73         4
           3       1.00      0.60      0.75         5
           4       1.00      1.00      1.00         2
           5       1.00      1.00      1.00         1
           6       1.00      0.89      0.94         9
           7       0.75      0.75      0.75         4
           8       0.75      0.75      0.75         4
           9       1.00      0.40      0.57         5

    accuracy                           0.81        42
   macro avg       0.85      0.84      0.82 