<a href="https://colab.research.google.com/github/RamaBharti/Classification-of-Handwritten-Digit-on-MNSIT-dataset/blob/master/mnsit_knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
#from sklearn.datasets import fetch_openml
from sklearn.preprocessing import StandardScaler
import numpy as np
import urllib.request
import gzip

# Location and layout of the gzip-compressed MNIST IDX files.
# NOTE(review): files are fetched over plain HTTP with no checksum
# verification -- consider a trusted mirror or hash check.
MNIST_BASE_URL = 'http://yann.lecun.com/exdb/mnist/'
IMAGE_HEADER_BYTES = 16   # bytes skipped before the pixel payload
LABEL_HEADER_BYTES = 8    # bytes skipped before the label payload
PIXELS_PER_IMAGE = 28 * 28
RANDOM_SEED = 42          # makes the randomized search reproducible


def read_idx_gz(source, header_bytes, row_size=None):
    """Decode a gzip-compressed IDX file into a ``uint8`` array.

    Args:
        source: Path of the ``.gz`` file, or an open binary file object.
        header_bytes: Number of leading bytes to skip before the payload.
        row_size: When given, the payload is reshaped to
            ``(-1, row_size)``; otherwise it is returned flat.

    Returns:
        A ``numpy.uint8`` array viewing the decompressed payload.
    """
    with gzip.open(source, 'rb') as f:
        payload = np.frombuffer(f.read(), np.uint8, offset=header_bytes)
    if row_size is None:
        return payload
    return payload.reshape(-1, row_size)


def fetch_mnist_file(name, header_bytes, row_size=None):
    """Download one MNIST file into the working directory and decode it.

    Args:
        name: File name, used both as the URL suffix under
            ``MNIST_BASE_URL`` and as the local destination path.
        header_bytes: Forwarded to :func:`read_idx_gz`.
        row_size: Forwarded to :func:`read_idx_gz`.

    Returns:
        The decoded ``numpy.uint8`` array.
    """
    filename, _headers = urllib.request.urlretrieve(MNIST_BASE_URL + name, name)
    return read_idx_gz(filename, header_bytes, row_size)


def report_metrics(accuracy_label, y_true, y_pred):
    """Print accuracy, macro precision/recall/F1 and the confusion matrix.

    Args:
        accuracy_label: Text printed in front of the accuracy value
            (a ``":"`` is appended).
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
    """
    print(accuracy_label + ":", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='macro'))
    print("Recall:", recall_score(y_true, y_pred, average='macro'))
    print("F1-score:", f1_score(y_true, y_pred, average='macro'))
    print("Confusion matrix:\n", confusion_matrix(y_true, y_pred))


# The guard keeps notebook/script behaviour unchanged (``__name__`` is
# "__main__" there) while letting the helpers above be imported without
# triggering downloads or training.
if __name__ == "__main__":
    # Load the four MNIST files (images flattened to one row per image).
    X_train = fetch_mnist_file('train-images-idx3-ubyte.gz',
                               IMAGE_HEADER_BYTES, PIXELS_PER_IMAGE)
    y_train = fetch_mnist_file('train-labels-idx1-ubyte.gz', LABEL_HEADER_BYTES)
    X_test = fetch_mnist_file('t10k-images-idx3-ubyte.gz',
                              IMAGE_HEADER_BYTES, PIXELS_PER_IMAGE)
    y_test = fetch_mnist_file('t10k-labels-idx1-ubyte.gz', LABEL_HEADER_BYTES)

    # Standardize features; the scaler is fitted on the training set only
    # and then applied to the test set.
    # NOTE(review): the scaler sees the full training set before the
    # cross-validation below, so CV folds are not fully independent of
    # their validation split; wrap scaler + KNN in a Pipeline if that matters.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train.astype(np.float32))
    X_test = scaler.transform(X_test.astype(np.float32))

    # Baseline: KNN with default hyperparameters.
    knn = KNeighborsClassifier()
    knn.fit(X_train, y_train)
    y_pred = knn.predict(X_test)
    report_metrics("Accuracy_knn", y_test, y_pred)

    cv_scores = cross_val_score(knn, X_train, y_train, cv=5)
    print("Cross-validation scores:", cv_scores)
    print("Mean cross-validation score:", cv_scores.mean())

    # Exhaustive grid search over a small hyperparameter grid.
    params = {
        'n_neighbors': [3, 5, 7],
        'weights': ['uniform', 'distance'],
    }
    grid_search = GridSearchCV(knn, params, cv=5)
    grid_search.fit(X_train, y_train)
    print("Best parameters:", grid_search.best_params_)
    print("Best score:", grid_search.best_score_)
    # With the default refit=True, best_estimator_ is already fitted on the
    # whole training set, so no additional fit() call is needed.
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)
    report_metrics("Accuracy_grid", y_test, y_pred)

    # Use randomized search to tune hyperparameters
    params = {
        'n_neighbors': range(1, 31),
        'weights': ['uniform', 'distance'],
        'leaf_size': range(10, 101, 10),
    }
    random_search = RandomizedSearchCV(knn, params, n_iter=10, cv=5,
                                       scoring='accuracy',
                                       random_state=RANDOM_SEED)
    random_search.fit(X_train, y_train)
    print("Best parameters:", random_search.best_params_)
    print("Best score:", random_search.best_score_)

    # best_estimator_ is already refitted on the full training set.
    best_model = random_search.best_estimator_
    y_pred = best_model.predict(X_test)
    # Evaluate the performance of the model using different evaluation metrics
    report_metrics("Accuracy_randon_search", y_test, y_pred)




Accuracy_knn: 0.9443
Precision: 0.9442112613151517
Recall: 0.9435861911754685
F1-score: 0.9436860218534469
Confusion matrix:
 [[ 963    0    1    3    1    5    6    1    0    0]
 [   0 1129    3    0    0    0    3    0    0    0]
 [  14    6  960   20    5    0    7    9   10    1]
 [   0    3    5  962    3   13    0   10   10    4]
 [   1   10    5    3  922    3    6    4    2   26]
 [   5    1    3   23    8  824   13    2    6    7]
 [  10    4    2    1    3    6  929    0    3    0]
 [   0   21   12    4    8    2    0  949    1   31]
 [  13    3    6   18    8   30    3    6  880    7]
 [   6    5    5   10   18    6    0   31    3  925]]
Cross-validation scores: [0.94108333 0.94291667 0.94333333 0.93633333 0.94658333]
Mean cross-validation score: 0.94205
Best parameters: {'n_neighbors': 5, 'weights': 'distance'}
Best score: 0.94435
Accuracy_grid: 0.945
Precision: 0.9448822786032318
Recall: 0.9443233317556862
F1-score: 0.9444198669574518
Confusion matrix:
 [[ 963    0    0   