In [None]:
# Importing the required Libraries
from sklearn.datasets import fetch_openml
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import matplotlib
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score,f1_score,confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
import cv2

# Fetching Dataset

In [None]:
# Fetch the 'mnist_784' dataset from OpenML, looked up by its dataset name.
mnist = fetch_openml(name='mnist_784')

In [None]:
# Extracting Data and target
# Both entries of the fetched bunch are copied into plain NumPy arrays so the
# cells below can index them positionally.
x = np.array(mnist['data'])
y = np.array(mnist['target'])

In [None]:
# Shuffling the dataset
# A seeded generator makes the shuffle (and therefore the train/test split
# taken from it) identical on every "Restart Kernel -> Run All".
# The permutation length is taken from the data instead of a hard-coded 70000,
# so the cell keeps working if the number of samples ever differs.
# NOTE: re-running only this cell permutes the already shuffled arrays again.
shuffle_rng = np.random.default_rng(42)
shuffle_index = shuffle_rng.permutation(len(x))
x, y = x[shuffle_index], y[shuffle_index]

In [None]:
# Train-test split
# The first 60000 shuffled samples are used for training, the rest for testing.
n_train = 60000
x_train = x[:n_train]
x_test = x[n_train:]
# Labels are cast to integers for both splits.
y_train = y[:n_train].astype(int)
y_test = y[n_train:].astype(int)

# Training the model

In [None]:
# Random Forest
# random_state pins the bootstrap samples and feature sub-sampling so the
# fitted forest (and every score computed from it) is reproducible.
# n_jobs=-1 builds the trees on all available cores; it does not change the result.
clf1 = RandomForestClassifier(random_state=42, n_jobs=-1)
clf1.fit(x_train, y_train)

In [None]:
# KNN
# k-nearest-neighbours classifier with scikit-learn's default settings,
# fitted on the same training split as the random forest.
clf2 = KNeighborsClassifier()
clf2.fit(X=x_train, y=y_train)

# Testing the model

In [None]:
# Random Forest
# Predict the held-out test split, then report accuracy and macro-averaged F1
# (both scaled to percentages).
y_predicted_RF = clf1.predict(x_test)
print("Random Forest :")
rf_accuracy_pct = accuracy_score(y_test, y_predicted_RF) * 100
print("Accuracy :", rf_accuracy_pct, "%")
rf_macro_f1_pct = f1_score(y_test, y_predicted_RF, average='macro') * 100
print("F1 Score :", rf_macro_f1_pct, "%")

In [None]:
# KNN
# Predict the held-out test split, then report accuracy and macro-averaged F1
# (both scaled to percentages).
y_predicted_KNN = clf2.predict(x_test)
print("\nKNN :")
knn_accuracy_pct = accuracy_score(y_test, y_predicted_KNN) * 100
print("Accuracy :", knn_accuracy_pct, "%")
knn_macro_f1_pct = f1_score(y_test, y_predicted_KNN, average='macro') * 100
print("F1 Score :", knn_macro_f1_pct, "%")

# Confusion Matrix

In [None]:
# Random Forest
# Confusion matrix of the test-split predictions:
# rows are the true digits, columns are the predicted digits.
print("Random Forest :")
rf_confusion = confusion_matrix(y_true=y_test, y_pred=y_predicted_RF)
print(rf_confusion)

In [None]:
# KNN
# Confusion matrix of the test-split predictions:
# rows are the true digits, columns are the predicted digits.
print("KNN : ")
knn_confusion = confusion_matrix(y_true=y_test, y_pred=y_predicted_KNN)
print(knn_confusion)

# Cross Validation

In [None]:
# Random Forest
# 3-fold cross-validated accuracy on the training split (one score per fold).
rf_cv_accuracy = cross_val_score(clf1, x_train, y_train, scoring="accuracy", cv=3)
print(rf_cv_accuracy)

In [None]:
# KNN
# 3-fold cross-validated accuracy on the training split (one score per fold).
knn_cv_accuracy = cross_val_score(clf2, x_train, y_train, scoring="accuracy", cv=3)
print(knn_cv_accuracy)

# Testing on a Random Image Outside the Dataset

In [None]:
# taking the path of image for testing as input
# Surrounding whitespace and quote characters are stripped so that pasted or
# drag-and-dropped paths are accepted.
path = input("Enter the path of image :").strip().strip('"\'')

# reading the image into variable as grayscale
img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)

# cv2.imread does not raise on a missing or unreadable file, it returns None;
# fail here with a clear message instead of a cryptic error inside cv2.resize.
if img is None:
    raise FileNotFoundError("Could not read an image from path: {!r}".format(path))

# resizing the image to 28 x 28 shape
# NOTE(review): MNIST digits appear to be light strokes on a dark background;
# a dark-on-light input image may need inverting (255 - img) first - confirm.
new_img = cv2.resize(img, (28, 28))

# reshaping to 1 x 784 as our model is trained for 1 x 784 shaped arrays
# (cv2 already returns a NumPy array, so no extra conversion is required)
np_img = new_img.reshape((1, 784))

# pre-processing done

In [None]:
# Predicting
# Each classifier returns a one-element array holding the predicted digit.
rf_digit = clf1.predict(np_img)
print("Random Forest predicts the digit : ", rf_digit)
knn_digit = clf2.predict(np_img)
print("KNN predicts the digit : ", knn_digit)

In [None]:
# showing the image
# Use a grayscale colormap: imshow's default colormap would false-colour the
# single-channel pixel intensities. plt.show() also keeps the AxesImage repr
# out of the cell output.
plt.imshow(np_img.reshape((28, 28)), cmap="gray")
plt.title("Input image resized to 28 x 28")
plt.axis("off")
plt.show()

In [None]:
# the probabilities for each digit
y_pr1 = clf1.predict_proba(np_img) # Random Forest
y_pr2 = clf2.predict_proba(np_img) # KNN
print("Chances for each digit prediction")
print("Digit \t Random Forest \t KNN")
# Column k of predict_proba belongs to classes_[k], so the labels are read from
# the fitted model instead of assuming they are exactly 0..9 in order.
# round() is used instead of a bare int(): int() truncates, so a probability
# such as 0.29 (0.29 * 100 == 28.999999999999996) was displayed as 28 %.
for digit, rf_prob, knn_prob in zip(clf1.classes_, y_pr1[0], y_pr2[0]):
    rf_pct = int(round(rf_prob * 100))
    knn_pct = int(round(knn_prob * 100))
    print(digit, " -> ", rf_pct, "% \t", knn_pct, "%")