## Importing libraries 

In [1]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchsummary import summary
from tqdm import tqdm

## Downloading the dataset.

In [2]:
# Downloading the MNIST dataset.
train_data = MNIST('mnist', train = True, download=True, transform= ToTensor())
test_data = MNIST('mnist', train= False, download=True, transform= ToTensor())

## Making the train and test set.

In [8]:
x_train = []
y_train = []
for image, label in train_data:
    # print(image.shape)
    x = image.ravel()
    y = label
    x_train.append(x)
    y_train.append(y)
x_train = np.array(x_train)
y_train = np.array(y_train)
x_train.shape, y_train.shape

((60000, 784), (60000,))

In [11]:
x_test = []
y_test = []
for image, label in test_data:
    x = image.reshape(-1)
    y = label
    x_test.append(x)
    y_test.append(y)
x_test = np.array(x_test)
y_test = np.array(y_test)
x_test.shape, y_test.shape

((10000, 784), (10000,))

## Importing models 

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression


## Random Forest implementation.

In [41]:
RF_model = RandomForestClassifier()
RF_model.fit(x_train, y_train)
RF_predicted = RF_model.predict(x_test)

## Stats of RF based model.

In [40]:
accuracy = accuracy_score(y_test,RF_predicted)
precision = precision_score(y_test,RF_predicted, average="macro")
recall = recall_score(y_test, RF_predicted, average="macro")
f1 = f1_score(y_test,RF_predicted, average="macro")
conf = confusion_matrix(y_test,RF_predicted)

print("Accuracy score of RF", accuracy, end="\n\n")
print("Precision score of RF", precision,  end="\n\n")
print("Recall score of RF",recall,  end="\n\n")
print("F1 score of RF", f1,  end="\n\n")
print("Confusion matrix of RF\n", conf,  end="\n\n")

Accuracy score of RF 0.971

Precision score of RF 0.970873752323483

Recall score of RF 0.9707723113121738

F1 score of RF 0.9708061462518804

Confusion matrix of RF
 [[ 970    0    0    0    0    2    3    1    4    0]
 [   0 1122    4    3    0    1    3    0    1    1]
 [   5    0 1001    6    2    1    2    8    7    0]
 [   0    0    7  977    1    4    0   10    9    2]
 [   1    0    0    0  958    0    5    0    3   15]
 [   2    1    1   13    3  861    3    1    6    1]
 [   5    3    1    0    4    4  937    0    4    0]
 [   1    3   18    2    0    0    0  993    1   10]
 [   4    0    4    8    6    5    5    5  930    7]
 [   7    5    2   11    6    3    2    5    7  961]]



## Decision Tree implementation.

In [38]:
DT_model = DecisionTreeClassifier()
DT_model.fit(x_train, y_train)
DT_predicted = DT_model.predict(x_test)

## Stats of DT based model.

In [39]:
accuracy = accuracy_score(y_test,DT_predicted)
precision = precision_score(y_test,DT_predicted, average="macro")
recall = recall_score(y_test, DT_predicted, average="macro")
f1 = f1_score(y_test,DT_predicted, average="macro")
conf = confusion_matrix(y_test,DT_predicted)

print("Accuracy score of DT", accuracy, end="\n\n")
print("Precision score of DT", precision,  end="\n\n")
print("Recall score of DT",recall,  end="\n\n")
print("F1 score of DT", f1,  end="\n\n")
print("Confusion matrix of DT\n", conf,  end="\n\n")

Accuracy score of DT 0.8741

Precision score of DT 0.8726869149994633

Recall score of DT 0.8725292518893152

F1 score of DT 0.8725382334415371

Confusion matrix of DT
 [[ 910    1   10    7    6   11   18    3    6    8]
 [   2 1085    7    7    2    6    6    5   13    2]
 [  12   12  876   37   16    9   17   23   23    7]
 [  11    2   27  861    6   47    4    8   25   19]
 [   7    4    6    5  853   10   23   10   18   46]
 [  18   10    4   38    8  742   22    6   29   15]
 [  16    6   11    9   15   25  838    1   31    6]
 [   4   12   26   17    6    2    1  927    9   24]
 [   9    8   25   43   16   24   16   10  792   31]
 [  13    1    7   22   43   12    6   21   27  857]]



## Logistic Regression Implementation

In [42]:
Log_reg = LogisticRegression()
Log_reg.fit(x_train, y_train)
LR_predicted = Log_reg.predict(x_test)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Stats for Logistic Regression model.

In [43]:
accuracy = accuracy_score(y_test,LR_predicted)
precision = precision_score(y_test,LR_predicted, average="macro")
recall = recall_score(y_test, LR_predicted, average="macro")
f1 = f1_score(y_test,LR_predicted, average="macro")
conf = confusion_matrix(y_test,LR_predicted)

print("Accuracy score of LR", accuracy, end="\n\n")
print("Precision score of LR", precision,  end="\n\n")
print("Recall score of LR",recall,  end="\n\n")
print("F1 score of LR", f1,  end="\n\n")
print("Confusion matrix of LR\n", conf,  end="\n\n")

Accuracy score of LR 0.9256

Precision score of LR 0.9247087190360826

Recall score of LR 0.9245122318683123

F1 score of LR 0.9245116077820696

Confusion matrix of LR
 [[ 959    0    0    3    1    7    5    4    1    0]
 [   0 1112    4    2    0    2    3    2   10    0]
 [   6    9  928   16    8    4   15    7   35    4]
 [   4    1   17  921    0   23    4   11   23    6]
 [   1    1    7    4  914    0   10    4   10   31]
 [  10    2    3   37    8  779   14    5   29    5]
 [   9    3    7    3    8   15  910    2    1    0]
 [   1    9   23    6    7    1    0  950    2   29]
 [   9   10    8   26    8   26   12    7  857   11]
 [   9    8    0   11   23    6    0   19    7  926]]



## Results Comparison

| Model                | Accuracy Score | Precision Score | Recall Score | F1 Score |
|----------------------|----------------|-----------------|--------------|----------|
| Random Forest        | 0.9710         | 0.9700          | 0.9707       | 0.9708   |
| Decision Tree        | 0.8741         | 0.8726          | 0.8725       | 0.8725   |
| Logistic Regression  | 0.9256         | 0.9247          | 0.9245       | 0.9245   |