# Model Assessment
Load the `mnist` dataset and split it into training and test sets. Define a grid of hyperparameters for a couple of classifiers and tune them using grid search with k-fold cross-validation. Evaluate the best model on the held-out test set and report several evaluation metrics.

## Importing Modules

In [19]:
import pandas as pd
import sklearn.model_selection
import sklearn.metrics
import sklearn.svm
import sklearn.tree
import plotly.express as px

## Loading the Dataset

In [20]:
# Read the MNIST table from disk and key its rows by the "id" column.
df = pd.read_csv("../../datasets/mnist.csv").set_index("id")

# Show the first three rows as a quick sanity check of the load.
df.head(3)

Unnamed: 0_level_0,class,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
31953,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
34452,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60897,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Splitting the Data into Training and Test Sets

In [21]:
# Separate the pixel feature columns from the label stored in "class".
x = df.drop(columns=["class"])
y = df["class"]

# Fix the shuffle seed so that "Restart Kernel -> Run All" reproduces the same
# train/test partition; without it every run scores a different split.
x_train, x_test, y_train, y_test = sklearn.model_selection.train_test_split(
    x, y, random_state=42
)

## Model Selection and Hyperparameter Tuning

In [22]:
# Decision Tree ---------------------
# Candidate settings: both split criteria, depths 1, 4, ..., 19 and
# minimum split sizes 2, 5, ..., 17.
parameters_grid = {
    "criterion": ["gini", "entropy"],
    "max_depth": range(1, 20, 3),   # [1, 4, 7, ...]
    "min_samples_split": range(2, 20, 3)
}
# random_state is fixed because the tree learner permutes features at every
# split; leaving it unset makes the selected hyperparameters and the reported
# score vary between runs.
model_1 = sklearn.model_selection.GridSearchCV(
    sklearn.tree.DecisionTreeClassifier(random_state=42),
    parameters_grid, scoring="accuracy", cv=5, n_jobs=-1)
model_1.fit(x_train, y_train)
# best_score_ is the mean 5-fold cross-validated accuracy of the best grid point.
print("Accuracy of best decision tree classfier = {:.2f}".format(model_1.best_score_))
print("Best found hyperparameters of decision tree classfier = {}".format(model_1.best_params_))
# -----------------------------------

# SVM -------------------------------
# Candidate settings: three kernels crossed with six values of the
# regularisation parameter C spanning 0.001 to 100.
parameters_grid = {
    "kernel": ["linear", "rbf", "poly"],
    "C": [0.001, 0.01, 0.1, 1, 10, 100]
}
model_2 = sklearn.model_selection.GridSearchCV(
    sklearn.svm.SVC(),
    parameters_grid, scoring="accuracy", cv=5, n_jobs=-1)
model_2.fit(x_train, y_train)
print("Accuracy of best SVM classfier = {:.2f}".format(model_2.best_score_))
print("Best found hyperparameters of SVM classifier = {}".format(model_2.best_params_))
# -----------------------------------

Accuracy of best decision tree classfier = 0.76
Best found hyperparameters of decision tree classfier = {'criterion': 'entropy', 'max_depth': 7, 'min_samples_split': 5}
Accuracy of best SVM classfier = 0.95
Best found hyperparameters of SVM classifier = {'C': 10, 'kernel': 'rbf'}


## Testing the Best Model

In [23]:
# Choose the tuned search with the higher mean cross-validated accuracy rather
# than hard-coding one of them, so this cell stays correct if a re-run (or a
# changed grid) makes the other classifier win. Both searches were scored with
# the same metric and the same number of folds on x_train, so their
# best_score_ values are directly comparable.
best_model = max((model_1, model_2), key=lambda search: search.best_score_)

# GridSearchCV.predict delegates to the best estimator refitted on all of x_train.
y_predicted = best_model.predict(x_test)

# Overall accuracy, the class-by-class confusion matrix, and per-class
# precision / recall / F1 (one array entry per class, plus the class supports).
accuracy = sklearn.metrics.accuracy_score(y_test, y_predicted)
cm = sklearn.metrics.confusion_matrix(y_test, y_predicted)
precision, recall, f1, support = sklearn.metrics.precision_recall_fscore_support(y_test, y_predicted)

print("Accuracy =", accuracy)
print("Precision =", precision)
print("Recall =", recall)
print("F1-Score =", f1)
print("Confusion Matrix:\n", cm)

Accuracy = 0.955
Precision = [0.98924731 0.95081967 0.96363636 0.96261682 0.95833333 0.87356322
 0.98019802 0.96296296 0.9673913  0.92857143]
Recall = [1.         0.99145299 0.96363636 0.92792793 0.95833333 0.96202532
 0.97058824 0.97196262 0.90816327 0.88636364]
F1-Score = [0.99459459 0.9707113  0.96363636 0.94495413 0.95833333 0.91566265
 0.97536946 0.96744186 0.93684211 0.90697674]
Confusion Matrix:
 [[ 92   0   0   0   0   0   0   0   0   0]
 [  0 116   0   0   1   0   0   0   0   0]
 [  0   0 106   0   0   1   0   2   0   1]
 [  0   1   1 103   0   2   1   1   2   0]
 [  0   0   1   0  92   0   0   1   0   2]
 [  0   1   0   0   0  76   0   0   1   1]
 [  1   1   0   0   0   1  99   0   0   0]
 [  0   0   0   0   1   0   0 104   0   2]
 [  0   2   0   2   0   4   1   0  89   0]
 [  0   1   2   2   2   3   0   0   0  78]]
