<a href="https://colab.research.google.com/github/BossAyush07/Deep-Learning-Projects/blob/master/xgboost_on_mnist_handwritten_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## IMPORTING IMPORTANT LIBRARIES

In [1]:
from keras.datasets import mnist
from matplotlib import pyplot
import numpy as np
import pandas as pd

## READING THE DATASET

In [2]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
print(x_train.shape, x_test.shape)

(60000, 28, 28) (10000, 28, 28)


## RESHAPING THE DATASET

In [4]:
x_train = x_train.reshape(x_train.shape[0], 784)
x_test = x_test.reshape(x_test.shape[0], 784)
print(x_train.shape, x_test.shape)

(60000, 784) (10000, 784)


In [5]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()

In [6]:
x_train_std = sc.fit_transform(x_train)
x_test_std = sc.fit_transform(x_test)
print(x_train_std.shape, x_test_std.shape)

(60000, 784) (10000, 784)


In [7]:
from sklearn.model_selection import train_test_split
x_train_sub, x_test_sub, y_train_sub, y_test_sub = train_test_split(x_train_std, y_train, test_size=0.1)

In [8]:
import xgboost as xgb

In [9]:
param_list = [("eta", 0.08), ("max_depth", 6), ("subsample", 0.8), ("colsample_bytree", 0.8), ("objective", "multi:softmax"), ("eval_metric", "merror"), ("alpha", 8), ("lambda", 2), ("num_class", 10)]
n_rounds = 600
early_stopping = 50
d_train = xgb.DMatrix(x_train_sub, label=y_train_sub)
d_val = xgb.DMatrix(x_test_sub, label=y_test_sub)
eval_list = [(d_train, "train"), (d_val, "validation")]

bst = xgb.train(param_list, d_train, n_rounds, evals=eval_list, early_stopping_rounds=early_stopping, verbose_eval=True)

[0]	train-merror:0.142963	validation-merror:0.16
Multiple eval metrics have been passed: 'validation-merror' will be used for early stopping.

Will train until validation-merror hasn't improved in 50 rounds.
[1]	train-merror:0.110778	validation-merror:0.126667
[2]	train-merror:0.096352	validation-merror:0.111167
[3]	train-merror:0.090611	validation-merror:0.105167
[4]	train-merror:0.087148	validation-merror:0.102833
[5]	train-merror:0.084296	validation-merror:0.102167
[6]	train-merror:0.082259	validation-merror:0.100333
[7]	train-merror:0.079	validation-merror:0.097333
[8]	train-merror:0.076796	validation-merror:0.095
[9]	train-merror:0.074463	validation-merror:0.092167
[10]	train-merror:0.072778	validation-merror:0.089333
[11]	train-merror:0.071648	validation-merror:0.0885
[12]	train-merror:0.069574	validation-merror:0.0855
[13]	train-merror:0.068593	validation-merror:0.083333
[14]	train-merror:0.066981	validation-merror:0.081833
[15]	train-merror:0.065741	validation-merror:0.081333
[

## PREDICING THE VALUE ON TEST DATA

In [10]:
d_test = xgb.DMatrix(data=x_test_std)
y_pred = bst.predict(d_test)
y_pred

array([7., 2., 1., ..., 4., 5., 6.], dtype=float32)

## CALCULATING THE ACCURACY OF MODEL

In [11]:
from sklearn.metrics import accuracy_score
print(np.round(accuracy_score(y_test, y_pred)*100, 2), '%')

97.07 %


## CREATING A CONFUSION MATRIX

In [12]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, y_pred)

array([[ 969,    0,    1,    0,    0,    1,    2,    1,    5,    1],
       [   0, 1114,    2,    3,    0,    5,    1,    0,   10,    0],
       [   4,    0,  997,    7,    3,    0,    1,    5,   14,    1],
       [   2,    0,    5,  982,    0,   11,    0,    1,    7,    2],
       [   1,    0,    3,    1,  962,    0,    2,    0,    4,    9],
       [   3,    0,    0,    5,    0,  871,    3,    1,    9,    0],
       [  10,    2,    1,    0,    7,    7,  924,    0,    7,    0],
       [   2,    3,   24,    5,    6,    2,    0,  961,    8,   17],
       [   5,    1,    4,    0,    3,    3,    0,    1,  954,    3],
       [   7,    2,    2,    6,   10,    3,    0,    0,    6,  973]])