# Boosting

In [26]:
import numpy as np 
from tensorflow.keras.datasets import mnist
from sklearn import metrics
from sklearn.metrics import accuracy_score
from tabulate import tabulate
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.model_selection import cross_val_score, ShuffleSplit
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
import matplotlib.pyplot as plt



## Import data

In [27]:
# Load the MNIST train/test arrays via the Keras dataset helper.
(train_X_unshaped, train_y), (test_X_unshaped, test_y) = mnist.load_data()

# Report the shape of every array, then echo the training labels.
print(f'X_train: {train_X_unshaped.shape}')
print(f'Y_train: {train_y.shape}')
print(f'X_test:  {test_X_unshaped.shape}')
print(f'Y_test:  {test_y.shape}')
print(train_y)

X_train: (60000, 28, 28)
Y_train: (60000,)
X_test:  (10000, 28, 28)
Y_test:  (10000,)
[5 0 4 ... 5 6 8]


## Preprocess data

In [28]:
# Flatten every image into a single row (one row per sample, all remaining
# axes merged) and divide by 255.0 -- presumably rescaling 8-bit pixel
# intensities to the [0, 1] range.
train_X = train_X_unshaped.reshape(len(train_X_unshaped), -1) / 255.0
test_X = test_X_unshaped.reshape(len(test_X_unshaped), -1) / 255.0

# Confirm the flattened shapes.
print(f'X_train: {train_X.shape}')
print(f'X_test:  {test_X.shape}')

X_train: (60000, 784)
X_test:  (10000, 784)


## Create model

In [29]:
def train(est, X, y, cv, name):
    """Fit ``est`` on ``(X, y)`` and print its cross-validated macro-F1 score.

    Parameters
    ----------
    est : estimator
        Object exposing ``fit``; it is also handed to ``cross_val_score``.
    X, y : array-like
        Features and labels used both for the fit and for cross-validation.
    cv : cross-validation splitter or int
        Forwarded unchanged as the ``cv`` argument of ``cross_val_score``.
    name : object
        Label printed in front of the score summary.

    Returns
    -------
    None
        The summary ``"<name>: f1 score <mean> +/- <std>"`` is printed.
    """
    # Fit on the data supplied by the caller; the previous version silently
    # used the notebook-level train_X / train_y and ignored X and y.
    est.fit(X, y)
    cv_score = cross_val_score(est, X, y, cv=cv, scoring='f1_macro')
    # Format mean and std separately: the previous version added the float
    # mean to a str inside a misplaced parenthesis, raising TypeError.
    print(f"{name}: f1 score {cv_score.mean()} +/- {cv_score.std()}")

In [30]:
# Ten random 80/20 train/validation splits. The fixed random_state makes the
# splits -- and therefore the reported cross-validation scores -- identical
# on every Restart & Run All instead of changing from run to run.
cv = ShuffleSplit(n_splits=10, test_size=.2, random_state=42)

In [None]:
def test_model(est, X, y):
    y_pred = est.predict(X)
    print("Classification Report")
    print(classification_report(y, y_pred))

    print("Confusion Matrix")
    cmd = ConfusionMatrixDisplay(confusion_matrix(y, y_pred))
    cmd.plot()
    plt.title("Confusion Matrix")
    plt.show()

In [22]:
# NOTE(review): `clf` is not defined in any cell visible in this notebook; a
# fitted classifier must be bound to that name before this cell can run.
train_y_pred = clf.predict(train_X)

# Predicted labels on the first line, true labels on the second, for the
# samples at positions 1..19 (position 0 is not included in the slice).
print(train_y_pred[1:20], train_y[1:20], sep='\n')

[5 4 2 4 2 3 3 1 7 3 2 3 8 1 9 2 8 6 7]
[0 4 1 9 2 1 3 1 4 3 5 3 6 1 7 2 8 6 9]


10

In [23]:
# Display the classifier's `n_classes_` attribute (the recorded output is 10).
# NOTE(review): `clf` is not created in any visible cell -- confirm where it is defined.
clf.n_classes_

10

In [None]:
# Score the training-set predictions against the true training labels.
accuracy = accuracy_score(train_y, train_y_pred)
print(f'Accuracy score : {accuracy!s}')