In [1]:
import os
import struct
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

# 1. Create Dataset -- merge the MNIST training and test sets into one dataset

In [2]:
def load_data(path, kind=''):
    """Load one MNIST split stored as raw IDX files under `path`.

    Parameters
    ----------
    path : str
        Directory containing the uncompressed IDX files.
    kind : str
        File-name prefix of the split (e.g. 'train' or 't10k'). The files
        read are '<kind>-labels-idx1-ubyte' and '<kind>-images-idx3-ubyte'.

    Returns
    -------
    images : np.ndarray, dtype uint8, shape (n_images, rows * cols)
        One flattened image per row; rows/cols come from the file header.
    labels : np.ndarray, dtype uint8, shape (n_images,)

    Raises
    ------
    ValueError
        If a file's magic number is not the IDX label/image magic, or if the
        item counts declared in the headers disagree with the payload sizes
        or with each other.
    """
    labels_path = os.path.join(path, '%s-labels-idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images-idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # Header: big-endian magic number and item count.
        magic, n_labels = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:  # 0x00000801: unsigned byte data, 1 dimension
            raise ValueError('%s is not an IDX label file (magic=%d)'
                             % (labels_path, magic))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    if labels.size != n_labels:
        raise ValueError('%s declares %d labels but contains %d'
                         % (labels_path, n_labels, labels.size))

    with open(images_path, 'rb') as imgpath:
        # Header: big-endian magic number, image count, rows, cols.
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:  # 0x00000803: unsigned byte data, 3 dimensions
            raise ValueError('%s is not an IDX image file (magic=%d)'
                             % (images_path, magic))
        pixels = np.fromfile(imgpath, dtype=np.uint8)
    if pixels.size != num * rows * cols:
        raise ValueError('%s declares %d images of %dx%d but contains %d bytes'
                         % (images_path, num, rows, cols, pixels.size))
    if num != labels.size:
        raise ValueError('image count (%d) does not match label count (%d)'
                         % (num, labels.size))

    # Use the dimensions from the header instead of assuming 28x28 images.
    images = pixels.reshape(num, rows * cols)

    return images, labels

In [3]:
# Directory passed to load_data(); it must contain the uncompressed
# '<kind>-labels-idx1-ubyte' / '<kind>-images-idx3-ubyte' files.
path = './datasets/'

In [4]:
# Read both splits from `path`; load_data returns (images, labels).
# 'train' and 't10k' are the file-name prefixes of the two IDX file pairs.
X_train,y_train = load_data(path, kind='train')
X_test,y_test = load_data(path, kind='t10k')

In [5]:
# Stack the two splits row-wise (np.concatenate joins along axis 0 by default)
# so the cross-validation below draws its folds from the combined data.
X = np.concatenate([X_train, X_test])
y = np.concatenate([y_train, y_test])

# 2. Build Classifier

In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

In [7]:
# AdaBoost ensemble over decision trees.
# random_state is fixed (same seed as the KFold split in the training cell) so
# that re-running the notebook reproduces the reported scores; without it the
# tree construction is not deterministic between runs.
# NOTE(review): with max_depth=1000 each tree can presumably fit its training
# fold perfectly, in which case AdaBoost stops early and n_estimators /
# learning_rate have little effect -- confirm with len(clf_ada.estimators_)
# after fitting.
clf_ada = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1000),
    n_estimators=6000,
    learning_rate=0.001,
    random_state=42)

# 3. Training

In [8]:
from sklearn.model_selection import KFold
# 3-fold cross-validation over the combined dataset. For every fold the
# classifier is refit on the training part and scored on the held-out part;
# the accuracy and its complement (error rate) are collected per fold.
kf = KFold(n_splits=3, shuffle=True, random_state=42)
hs_scores, hs_errors = [], []
for train_index, test_index in kf.split(X):
    print("TRAIN:", len(train_index), "TEST:", len(test_index))

    # NOTE: these assignments overwrite the X_train/X_test/y_train/y_test
    # arrays loaded from disk earlier in the notebook.
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    clf_ada.fit(X_train, y_train)
    score = clf_ada.score(X_test, y_test)

    hs_scores.append(score)
    hs_errors.append(1-score)

    print("Accuracy is %.2f%%"% (score*100))
    print("Test error is %.2f%%"% ((1-score)*100))

TRAIN: 46666 TEST: 23334
Accuracy is 86.52%
Test error is 13.48%
TRAIN: 46667 TEST: 23333
Accuracy is 87.10%
Test error is 12.90%
TRAIN: 46667 TEST: 23333
Accuracy is 86.29%
Test error is 13.71%


In [9]:
hs_scores

[0.8652181366246678, 0.871041014871641, 0.8628980414005915]

In [10]:
hs_errors

[0.13478186337533216, 0.12895898512835902, 0.13710195859940855]