In [1]:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@author: Xiao Jin
"""
import numpy as np
import tensorflow as tf
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, confusion_matrix

In [2]:
# Load the MNIST train/test split through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Cast to float32 and divide by 255 (presumably mapping 8-bit pixel
# intensities onto [0, 1]). Done as one expression per array rather than an
# in-place `/=` so each name is bound exactly once to its scaled version.
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

# Flatten every image into one feature vector per sample. The sample count
# is read from the array and -1 lets NumPy infer the feature length, so no
# dataset sizes are hard-coded here.
X_train = X_train.reshape(len(X_train), -1)
X_test = X_test.reshape(len(X_test), -1)

# NOTE(review): not read anywhere in the visible cells; presumably a
# placeholder for a ROC-AUC result -- confirm before removing.
roc_Decision = 0

In [4]:
"""Decision Tree"""

# Fit a single decision tree on the training split and evaluate it on the
# test split.
# Alternative configuration kept for reference:
#   DecisionTreeClassifier(max_depth=100, min_samples_leaf=1)
# random_state is pinned so that re-running the cell rebuilds the same tree.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
y_pred = tree.predict(X_test)

# Accuracy = fraction of test samples whose predicted label matches the true
# label. Computed in one vectorised step: no hard-coded sample count, and
# the builtin `sum` is no longer shadowed by a counter variable.
test_accuracy = np.mean(y_pred == y_test)

print('Test set score: %f' % test_accuracy)
# confusion_matrix(y_true, y_pred): rows are true labels, columns are
# predicted labels.
confusion_1 = confusion_matrix(y_test, y_pred)
print(confusion_1)

Test set score: 0.878600
[[ 917    0    8    7    3    8   14    5   12    6]
 [   2 1087   10    7    1    8    5    1   12    2]
 [  11   11  882   28   15   14   14   27   21    9]
 [   6    7   28  866    6   43    4    9   23   18]
 [   4    4    8    8  856    8   18   12   20   44]
 [  13    7    6   46    7  749   22    4   26   12]
 [  18    7   10    9   13   21  847    2   27    4]
 [   2   10   27   14    4    5    3  931    8   24]
 [  13    4   24   40   19   28   17    8  790   31]
 [  13    4    8   22   38    9    8   23   23  861]]


In [5]:
"""Bagging Random Forest"""

# Fit a random forest on the training split and predict the test split.
# random_state is pinned so that re-running the cell rebuilds the same forest.
model = RandomForestClassifier(random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 10-fold cross-validation on the training split; `score` is the mean of the
# per-fold scores returned by cross_val_score.
score = np.mean(cross_val_score(model, X_train, y_train, cv=10))

# Accuracy = fraction of test samples whose predicted label matches the true
# label. Computed in one vectorised step: no hard-coded sample count, and
# the builtin `sum` is no longer shadowed by a counter variable.
test_accuracy = np.mean(y_pred == y_test)

print('Test set score: %f, cross valid score: %f' % (test_accuracy, score))
# confusion_matrix(y_true, y_pred): rows are true labels, columns are
# predicted labels.
confusion_2 = confusion_matrix(y_test, y_pred)
print(confusion_2)

Test set score: 0.969600, cross valid score: 0.966817
[[ 969    0    0    0    0    1    4    1    4    1]
 [   0 1122    4    3    0    2    2    0    2    0]
 [   3    0 1005    5    3    0    1    8    7    0]
 [   0    0   11  970    1    8    0    9    8    3]
 [   1    0    2    0  956    0    4    1    2   16]
 [   4    0    1   12    4  856    5    3    4    3]
 [   8    3    0    0    2    3  939    0    3    0]
 [   1    1   20    3    1    0    0  989    2   11]
 [   3    0    4    6    5    5    2    4  935   10]
 [   5    6    0   11   15    4    1    5    7  955]]


In [6]:
"""Boosting Adaboost"""

# Fit an AdaBoost ensemble on the training split and predict the test split.
# random_state is pinned so that re-running the cell rebuilds the same
# ensemble.
model = AdaBoostClassifier(random_state=0)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# 10-fold cross-validation on the training split; `score` is the mean of the
# per-fold scores returned by cross_val_score.
score = np.mean(cross_val_score(model, X_train, y_train, cv=10))

# Accuracy = fraction of test samples whose predicted label matches the true
# label. Computed in one vectorised step: no hard-coded sample count, and
# the builtin `sum` is no longer shadowed by a counter variable.
test_accuracy = np.mean(y_pred == y_test)

print('Test set score: %f, cross valid score: %f' % (test_accuracy, score))
# confusion_matrix(y_true, y_pred): rows are true labels, columns are
# predicted labels.
confusion_3 = confusion_matrix(y_test, y_pred)
print(confusion_3)

Test set score: 0.729900, cross valid score: 0.725533
[[ 883    0   25    3    4   28   23    3    3    8]
 [   0 1070    3    8    3    1    4   25   21    0]
 [  30   35  596   32   18    8  208   25   75    5]
 [  28   32   19  678    2   92   30   33   74   22]
 [   4    2   17   14  708   16   10   80   35   96]
 [  29   32    6  122   27  526   22   19   71   38]
 [  20   10   35    6   26   32  822    1    6    0]
 [   7   16   23    8   14    7    1  804   20  128]
 [  40   48   11   91   15   34   22   18  661   34]
 [   9   11   23   32  161   18    1  169   34  551]]
