In [29]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import datasets, transforms
from torchvision.datasets import mnist, CIFAR10
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA

from sklearn import metrics
import xgboost

import time
import torch.nn.functional as TF
import torch.optim as optim
import os
import math
import matplotlib.pyplot as plt
import pickle

torch.set_num_threads(1)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [39]:
# (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

transform = transforms.Compose([transforms.Resize((28,28)),
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307,), (0.3081,)),])
test_transform = transforms.Compose([transforms.Resize((28,28)),
                                     transforms.ToTensor(),
                                     transforms.Normalize((0.1307,), (0.3081,)),])

train_dataset = mnist.MNIST(root='data/MNIST/train', train=True,
                              download=True,transform=transform)
test_dataset = mnist.MNIST(root='data/MNIST/test', train=False,
                             download=True, transform=test_transform)

x_tr = train_dataset.data.numpy()
y_tr = train_dataset.targets.numpy()
x_tr = x_tr.reshape((-1, 28*28))

x_te = test_dataset.data.numpy()
y_te = test_dataset.targets.numpy()
x_te = x_te.reshape((-1, 28*28))
all_x = np.concatenate((x_tr,x_te),axis=0)
all_y = np.concatenate((y_tr,y_te),axis=0)
train_length = len(x_tr)

In [40]:
pca = PCA(n_components=10)
all_x_pca = pca.fit_transform(all_x)
x_tr = all_x_pca[:train_length]
x_te = all_x_pca[train_length:]

print(x_tr.shape)
print(x_te.shape)

(60000, 10)
(10000, 10)


In [52]:
SAVE_DIR = 'Models/'
for SEED in range(5):
    np.random.seed(SEED)
    all_index = np.arange(len(x_tr))
    np.random.shuffle(all_index)
    train_index = all_index[0:int(0.9*len(x_tr))]
    val_index = all_index[int(0.9*len(x_tr)):]

    train_x = x_tr[train_index]
    train_y = y_tr[train_index]
    val_x = x_tr[val_index]
    val_y = y_tr[val_index]
    # Train model
    xgb_clf = xgboost.XGBClassifier(use_label_encoder=False)
#     xgb_clf = xgboost.XGBClassifier(use_label_encoder=False,n_estimators=100,max_depth=25,max_leaves=30,learning_rate=0.01)
    xgb_clf.fit(train_x, train_y)
    # Save model
    save_path = SAVE_DIR + 'MNIST_XGBoost_SEED_%d.pkl'%SEED
    pickle.dump(xgb_clf, open(save_path, 'wb'))
    # Load model
    xgb_val_clf = pickle.load(open(save_path, 'rb'))
    
    y_pred = xgb_val_clf.predict(val_x)
    score = metrics.accuracy_score(val_y, y_pred)
    print('SEED:%d,Accuracy Score:%.4f%%'%(SEED,100*score))

SEED:0,Accuracy Score:92.3833%
SEED:1,Accuracy Score:92.5167%
SEED:2,Accuracy Score:92.6000%
SEED:3,Accuracy Score:91.7333%
SEED:4,Accuracy Score:92.0667%


In [45]:
SAVE_DIR = 'Models/'
for SEED in range(5):
    np.random.seed(SEED)
    all_index = np.arange(len(x_tr))
    np.random.shuffle(all_index)
    train_index = all_index[0:int(0.9*len(x_tr))]
    val_index = all_index[int(0.9*len(x_tr)):]

    train_x = x_tr[train_index]
    train_y = y_tr[train_index]
    val_x = x_tr[val_index]
    val_y = y_tr[val_index]
    val_y_onehot = np.eye(10)[val_y]

    
    save_path = SAVE_DIR + 'MNIST_XGBoost_SEED_%d.pkl'%SEED
    
    xgb_val_clf = pickle.load(open(save_path, 'rb'))
    
    y_pred = xgb_val_clf.predict(val_x)
    y_prob = xgb_val_clf.predict_proba(val_x)
    
    score = metrics.accuracy_score(val_y, y_pred)
    F1_score = metrics.f1_score(val_y, y_pred, average='weighted')
    ROC_AUC_score = metrics.roc_auc_score(val_y_onehot, y_prob,average='samples'XGBoost)
    print('SEED:%d'%(SEED))
    print('Accuracy Score:', score)
    print('F1 Score:', F1_score)
    print('ROC AUC Score:', ROC_AUC_score)
    print('Average score:%.4f'%((score+F1_score+ROC_AUC_score)/3))

SEED:0
Accuracy Score: 0.9238333333333333
F1 Score: 0.9237992129953445
ROC AUC Score: 0.9871481481481481
Average score:0.9449
SEED:1
Accuracy Score: 0.9251666666666667
F1 Score: 0.9251686430168448
ROC AUC Score: 0.9865185185185186
Average score:0.9456
SEED:2
Accuracy Score: 0.926
F1 Score: 0.9259002526114295
ROC AUC Score: 0.9864814814814814
Average score:0.9461
SEED:3
Accuracy Score: 0.9173333333333333
F1 Score: 0.9174651460380686
ROC AUC Score: 0.984888888888889
Average score:0.9399
SEED:4
Accuracy Score: 0.9206666666666666
F1 Score: 0.9206766005158774
ROC AUC Score: 0.9869259259259261
Average score:0.9428


In [47]:
SAVE_DIR = 'Models/'
SEED = 2

y_te_onehot = np.eye(10)[y_te]
save_path = SAVE_DIR + 'MNIST_XGBoost_SEED_%d.pkl'%SEED

xgb_val_clf = pickle.load(open(save_path, 'rb'))

y_pred = xgb_val_clf.predict(x_te)
y_prob = xgb_val_clf.predict_proba(x_te)

score = metrics.accuracy_score(y_te, y_pred)
F1_score = metrics.f1_score(y_te, y_pred, average='weighted')
ROC_AUC_score = metrics.roc_auc_score(y_te_onehot, y_prob,multi_class='ovo')
print('SEED:%d'%(SEED))
print('Accuracy Score:', score)
print('F1 Score:', F1_score)
print('ROC AUC Score:', ROC_AUC_score)

SEED:2
Accuracy Score: 0.9237
F1 Score: 0.9236704964124014
ROC AUC Score: 0.9957977542440413
