In [1]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader, ConcatDataset
from torchvision import datasets, transforms
from torchvision.datasets import mnist, CIFAR10
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics
from sklearn.decomposition import PCA

import time
import torch.nn.functional as TF
import torch.optim as optim
import os
import math
import matplotlib.pyplot as plt
import pickle

torch.set_num_threads(1)


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [2]:
# (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]),
])
test_transform = transforms.Compose([
    transforms.Resize(32),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]),
])

train_dataset = CIFAR10(root='data/CIFAR10/train', train=True,
                              download=True,transform=transform)
test_dataset = CIFAR10(root='data/CIFAR10/test', train=False,
                             download=True, transform=test_transform)

x_tr = train_dataset.data
y_tr = np.array(train_dataset.targets)
x_tr = x_tr.reshape((-1, 32*32*3))

x_te = test_dataset.data
y_te = np.array(test_dataset.targets)
x_te = x_te.reshape((-1, 32*32*3))

all_x = np.concatenate((x_tr,x_te),axis=0)
all_y = np.concatenate((y_tr,y_te),axis=0)
train_length = len(x_tr)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
pca = PCA(n_components=20)
all_x_pca = pca.fit_transform(all_x)
x_tr = all_x_pca[:train_length]
x_te = all_x_pca[train_length:]

print(x_tr.shape)
print(x_te.shape)

(50000, 20)
(10000, 20)


In [4]:
SAVE_DIR = 'Models/'
for SEED in range(5):
    np.random.seed(SEED)
    all_index = np.arange(len(x_tr))
    np.random.shuffle(all_index)
    train_index = all_index[0:int(0.9*len(x_tr))]
    val_index = all_index[int(0.9*len(x_tr)):]

    train_x = x_tr[train_index]
    train_y = y_tr[train_index]
    val_x = x_tr[val_index]
    val_y = y_tr[val_index]
    # Train model
    rf_clf = RandomForestClassifier(random_state=42)
    rf_clf.fit(train_x, train_y)
    
    save_path = SAVE_DIR + 'CIFAR10_RF_SEED_%d.pkl'%SEED
    pickle.dump(rf_clf, open(save_path, 'wb'))
    
    rf_val_clf = pickle.load(open(save_path, 'rb'))
    
    y_pred = rf_val_clf.predict(val_x)
    score = metrics.accuracy_score(val_y, y_pred)
    print('SEED:%d,Accuracy Score:%.4f%%'%(SEED,100*score))

SEED:0,Accuracy Score:44.0200%
SEED:1,Accuracy Score:44.2200%
SEED:2,Accuracy Score:44.6800%
SEED:3,Accuracy Score:45.1200%
SEED:4,Accuracy Score:44.8600%


In [5]:
SAVE_DIR = 'Models/'
for SEED in range(5):
    np.random.seed(SEED)
    all_index = np.arange(len(x_tr))
    np.random.shuffle(all_index)
    train_index = all_index[0:int(0.9*len(x_tr))]
    val_index = all_index[int(0.9*len(x_tr)):]

    train_x = x_tr[train_index]
    train_y = y_tr[train_index]
    val_x = x_tr[val_index]
    val_y = y_tr[val_index]
    val_y_onehot = np.eye(10)[val_y]

    
    save_path = SAVE_DIR + 'CIFAR10_RF_SEED_%d.pkl'%SEED
    
    rf_val_clf = pickle.load(open(save_path, 'rb'))
    
    y_pred = rf_val_clf.predict(val_x)
    y_prob = rf_val_clf.predict_proba(val_x)
    
    score = metrics.accuracy_score(val_y, y_pred)
    F1_score = metrics.f1_score(val_y, y_pred, average='weighted')
    ROC_AUC_score = metrics.roc_auc_score(val_y_onehot, y_prob,average='samples',multi_class='ovo')
    print('SEED:%d'%(SEED))
    print('Accuracy Score:', score)
    print('F1 Score:', F1_score)
    print('ROC AUC Score:', ROC_AUC_score)
    print('Average score:%.4f'%((score+F1_score+ROC_AUC_score)/3))

SEED:0
Accuracy Score: 0.4402
F1 Score: 0.43688801188276477
ROC AUC Score: 0.8161
Average score:0.5644
SEED:1
Accuracy Score: 0.4422
F1 Score: 0.43921799540670453
ROC AUC Score: 0.8201333333333332
Average score:0.5672
SEED:2
Accuracy Score: 0.4468
F1 Score: 0.4441732193110025
ROC AUC Score: 0.8253888888888888
Average score:0.5721
SEED:3
Accuracy Score: 0.4512
F1 Score: 0.4471638412191526
ROC AUC Score: 0.8216777777777777
Average score:0.5733
SEED:4
Accuracy Score: 0.4486
F1 Score: 0.44592908303048145
ROC AUC Score: 0.825
Average score:0.5732


In [6]:
SAVE_DIR = 'Models/'
SEED = 2

y_te_onehot = np.eye(10)[y_te]
save_path = SAVE_DIR + 'CIFAR10_RF_SEED_%d.pkl'%SEED

rf_val_clf = pickle.load(open(save_path, 'rb'))

y_pred = rf_val_clf.predict(x_te)
y_prob = rf_val_clf.predict_proba(x_te)

score = metrics.accuracy_score(y_te, y_pred)
F1_score = metrics.f1_score(y_te, y_pred, average='weighted')
ROC_AUC_score = metrics.roc_auc_score(y_te_onehot, y_prob,multi_class='ovo')
print('SEED:%d'%(SEED))
print('Accuracy Score:', score)
print('F1 Score:', F1_score)
print('ROC AUC Score:', ROC_AUC_score)

SEED:2
Accuracy Score: 0.4547
F1 Score: 0.45136576970057996
ROC AUC Score: 0.8476753166666666


In [8]:
# for depth in [2,10,20,30,40]:
#     print("RF Max depth %d"%depth)
#     rf_clf = RandomForestClassifier(max_depth=depth,random_state=42)
#     rf_clf.fit(x_tr, y_tr)

#     y_pred = rf_clf.predict(x_te)
#     score = metrics.accuracy_score(y_te, y_pred)
#     print("Accuracy score on test dataset", score)

RF Max depth 2
Accuracy score on test dataset 0.2654
RF Max depth 10
Accuracy score on test dataset 0.4314
RF Max depth 20


KeyboardInterrupt: 