In [1]:
import pickle

from train_model import train_step, test_step
from utils.load_data import get_data
from utils.make_dict import train_bow, get_bow

In [2]:
# Experiment configuration shared by every cell below. Only some keys are read
# directly in this notebook; the rest are handed to train_step / test_step
# inside `args` (their exact use there is not visible from here).
args = dict(
    # Data selection: passed to get_data(); 'cifar10' additionally triggers
    # the image reshape in the loading cells.
    dataset='MNIST',
    dataroot='./data',
    # Model selection: 'custom_SVM' / 'sklearn_SVM' enable the grid search in
    # the training cell; any other value trains once with the values below.
    model='randomforest',
    kernel='gaussian',
    validation=0.1,
    # SVM hyperparameters; overwritten by the grid search for SVM models.
    C=5.0,
    sigma=1.0,
    batch=5000,
    # Size of the bag-of-words dictionary (num_dict for train_bow / get_bow).
    dict_size=100,
    train=True,
    # True: reuse ./cluster.dump instead of re-running train_bow.
    load_cluster=False,
    cuda=True,
    # Presumably random-forest settings (tree depth, number of trees, bag
    # size) -- TODO confirm against train_model.
    depth=50,
    forest=10,
    bag_size=5000,
)

In [3]:
# Load the training split of the configured dataset.
trainX, trainy = get_data(
    dataset=args['dataset'],
    train=True,
    dataroot=args['dataroot'],
)

# CIFAR-10 samples are reshaped to (N, 32, 32, 3) using Fortran (column-major)
# order; other datasets are used as returned by get_data.
if args['dataset'] == 'cifar10':
    trainX = trainX.reshape((-1, 32, 32, 3), order='F')

# Obtain the bag-of-words cluster model: fit it on the training images and
# cache it to disk, or reuse the cached copy when 'load_cluster' is set.
if not args['load_cluster']:
    cluster = train_bow(trainX, num_dict=args['dict_size'], num_select=10000)
    with open("./cluster.dump", "wb") as dump_file:
        pickle.dump(cluster, dump_file)
else:
    # NOTE(review): pickle.load can execute arbitrary code; only load a
    # cluster.dump that this notebook wrote itself. The cache is also not
    # keyed on dataset or dict_size, so a stale file is loaded silently.
    with open("./cluster.dump", "rb") as dump_file:
        cluster = pickle.load(dump_file)

# Encode every training image as a bag-of-words feature using the cluster model.
trainFeature = get_bow(trainX, cluster, num_dict=args['dict_size'])

In [4]:
# Train the model selected by args['model'].
#
# For the two SVM variants, first run an exhaustive grid search over C and
# sigma: each combination is trained with train_step and scored by the mean of
# the validation accuracies it returns. The best combination is written back
# into `args` before the final training run. Any other model is trained once
# with the values already in `args`.
if args['model'] in ('custom_SVM', 'sklearn_SVM'):
    hyper_C = [0.1, 0.5, 1.0, 5.0]
    hyper_sigma = [1e-2, 0.5, 1.0, 5.0]

    best_C = None
    best_sigma = None
    # Start below any attainable accuracy so the first combination is always
    # recorded. Starting at 0.0 left best_C / best_sigma as None whenever every
    # validation accuracy was exactly 0.0, and the "%f" formatting below then
    # failed with a TypeError.
    best_valid = float('-inf')

    for C in hyper_C:
        for sigma in hyper_sigma:
            # Candidate hyperparameters are passed to train_step through args.
            args['C'] = C
            args['sigma'] = sigma

            # Only the accuracy lists matter during the search; the trained
            # models are discarded.
            _, train_acc_list, valid_acc_list = \
                train_step(args, trainFeature, trainy)

            # Mean accuracy over the entries returned by train_step.
            tra = sum(train_acc_list) / len(train_acc_list)
            val = sum(valid_acc_list) / len(valid_acc_list)

            # Strict comparison: on ties the earliest combination is kept.
            if val > best_valid:
                best_valid = val
                best_C = C
                best_sigma = sigma

            print("C: %f Sigma: %f Train accuracy: %f Valid accuracy: %f"%(C, sigma, tra, val))

    # Only reachable when no score compared greater than -inf (e.g. every
    # validation accuracy was NaN); fail with an explicit message.
    if best_C is None or best_sigma is None:
        raise ValueError("Grid search did not produce a comparable validation accuracy "
                         "for any (C, sigma) combination")

    print("Best C: %f Best sigma: %f"%(best_C, best_sigma))

    args['C'] = best_C
    args['sigma'] = best_sigma
    # 'part' is only set on the SVM path, after the search.
    # NOTE(review): presumably switches train_step from partial to full
    # training -- confirm against train_model.
    args['part'] = False

# Final training run (the only run for non-SVM models).
models, train_acc_list, valid_acc_list = \
    train_step(args, trainFeature, trainy)

100%|██████████| 10/10 [10:59<00:00, 65.98s/it]
100%|██████████| 10/10 [00:06<00:00,  1.66it/s]
100%|██████████| 10/10 [00:00<00:00, 13.15it/s]


In [5]:
# Load the held-out test split of the same dataset.
testX, testy = get_data(
    dataset=args['dataset'],
    train=False,
    dataroot=args['dataroot'],
)

# Same CIFAR-10 reshape as applied to the training images.
if args['dataset'] == 'cifar10':
    testX = testX.reshape((-1, 32, 32, 3), order='F')

# Encode the test images with the cluster model obtained in the training-data
# cell, so train and test features share one dictionary.
testFeature = get_bow(
    testX,
    cluster,
    num_dict=args['dict_size'],
)

In [6]:
# Run the trained model(s) on the test features.
# NOTE(review): `pred` is not read by any later cell visible here; test_step
# below is given `models` and `testFeature` directly, so this looks like a
# manual sanity check that could be dropped -- confirm before removing.
pred = models.predict(testFeature)

100%|██████████| 10/10 [00:01<00:00,  9.36it/s]


In [7]:
# Evaluate the trained model(s) on the test set. test_step returns four
# sequences (accuracy, precision, recall, F1) that are averaged in the next
# cell -- presumably one entry per class; TODO confirm against train_model.
test_acc_list, test_prec_list, test_recall_list, test_f1_list = test_step(args, testFeature, testy, models)

100%|██████████| 10/10 [00:01<00:00,  9.66it/s]


In [8]:
# Report the arithmetic mean of each metric list returned by test_step.
for metric_label, metric_values in (
    ("mA:", test_acc_list),
    ("mP:", test_prec_list),
    ("mR:", test_recall_list),
    ("mF1:", test_f1_list),
):
    print(metric_label, sum(metric_values) / len(metric_values))

mA: 0.05908999994091
mP: 0.13497932935718165
mR: 0.5843576972045138
mF1: 0.21913495115324774
