In [1]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.fftpack import fft
from sklearn import svm
from sklearn.metrics import accuracy_score

In [2]:
# Fixed seed so the row shuffles below (and therefore the folds) are reproducible.
random = np.random.RandomState(0)
# Rows to keep from the training file, rows to keep from the test file, number of CV folds.
orgn_N, test_N, K = 10000, 2000, 5


def _load_split(split):
    """Load one split from ``data_hw2/`` as a single 2-D array.

    Reads ``data_hw2/<split>_data.npy`` and ``data_hw2/<split>_label.npy``,
    rescales the features with ``(x - 10) / 20`` and stacks the labels after
    the feature columns so that a row shuffle keeps samples and labels aligned.
    """
    features = (np.load('data_hw2/%s_data.npy' % split) - 10) / 20
    labels = np.expand_dims(np.load('data_hw2/%s_label.npy' % split), axis=1)
    return np.hstack([features, labels])


orgn_data = _load_split('train')
test_data = _load_split('test')

# RandomState.shuffle permutes in place along the first axis only, i.e. whole rows.
random.shuffle(orgn_data)
random.shuffle(test_data)
orgn_data = orgn_data[:orgn_N]
test_data = test_data[:test_N]

# Size the folds from the rows actually kept: slicing clamps silently, so a file
# with fewer than orgn_N rows would otherwise produce empty trailing folds.
fold_size = len(orgn_data) // K
fold_data = [orgn_data[i*fold_size: (i+1)*fold_size] for i in range(K - 1)]
# The last fold absorbs any remainder left by the integer division.
fold_data.append(orgn_data[(K-1)*fold_size:])

In [3]:
# Number of leading columns treated as features (rows are sliced as [:, :DIM]);
# the training loop reads the label from the column right after them (index 310).
DIM = 310

def decision2proba(x):
    """Squash decision values into (0, 1) with the logistic sigmoid.

    Computes ``1 / (1 + exp(-x))`` as ``exp(-log(1 + exp(-x)))`` via
    ``np.logaddexp``, so strongly negative inputs no longer overflow inside
    ``exp`` (which triggered a RuntimeWarning in the naive formula).

    Parameters
    ----------
    x : scalar or array-like of float
        Decision values, e.g. the output of ``decision_function``.

    Returns
    -------
    Same shape as ``x``; elementwise sigmoid, monotonically increasing in ``x``.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x)), evaluated stably.
    return np.exp(-np.logaddexp(0.0, np.negative(x)))

def decision2class(x):
    """Threshold decision values at zero.

    Returns an ``int8`` array that is 1 where ``x`` is strictly positive and
    0 elsewhere (a value of exactly 0 maps to 0).
    """
    is_positive = x > 0
    return is_positive.astype(np.int8)

def _1vsRest(template, train_y, valid_y, tests_y):
    """Relabel three label vectors through a lookup table.

    Each label is cast to ``int8`` and used as an index into ``template``, so
    the result holds ``template[label]`` for every sample. Negative labels
    index from the end (label -1 picks the last template entry).

    Returns the relabelled ``(train, valid, tests)`` arrays as a tuple.
    """
    lookup = np.array(template)

    def relabel(labels):
        return lookup[labels.astype(np.int8)]

    return tuple(relabel(split) for split in (train_y, valid_y, tests_y))

# K-fold cross-validation of a one-vs-rest ensemble of three RBF SVMs.
for i in range(K):
    # Fresh estimators every fold so nothing fitted on a previous fold carries over.
    # models[j] is the binary classifier for the j-th entry of [0, 1, -1].
    models = [
        svm.SVC(kernel='rbf', gamma=0.01, C=2),
        svm.SVC(kernel='rbf', gamma=1, C=1),
        svm.SVC(kernel='rbf', gamma=0.008, C=1),
    ]

    # Fold i is held out for validation; the remaining folds form the training set.
    valid_data = fold_data[i]
    train_data = np.vstack(fold_data[:i] + fold_data[i+1:])
    # Features are the first DIM columns, the label sits in the column right after them.
    train_X, train_y = train_data[:, :DIM], train_data[:, DIM]
    valid_X, valid_y = valid_data[:, :DIM], valid_data[:, DIM]
    tests_X, tests_y = test_data[:, :DIM], test_data[:, DIM]
    predict_valid, predict_tests = [], []

    # 0 vs 1, -1; 1 vs 0, -1; -1 vs 0, 1
    # (row j of the identity is the lookup table; label -1 indexes it from the end)
    for j in range(3):
        _train_y, _valid_y, _tests_y = _1vsRest(np.eye(3)[j], train_y, valid_y, tests_y)
        models[j].fit(train_X, _train_y)
        # Score each split once and reuse the values for both the binary accuracy
        # and the ensemble vote; kernel evaluation is the expensive part.
        valid_score = models[j].decision_function(valid_X)
        tests_score = models[j].decision_function(tests_X)
        valid_acc = accuracy_score(_valid_y, decision2class(valid_score))
        tests_acc = accuracy_score(_tests_y, decision2class(tests_score))
        predict_valid.append(decision2proba(valid_score))
        predict_tests.append(decision2proba(tests_score))
        print("model_%d valid_acc=%.4f tests_acc=%.4f" % (j, valid_acc, tests_acc))

    # Ensemble decision: the classifier with the highest score wins, and its
    # position is mapped back to the original label through [0, 1, -1].
    valid_acc = accuracy_score(valid_y, np.array([0, 1, -1])[np.argmax(predict_valid, axis=0)])
    tests_acc = accuracy_score(tests_y, np.array([0, 1, -1])[np.argmax(predict_tests, axis=0)])
    print("Fold %d: valid=%.4f test=%.4f\n" % (i, valid_acc, tests_acc))

model_0 valid_acc=0.8225 tests_acc=0.6845
model_1 valid_acc=1.0000 tests_acc=0.6765
model_2 valid_acc=0.6905 tests_acc=0.6575
Fold 0: valid=0.8270 test=0.5590

model_0 valid_acc=0.8225 tests_acc=0.6765
model_1 valid_acc=1.0000 tests_acc=0.6765
model_2 valid_acc=0.6990 tests_acc=0.6575
Fold 1: valid=0.8425 test=0.5725

model_0 valid_acc=0.8390 tests_acc=0.6935
model_1 valid_acc=1.0000 tests_acc=0.6765
model_2 valid_acc=0.7010 tests_acc=0.6575
Fold 2: valid=0.8395 test=0.5385

model_0 valid_acc=0.8335 tests_acc=0.6895
model_1 valid_acc=1.0000 tests_acc=0.6765
model_2 valid_acc=0.6820 tests_acc=0.6575
Fold 3: valid=0.8315 test=0.5765

model_0 valid_acc=0.8125 tests_acc=0.6770
model_1 valid_acc=1.0000 tests_acc=0.6765
model_2 valid_acc=0.6870 tests_acc=0.6575
Fold 4: valid=0.8355 test=0.5715

