In [45]:
# loading package
from keras.datasets import mnist
import numpy as np
from sklearn import svm
from sklearn.decomposition import PCA
import time

# Run configuration
TRAIN_NUM = 10_000    # how many training samples are requested from import_data
TEST_NUM = 100        # how many test samples are requested from import_data
COMPONENT_NUM = 0     # PCA components to keep; 0 means train on the unreduced data

def import_data(TRAIN_NUM, TEST_NUM, train_set=None, test_set=None):
    """Keep the leading samples of each split and flatten every image to one row.

    Parameters
    ----------
    TRAIN_NUM : int
        Number of leading training samples to keep.
    TEST_NUM : int
        Number of leading test samples to keep.
    train_set, test_set : tuple of (images, labels), optional
        Source data for each split. When omitted, the module-level
        ``X_TRAIN``/``Y_TRAIN`` and ``X_TEST``/``Y_TEST`` are used, so
        existing two-argument calls keep working.

    Returns
    -------
    tuple
        ``(train_data, train_label, test_data, test_label)``. The data arrays
        are float64 with shape ``(count, values_per_image)``; the label arrays
        are float64 with shape ``(count,)``.

    Raises
    ------
    ValueError
        If a requested count is below 1 or exceeds the samples available.
    """
    def _take(images, labels, count, split_name):
        # Slice first, then convert: only the requested samples are copied.
        images = np.asarray(images)
        labels = np.asarray(labels)
        available = min(len(images), len(labels))
        if not 1 <= count <= available:
            raise ValueError(
                'requested %r %s samples, but only 1..%d are available'
                % (count, split_name, available)
            )
        data = images[:count].astype(np.float64).reshape(count, -1)
        label = labels[:count].astype(np.float64)
        return data, label

    # Fall back to the notebook globals only when no data is passed explicitly.
    if train_set is None:
        train_set = (X_TRAIN, Y_TRAIN)
    if test_set is None:
        test_set = (X_TEST, Y_TEST)

    train_data, train_label = _take(train_set[0], train_set[1], TRAIN_NUM, 'train')
    test_data, test_label = _take(test_set[0], test_set[1], TEST_NUM, 'test')
    return train_data, train_label, test_data, test_label

def pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM, random_state=0):
    """Reduce both splits with a PCA that is fitted on the training data only.

    Parameters
    ----------
    TRAIN_DATA : array-like
        Training samples; the PCA is fitted on these and then applied to them.
    TEST_DATA : array-like
        Test samples; transformed with the PCA fitted on ``TRAIN_DATA``.
    COMPONENT_NUM : int
        Passed to ``PCA`` as ``n_components``.
    random_state : int or None, optional
        Passed to ``PCA``. A fixed value keeps the result repeatable when
        scikit-learn selects a randomized SVD solver (see the PCA
        documentation for when ``svd_solver='auto'`` does so).

    Returns
    -------
    tuple
        ``(train_reduce, test_reduce)``, the transformed training and test data.
    """
    pca = PCA(n_components=COMPONENT_NUM, random_state=random_state)
    # Fit on the training split only; the test split reuses that projection.
    train_reduce = pca.fit_transform(TRAIN_DATA)
    test_reduce = pca.transform(TEST_DATA)
    return train_reduce, test_reduce

# Define the SVM classifier
# Timing reference: https://pynative.com/python-get-execution-time-of-program/
def svm_class(TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL):
    """Fit a degree-3 polynomial-kernel SVC and time the training and scoring steps.

    Parameters
    ----------
    TRAIN_DATA, TRAIN_LABEL
        Samples and labels passed to ``SVC.fit``.
    TEST_DATA, TEST_LABEL
        Samples and labels passed to ``SVC.score``.

    Returns
    -------
    tuple
        ``(ACCURACY, TRAIN_cost, TEST_cost)``: the value returned by
        ``score`` and the elapsed seconds of the training and scoring steps.
        The training interval includes constructing the ``SVC`` object.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() differences can be skewed by system clock changes.
    train_start = time.perf_counter()
    polyModel = svm.SVC(kernel='poly', degree=3, gamma='auto', C=1)
    polyModel.fit(TRAIN_DATA, TRAIN_LABEL)
    train_end = time.perf_counter()
    ACCURACY = polyModel.score(TEST_DATA, TEST_LABEL)
    test_end = time.perf_counter()
    TRAIN_cost = train_end - train_start
    TEST_cost = test_end - train_end
    return ACCURACY, TRAIN_cost, TEST_cost

# Load MNIST and keep the requested number of flattened samples
(X_TRAIN, Y_TRAIN), (X_TEST, Y_TEST) = mnist.load_data()
TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL = import_data(TRAIN_NUM, TEST_NUM)

if COMPONENT_NUM != 0:
    # Reduce the data with PCA before training
    TRAIN_REDUCE, TEST_REDUCE = pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM)
    print('Reduce dimension train data shape: ' + str(TRAIN_REDUCE.shape))
    print('Reduce dimension test data shape: ' + str(TEST_REDUCE.shape))
    ACCURACY, TRAIN_COST, TEST_COST = svm_class(TRAIN_REDUCE, TRAIN_LABEL, TEST_REDUCE, TEST_LABEL)
else:
    # COMPONENT_NUM == 0: train directly on the unreduced data
    print('Train data shape: ' + str(TRAIN_DATA.shape))
    print('Test data shape: ' + str(TEST_DATA.shape))
    ACCURACY, TRAIN_COST, TEST_COST = svm_class(TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL)

# Both branches report the result in the same format
print(
    'The SVM module accuracy: ' + str(ACCURACY)
    + ' and training consume: ' + str(round(TRAIN_COST, 2))
    + ' sec, testing consume: ' + str(round(TEST_COST, 2)) + ' sec'
)


Train data shape: (10000, 784)
Test data shape: (100, 784)
The SVM module accuracy: 0.99 and training consume: 8.58 sec, testing consume: 0.08 sec


In [29]:
# Usage:
#   python3 SVM.py TRAIN_DATA_NUM TEST_DATA_NUM PCA_COMPONENT_NUM
# Note: set PCA_COMPONENT_NUM to 0 to run the SVM without dimensionality reduction.

# loading package
from keras.datasets import mnist
import numpy as np
from sklearn import svm
from sklearn.decomposition import PCA
import time
import sys

# arguments
def _int_arg(position, default):
    """Return ``sys.argv[position]`` as an int, or ``default`` if it is missing or not an integer."""
    try:
        return int(sys.argv[position])
    except (IndexError, ValueError):
        return default

# sys.argv entries are strings, so they must be converted before they are used
# as array sizes or compared with 0. The defaults apply when the code is not
# launched with three integer arguments (for example inside a notebook kernel,
# where sys.argv holds the kernel launcher's own arguments).
TRAIN_NUM = _int_arg(1, 10000)
TEST_NUM = _int_arg(2, 100)
COMPONENT_NUM = _int_arg(3, 0)

def import_data(TRAIN_NUM, TEST_NUM, train_set=None, test_set=None):
    """Keep the leading samples of each split and flatten every image to one row.

    Parameters
    ----------
    TRAIN_NUM : int
        Number of leading training samples to keep.
    TEST_NUM : int
        Number of leading test samples to keep.
    train_set, test_set : tuple of (images, labels), optional
        Source data for each split. When omitted, the module-level
        ``X_TRAIN``/``Y_TRAIN`` and ``X_TEST``/``Y_TEST`` are used, so
        existing two-argument calls keep working.

    Returns
    -------
    tuple
        ``(train_data, train_label, test_data, test_label)``. The data arrays
        are float64 with shape ``(count, values_per_image)``; the label arrays
        are float64 with shape ``(count,)``.

    Raises
    ------
    ValueError
        If a requested count is below 1 or exceeds the samples available.
    """
    def _take(images, labels, count, split_name):
        # Slice first, then convert: only the requested samples are copied.
        images = np.asarray(images)
        labels = np.asarray(labels)
        available = min(len(images), len(labels))
        if not 1 <= count <= available:
            raise ValueError(
                'requested %r %s samples, but only 1..%d are available'
                % (count, split_name, available)
            )
        data = images[:count].astype(np.float64).reshape(count, -1)
        label = labels[:count].astype(np.float64)
        return data, label

    # Fall back to the module-level arrays only when no data is passed explicitly.
    if train_set is None:
        train_set = (X_TRAIN, Y_TRAIN)
    if test_set is None:
        test_set = (X_TEST, Y_TEST)

    train_data, train_label = _take(train_set[0], train_set[1], TRAIN_NUM, 'train')
    test_data, test_label = _take(test_set[0], test_set[1], TEST_NUM, 'test')
    return train_data, train_label, test_data, test_label

def pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM, random_state=0):
    """Reduce both splits with a PCA that is fitted on the training data only.

    Parameters
    ----------
    TRAIN_DATA : array-like
        Training samples; the PCA is fitted on these and then applied to them.
    TEST_DATA : array-like
        Test samples; transformed with the PCA fitted on ``TRAIN_DATA``.
    COMPONENT_NUM : int
        Passed to ``PCA`` as ``n_components``.
    random_state : int or None, optional
        Passed to ``PCA``. A fixed value keeps the result repeatable when
        scikit-learn selects a randomized SVD solver (see the PCA
        documentation for when ``svd_solver='auto'`` does so).

    Returns
    -------
    tuple
        ``(train_reduce, test_reduce)``, the transformed training and test data.
    """
    pca = PCA(n_components=COMPONENT_NUM, random_state=random_state)
    # Fit on the training split only; the test split reuses that projection.
    train_reduce = pca.fit_transform(TRAIN_DATA)
    test_reduce = pca.transform(TEST_DATA)
    return train_reduce, test_reduce

# Define the SVM classifier
# Timing reference: https://pynative.com/python-get-execution-time-of-program/
def svm_class(TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL):
    """Fit a degree-3 polynomial-kernel SVC, score it, and time the whole run.

    Parameters
    ----------
    TRAIN_DATA, TRAIN_LABEL
        Samples and labels passed to ``SVC.fit``.
    TEST_DATA, TEST_LABEL
        Samples and labels passed to ``SVC.score``.

    Returns
    -------
    tuple
        ``(ACCURACY, cost)``: the value returned by ``score`` and the elapsed
        seconds covering model construction, fitting and scoring together.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() differences can be skewed by system clock changes.
    started = time.perf_counter()
    polyModel = svm.SVC(kernel='poly', degree=3, gamma='auto', C=1)
    polyModel.fit(TRAIN_DATA, TRAIN_LABEL)
    ACCURACY = polyModel.score(TEST_DATA, TEST_LABEL)
    cost = time.perf_counter() - started
    return ACCURACY, cost

# Load MNIST and keep the requested number of flattened samples
(X_TRAIN, Y_TRAIN), (X_TEST, Y_TEST) = mnist.load_data()
TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL = import_data(TRAIN_NUM, TEST_NUM)

if COMPONENT_NUM != 0:
    # Reduce the data with PCA before training
    TRAIN_REDUCE, TEST_REDUCE = pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM)
    print('Reduce dimension train data shape: ' + str(TRAIN_REDUCE.shape))
    print('Reduce dimension test data shape: ' + str(TEST_REDUCE.shape))
    ACCURACY, COST = svm_class(TRAIN_REDUCE, TRAIN_LABEL, TEST_REDUCE, TEST_LABEL)
else:
    # COMPONENT_NUM == 0: train directly on the unreduced data
    print('Train data shape: ' + str(TRAIN_DATA.shape))
    print('Test data shape: ' + str(TEST_DATA.shape))
    ACCURACY, COST = svm_class(TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL)

# Both branches report the result in the same format
print(
    'The SVM module accuracy: ' + str(ACCURACY)
    + ' and consume: ' + str(round(COST, 2)) + ' sec'
)


Reduce dimension train data shape: (1000, 30)
Reduce dimension test data shape: (100, 30)
The SVM module accuracy: 0.92 and consume: 0.09 sec
