# SVM-sklearn
## 1 实现方法
通过初始化SVM线性模型，并输入训练数据，其能自动训练。将测试数据输入模型，得到准确度。准确度的计算方式是，模型预测值与实际值相等的样本数占样本总数的比例。
## 2 参数
经过测试与选取，我选择了如下参数：
THRESHOLD = 1e-3
MAX_ITERATION = -1
其中，选择THRESHOLD = 1e-3是为了在保证时间较短情况下增加准确度，选择MAX_ITERATION = -1是为了避免其提早结束导致准确度骤降（若选择和梯度下降相同的最大迭代次数，即8000次，则其准确度只有0.71，并抛出警告其提前结束）
## 3 结果
根据上述参数配置，得到结果如下：
train_acc: 0.975
test_acc: 0.91
Running time: 1.3125 Second
可以发现，其在THRESHOLD约为$1 \cdot 10^{-3}$时达到稳定，此时运行时间为1.3125s，准确率为0.91。
最终结果为：
w=[-2.802051483528587,-0.06326673,0.07667397,0.18258601,0.60309771,-0.28708404,-0.20567921,0.4027636,-0.65930869,-0.84561177,-0.81467686,0.52399726,-0.67723609,-0.20361242,-0.44343857,-0.22124865,0.50415385,-0.41556441,-0.12376121,0.01310567,-0.51855263,-0.5626143,0.44617828,0.50301771,-0.07107305,-0.2538178,0.99309579,0.52277441,0.00215304,0.00112656]

In [2]:
import time
import numpy as np
from sklearn.svm import SVC

# Paths of the training / test feature (X) and label (Y) CSV files.
PATH_X_TRAIN = "./data/X_train.csv"
PATH_Y_TRAIN = "./data/Y_train.csv"
PATH_X_TEST = "./data/X_test.csv"
PATH_Y_TEST = "./data/Y_test.csv"
# When True, read_csv skips the first row of every CSV file (skiprows=1).
IS_CONTAINHEAD = True
# Default for the is_normalize argument of caculate_by_svm_sklearn.
IS_NORMALIZE = False
# Default for the need_reshape argument of read_csv (labels -> column vector).
NEED_RESHAPE = False
# Passed to SVC as its `tol` argument.
THRESHOLD = 1e-3
# Passed to SVC as its `max_iter` argument; scikit-learn treats -1 as "no limit".
MAX_ITERATION = -1

# Print options
# Print the overall results
PRINT_RES = True
# Print the running time
PRINT_TIME = True


def change_dir():
    """Make ``sys.path[0]`` the current working directory and return it.

    The relative ``./data/...`` paths used by this file are resolved against
    the working directory, so this is called before any file is read.
    """
    from os import chdir
    from sys import path as search_path

    target_dir = search_path[0]
    chdir(target_dir)
    return target_dir


def read_csv(path, option="x", need_reshape=NEED_RESHAPE):
    """Load a comma-separated numeric file into a float NumPy array.

    Args:
        path: Location of the CSV file.
        option: ``"y"`` marks the file as labels; any other value returns the
            loaded array untouched.
        need_reshape: For labels only, reshape the result to ``(n, 1)``.

    Returns:
        The loaded array. For labels, every 0 entry is replaced by -1.
    """
    # One header row is skipped when IS_CONTAINHEAD is true.
    header_rows = int(IS_CONTAINHEAD)
    table = np.loadtxt(path, dtype=float, delimiter=',', skiprows=header_rows)
    if option != "y":
        return table

    # Map the 0 class to -1 so the labels are {-1, +1}.
    zero_mask = table == 0
    table[zero_mask] = -1
    if not need_reshape:
        return table
    return np.reshape(table, (np.shape(table)[0], 1))


def normalize_data(data):
    """Standardize every column of ``data`` to zero mean and unit variance.

    The array is overwritten in place and also returned, so both
    ``normalize_data(x)`` and ``x = normalize_data(x)`` work.

    Args:
        data: NumPy array of samples (rows) by features (columns).

    Returns:
        The same array object, holding ``(data - mean) / std`` per column.
        Columns whose standard deviation is 0 become all zeros.
    """
    mean = np.mean(data, axis=0)
    std = np.std(data, axis=0)
    # A constant column has std == 0; dividing by 1 instead yields zeros for
    # that column rather than NaN/inf from a 0/0 division.
    safe_std = np.where(std == 0, 1.0, std)
    # Broadcasting replaces the per-row Python loop; writing through
    # ``data[...]`` keeps the in-place update of the caller's array.
    data[...] = (data - mean) / safe_std
    return data


def caculate_by_svm_sklearn(train_x, train_y, test_x, test_y, threshold=THRESHOLD, max_iteration=MAX_ITERATION, is_normalize=IS_NORMALIZE):
    """Fit a linear-kernel SVC and return its intercept and weight vector.

    Args:
        train_x: Training features.
        train_y: Training labels.
        test_x: Test features.
        test_y: Test labels.
        threshold: Passed to ``SVC`` as ``tol``.
        max_iteration: Passed to ``SVC`` as ``max_iter``.
        is_normalize: When true, standardize the features before fitting.

    Returns:
        Tuple ``(intercept, weights)`` taken from ``svm.intercept_[0]`` and
        ``svm.coef_[0]``.

    When ``PRINT_RES`` is true, the train and test accuracies are printed.
    """
    if is_normalize:
        # Scale the test set with the TRAINING mean/std. Standardizing it with
        # its own statistics would put train and test features on different
        # scales and leak test-set information into preprocessing.
        mean = np.mean(train_x, axis=0)
        std = np.std(train_x, axis=0)
        # Constant features have std == 0; divide by 1 to avoid NaN/inf.
        std = np.where(std == 0, 1.0, std)
        # New arrays are created here; the caller's inputs are left untouched.
        train_x = (train_x - mean) / std
        test_x = (test_x - mean) / std

    svm = SVC(kernel='linear', tol=threshold, max_iter=max_iteration)
    svm.fit(train_x, train_y)

    if PRINT_RES:
        print("train_acc:", caculate_acc(train_y, svm.predict(train_x)))
        print("test_acc:", caculate_acc(test_y, svm.predict(test_x)))
    return svm.intercept_[0], svm.coef_[0]


def caculate_acc(test_y, predict_y):
    """Return the fraction of predictions that exactly equal the true labels.

    Args:
        test_y: True labels.
        predict_y: Predicted labels, compared element-wise with ``test_y``.

    Returns:
        The mean of the element-wise equality mask, as a float64.
    """
    is_hit = np.equal(predict_y, test_y)
    return is_hit.mean(dtype=np.float64)


def main():
    """Load the four CSV files, train the SVM and optionally print the model.

    The files are read in the order X_train, Y_train, X_test, Y_test. When
    ``PRINT_RES`` is true, the intercept and weight vector are printed.
    """
    sources = (
        (PATH_X_TRAIN, "x"),
        (PATH_Y_TRAIN, "y"),
        (PATH_X_TEST, "x"),
        (PATH_Y_TEST, "y"),
    )
    train_x, train_y, test_x, test_y = (
        read_csv(csv_path, kind) for csv_path, kind in sources
    )
    bias, weights = caculate_by_svm_sklearn(train_x, train_y, test_x, test_y)
    if not PRINT_RES:
        return
    print(bias, weights)


if __name__ == "__main__":
    # time.process_time() reports CPU time of this process, not wall-clock time.
    cpu_start = time.process_time()
    change_dir()
    main()
    cpu_elapsed = time.process_time() - cpu_start
    if PRINT_TIME:
        print("Running time: %s Second" % cpu_elapsed)


train_acc: 0.975
test_acc: 0.91
-2.802051483528587 [-0.06326673  0.07667397  0.18258601  0.60309771 -0.28708404 -0.20567921
  0.4027636  -0.65930869 -0.84561177 -0.81467686  0.52399726 -0.67723609
 -0.20361242 -0.44343857 -0.22124865  0.50415385 -0.41556441 -0.12376121
  0.01310567 -0.51855263 -0.5626143   0.44617828  0.50301771 -0.07107305
 -0.2538178   0.99309579  0.52277441  0.00215304  0.00112656]
Running time: 1.296875 Second
