# Train mô hình Support Vector Machine

In [1]:
import numpy as np
import random

# Seed both Python's and NumPy's global random generators with the same
# value so that repeated runs of the notebook draw the same random numbers.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

In [2]:
import os
import json
import joblib
from sklearn.svm import SVC
import pandas as pd


## Prepare Data

In [3]:
def prepareData(dir_train, dir_test):
    """Load the train/test CSV files and split them into features and labels.

    The ``Activity`` and ``Activity_code`` columns are removed from the
    feature frames, and ``Activity_code`` is returned as the target.

    Args:
        dir_train: Path of the training CSV file.
        dir_test: Path of the test CSV file.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` where the ``X_*``
        entries are DataFrames and the ``y_*`` entries are Series.
    """
    non_feature_columns = ['Activity', 'Activity_code']
    target_column = 'Activity_code'

    # Read both files up front, then separate features from the target.
    train_frame = pd.read_csv(dir_train)
    test_frame = pd.read_csv(dir_test)

    X_train = train_frame.drop(columns=non_feature_columns)
    y_train = train_frame[target_column]

    X_test = test_frame.drop(columns=non_feature_columns)
    y_test = test_frame[target_column]

    return X_train, X_test, y_train, y_test


## Load Best Hyperparameters

In [4]:
def load_parameters_from_json(json_path):
    """Read a JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read (opened as UTF-8).

    Returns:
        The object produced by ``json.load`` for the file.

    Raises:
        FileNotFoundError: If ``json_path`` does not exist.
    """
    if os.path.exists(json_path):
        with open(json_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    raise FileNotFoundError(f"File {json_path} không tồn tại!")


## Create the Model Training Function

In [9]:
def train_SVM(X_train, y_train, X_test, y_test, params):
    """Fit an SVC with the given hyperparameters and print its test accuracy.

    Args:
        X_train: Training features passed to ``SVC.fit``.
        y_train: Training labels passed to ``SVC.fit``.
        X_test: Test features used for scoring.
        y_test: Test labels used for scoring.
        params: Mapping that must contain the keys ``'C'``, ``'kernel'``,
            ``'degree'``, ``'gamma'`` and ``'coef0'``; each value is
            forwarded to the ``SVC`` argument of the same name.

    Returns:
        The fitted ``SVC`` model.

    Raises:
        KeyError: If ``params`` is missing one of the required keys.
    """
    model = SVC(
        C=params['C'],
        kernel=params['kernel'],
        degree=params['degree'],
        gamma=params['gamma'],
        coef0=params['coef0'],
    )
    model.fit(X_train, y_train)

    # score() predicts on X_test internally, so a separate predict() call
    # whose result is never used would only repeat that work.
    accuracy = model.score(X_test, y_test)
    print(f"Accuracy: {accuracy}")

    return model

## Save Model

In [6]:
def save_model(model, model_path):
    """Serialize ``model`` to ``model_path`` with joblib and print the location.

    Args:
        model: The object to persist.
        model_path: Destination handed to ``joblib.dump``. When it is a
            filesystem path, its parent directory is created if missing.
    """
    # Create the target directory up front so a finished training run is not
    # lost to a FileNotFoundError at save time. Non-path targets (for example
    # an already opened file object) are passed through untouched.
    if isinstance(model_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(model_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, model_path)
    print(f"Mô hình đã được lưu tại: {model_path}")

## Train model

In [10]:
dir_train = "../../data/interim/trainFinal.csv"
dir_test = "../../data/interim/testFinal.csv"
dir_param = "../hyperparameter_tuning/Best_Hyperparameter/best_parameter_SVM.json"
params = load_parameters_from_json(dir_param)

# Load the splits before using them: without this call X_train, X_test,
# y_train and y_test are undefined when the notebook is run top to bottom.
X_train, X_test, y_train, y_test = prepareData(dir_train, dir_test)

# Hand the model plain NumPy arrays instead of DataFrames.
X_train = X_train.values
X_test = X_test.values

In [11]:
# Fit the SVM on the full feature set, then persist the fitted model.
model = train_SVM(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    params=params,
)
save_model(model=model, model_path="../../models/SVM_model.pkl")

Accuracy: 0.9630132337970818
Mô hình đã được lưu tại: ../../models/SVM_model.pkl


## Train model with PCA

In [12]:
dir_train = "../../data/interim/train_PCA.csv"
dir_test = "../../data/interim/test_PCA.csv"

data_train_PCA = pd.read_csv(dir_train)
data_test_PCA = pd.read_csv(dir_test)

# The labels (y_train / y_test) and the hyperparameters are reused from the
# earlier, non-PCA cells.
# NOTE(review): this assumes the PCA CSV files contain only feature columns
# and keep the same row order as the original splits -- confirm.
# NOTE(review): model_PCA is not saved anywhere in the visible cells.
model_PCA = train_SVM(
    X_train=data_train_PCA,
    y_train=y_train,
    X_test=data_test_PCA,
    y_test=y_test,
    params=params,
)

Accuracy: 0.9114353579911775
