# Train mô hình Random Forest

In [9]:
import numpy as np
import random

# Single seed shared by every random number generator used in this notebook.
SEED = 42

# Seed the standard-library generator and NumPy's global generator with the
# same value so repeated runs draw the same random numbers.
random.seed(SEED)
np.random.seed(SEED)

In [10]:
import os
import json
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import pandas as pd


## Prepare Data

In [11]:
def prepareData(dir_train, dir_test):
    """Read the train and test CSV files and split them into features and labels.

    Args:
        dir_train: Path of the training CSV file.
        dir_test: Path of the test CSV file.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)``. The feature frames
        hold every column except ``Activity`` and ``Activity_code``; the
        label series are the ``Activity_code`` columns.
    """
    # Columns that describe the target and therefore are not features.
    target_columns = ['Activity', 'Activity_code']

    # Both files are read up front, training file first.
    frames = [pd.read_csv(csv_path) for csv_path in (dir_train, dir_test)]

    features = [frame.drop(columns=target_columns) for frame in frames]
    labels = [frame['Activity_code'] for frame in frames]

    train_features, test_features = features
    train_labels, test_labels = labels
    return train_features, test_features, train_labels, test_labels


## Load Best Hyperparameters

In [12]:
def load_parameters_from_json(json_path):
    """Load hyperparameters from a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The object decoded from the file by ``json.load``.

    Raises:
        FileNotFoundError: If ``json_path`` does not exist.
    """
    if os.path.exists(json_path):
        with open(json_path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    # Reached only when the path is missing; the message names the path.
    raise FileNotFoundError(f"File {json_path} không tồn tại!")


## Define the model training function

In [13]:
def train_random_forest(X_train, y_train, X_test, y_test, params):
    """Fit a RandomForestClassifier with the given hyperparameters.

    Args:
        X_train: Training features passed to ``fit``.
        y_train: Training labels passed to ``fit``.
        X_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        params: Mapping with the keys ``bootstrap``, ``max_depth``,
            ``min_samples_leaf`` and ``min_samples_split``. ``bootstrap``
            may be a real boolean or a string such as ``"True"``/``"False"``.

    Returns:
        The fitted RandomForestClassifier.

    Raises:
        KeyError: If one of the required keys is missing from ``params``.
    """
    # The tuning output may store the flag as the string "True"/"False" or
    # as a JSON boolean. Comparing only against the string 'True' would
    # silently turn a real boolean ``True`` into ``False``.
    bootstrap = params['bootstrap']
    if isinstance(bootstrap, str):
        bootstrap = bootstrap.strip().lower() == 'true'
    else:
        bootstrap = bool(bootstrap)

    # NOTE(review): no random_state is passed, so reproducibility presumably
    # relies on NumPy's global seed having been set by the caller — confirm.
    model = RandomForestClassifier(
        bootstrap=bootstrap,
        max_depth=params['max_depth'],
        min_samples_leaf=params['min_samples_leaf'],
        min_samples_split=params['min_samples_split'],
    )
    model.fit(X_train, y_train)

    return model

## Save Model

In [14]:
def save_model(model, model_path):
    """Write ``model`` to ``model_path`` with joblib and print where it went.

    Args:
        model: Object to persist.
        model_path: Destination path handed to ``joblib.dump``.
    """
    joblib.dump(model, model_path)

    # Confirmation is printed only after the dump call has returned.
    confirmation = f"Mô hình đã được lưu tại: {model_path}"
    print(confirmation)

## Train model

In [15]:
# Input locations: the train/test CSV files and the JSON file holding the
# hyperparameters.
dir_train = "../../data/interim/trainFinal.csv"
dir_test = "../../data/interim/testFinal.csv"
dir_param = "../hyperparameter_tuning/Best_Hyperparameter/best_parameter_RandomForest.json"

# Load the hyperparameters first, then the feature/label splits.
params = load_parameters_from_json(dir_param)
X_train, X_test, y_train, y_test = prepareData(dir_train, dir_test)

# Replace the feature DataFrames with their underlying NumPy arrays.
X_train, X_test = X_train.values, X_test.values

In [16]:
# Fit the classifier with the loaded hyperparameters, then persist it.
model = train_random_forest(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    params=params,
)
save_model(model=model, model_path="../../models/RandomForest_model.pkl")

Mô hình đã được lưu tại: ../../models/RandomForest_model.pkl
