In [None]:

import numpy as np
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import f1_score
from sklearn.model_selection import StratifiedKFold

class ModelTrainer:
    """Tune a LightGBM classifier with Optuna and fit the best configuration.

    The search maximises the mean macro F1-score obtained with stratified
    K-fold cross-validation on the training data.
    """

    def __init__(self):
        # Final fitted estimator; stays ``None`` until
        # ``initiate_model_trainer`` has completed.
        self.model = None

    @staticmethod
    def _take_rows(data, positions):
        """Return the rows of ``data`` located at integer ``positions``.

        Objects exposing ``.iloc`` (pandas Series/DataFrame) are sliced
        positionally through it, so a non-default index or a DataFrame's
        column labels cannot be mistaken for row positions. Anything else
        is indexed directly with ``data[positions]``.
        """
        indexer = getattr(data, "iloc", None)
        if indexer is not None:
            return indexer[positions]
        return data[positions]

    def initiate_model_trainer(
        self,
        X_train_transformed,
        y_train,
        *,
        n_trials: int = 10,
        n_splits: int = 5,
        random_state: int = 42,
    ):
        """Search hyperparameters with Optuna, then fit on the full data.

        Args:
            X_train_transformed: Feature matrix; rows are selected
                positionally for each cross-validation fold.
            y_train: Class labels aligned row-by-row with
                ``X_train_transformed``.
            n_trials: Number of Optuna trials to run (at least 1).
            n_splits: Number of stratified cross-validation folds.
            random_state: Seed shared by the fold splitter, the Optuna
                sampler and every LightGBM model.

        Returns:
            The ``LGBMClassifier`` refitted on all training data with the
            best parameters found. The same object is stored on
            ``self.model``.

        Raises:
            ValueError: If ``n_trials`` is smaller than 1.
        """
        if n_trials < 1:
            raise ValueError("n_trials must be at least 1")

        # With a fixed integer seed the splitter yields identical folds on
        # every call, so compute them once instead of once per trial.
        splitter = StratifiedKFold(
            n_splits=n_splits, shuffle=True, random_state=random_state
        )
        folds = list(splitter.split(X_train_transformed, y_train))
        take_rows = self._take_rows

        def objective(trial):
            params = {
                'random_state': random_state,
                'verbose': -1,
                # Optuna tunes the three parameters below.
                'max_depth': trial.suggest_int('max_depth', 3, 15),
                'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.2),
                'n_estimators': trial.suggest_int('n_estimators', 50, 300),
            }

            f1_scores = []
            for train_idx, val_idx in folds:
                X_train_fold = take_rows(X_train_transformed, train_idx)
                X_val_fold = take_rows(X_train_transformed, val_idx)
                y_train_fold = take_rows(y_train, train_idx)
                y_val_fold = take_rows(y_train, val_idx)

                model = LGBMClassifier(**params)
                model.fit(X_train_fold, y_train_fold)

                preds = model.predict(X_val_fold)
                f1_scores.append(f1_score(y_val_fold, preds, average='macro'))

            return float(np.mean(f1_scores))

        # Run the Optuna search; seeding the sampler makes the sequence of
        # suggested parameters repeatable between runs.
        study = optuna.create_study(
            direction='maximize',
            sampler=optuna.samplers.TPESampler(seed=random_state),
        )
        study.optimize(objective, n_trials=n_trials)

        print("Best hyperparameters:", study.best_params)
        print("Best CV Macro F1-Score:", study.best_value)

        # Train the final model on the entire training set.
        best_model = LGBMClassifier(
            **study.best_params,
            random_state=random_state,
            verbose=-1,
        )
        best_model.fit(X_train_transformed, y_train)

        # Keep the fitted model on the instance and hand it to the caller.
        self.model = best_model
        return best_model



        