In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import optuna
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score, classification_report

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
data = pd.read_csv(r'/media/minhman/01DA5FE5725E67B0/Code_HK6/NMTGMT/densenet_features_with_filenames.csv')

In [3]:
data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1912,1913,1914,1915,1916,1917,1918,1919,label,filename
0,0.000077,0.001443,0.000719,0.001873,0.000745,0.000065,0.000515,0.000278,0.000109,0.015095,...,0.581535,0.343685,0.408467,0.162864,0.638420,0.291200,0.053712,0.259736,1,28908.jpg
1,0.000184,0.000885,0.000852,0.002641,0.000381,0.000051,0.000537,0.000199,0.000071,0.045185,...,0.062064,1.284115,5.471166,0.593731,0.058908,0.007260,0.600492,0.706881,2,35766.jpg
2,0.000106,0.000521,0.001584,0.005209,0.000385,0.000106,0.000534,0.000339,0.000085,0.026248,...,0.100823,0.981890,0.425639,2.002005,0.271025,0.679370,0.177648,2.071562,1,19834.jpg
3,0.000130,0.000921,0.001510,0.002226,0.000608,0.000041,0.000079,0.000386,0.000109,0.011693,...,0.255094,1.970089,3.080681,0.716955,0.530225,0.500160,0.772827,0.122548,1,45276.jpg
4,0.000140,0.000865,0.000958,0.003322,0.001317,0.000203,0.000172,0.000470,0.000036,0.010904,...,1.373008,2.070337,2.454356,0.076358,0.171203,0.343563,0.283609,0.742759,1,23209.jpg
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10612,0.000059,0.000363,0.001142,0.002790,0.000615,0.000089,0.000138,0.000222,0.000132,0.017571,...,0.353087,0.137852,2.559826,0.592890,0.509755,1.989417,0.004787,0.171616,2,35120.jpg
10613,0.000080,0.000376,0.001320,0.001434,0.000557,0.000207,0.000680,0.000619,0.000031,0.049196,...,0.015017,1.169918,1.422654,0.299659,1.298084,0.528012,0.703754,0.645925,0,15380.jpg
10614,0.000086,0.001008,0.002121,0.002585,0.000731,0.000242,0.000457,0.000313,0.000091,0.026664,...,0.029386,1.580187,2.378851,0.181816,0.470450,0.023741,0.242935,0.256606,2,14830.jpg
10615,0.000055,0.000585,0.000822,0.001179,0.001057,0.000124,0.000508,0.000515,0.000109,0.026526,...,0.105542,0.078668,0.361497,1.082828,0.627888,0.224114,0.351183,0.615589,0,2854.jpg


In [4]:
X_data = data.iloc[:, :-2]
y_data = data.iloc[:, [-2]]

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.3, random_state=42)
X_train, X_valid, y_train , y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

### CatBoost

### LightGBM

In [6]:
import lightgbm as lgb  # đảm bảo đây là lgb gốc

# Không dùng dòng này nữa:
# lgb = LGBMClassifier()  ❌

# Objective cho Optuna
def objective(trial):
    params = {
        'objective': 'multiclass',
        'metric': 'multi_error',
        'num_class': 3,
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'device': 'gpu', 
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.3, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 20, 150),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-3, 10.0, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-3, 10.0, log=True),
    }

    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

    model = lgb.train(
    params,
    dtrain,
    valid_sets=[dvalid],
    valid_names=["valid"],
    num_boost_round=1000,
    early_stopping_rounds=50,
    verbose_eval=False
)


    preds = model.predict(X_valid)
    preds_labels = preds.argmax(axis=1)  # vì đây là multiclass
    acc = accuracy_score(y_valid, preds_labels)
    return acc


# Chạy Optuna
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=50)

# In ra kết quả tốt nhất
print("Best trial:")
print(study.best_trial)
print("Best parameters:")
print(study.best_params)

# Huấn luyện lại mô hình với tham số tốt nhất
best_params = study.best_params
best_params['objective'] = 'binary'
best_params['metric'] = 'binary_error'
best_model_lgb = lgb.LGBMClassifier(**best_params)
best_model_lgb.fit(X_train, y_train)

[I 2025-05-20 00:44:26,535] A new study created in memory with name: no-name-44d69d66-d95d-4fb8-a300-291360e502bd
[W 2025-05-20 00:44:26,538] Trial 0 failed with parameters: {'learning_rate': 0.013624561483361147, 'num_leaves': 106, 'max_depth': 10, 'min_data_in_leaf': 11, 'feature_fraction': 0.8853349787357259, 'bagging_fraction': 0.5293223716135522, 'bagging_freq': 9, 'lambda_l1': 0.47440743828396936, 'lambda_l2': 0.0014231244225005417} because of the following error: TypeError("train() got an unexpected keyword argument 'early_stopping_rounds'").
Traceback (most recent call last):
  File "/home/minhman/.local/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "/tmp/ipykernel_7333/554199478.py", line 29, in objective
    model = lgb.train(
TypeError: train() got an unexpected keyword argument 'early_stopping_rounds'
[W 2025-05-20 00:44:26,539] Trial 0 failed with value None.


TypeError: train() got an unexpected keyword argument 'early_stopping_rounds'

### Ket qua

In [None]:
# LightGB,
y_pred_lgb= best_model_lgb.predit(X_test)
report_lgb = classification_report(y_test, y_pred_cb)

NameError: name 'best_model_lgb' is not defined

In [None]:
print(report_lgb)

              precision    recall  f1-score   support

           0       0.74      0.80      0.77      1092
           1       0.65      0.63      0.64      1110
           2       0.72      0.68      0.70       984

    accuracy                           0.71      3186
   macro avg       0.71      0.71      0.70      3186
weighted avg       0.70      0.71      0.70      3186

