In [92]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score,cohen_kappa_score, classification_report
from lightgbm import LGBMClassifier
import polars as pl
import optuna
import warnings

# 忽略all warnings
warnings.filterwarnings('ignore')

In [82]:
iris = load_iris()

data = pl.DataFrame(iris.data)
data.columns = iris.feature_names
target = pl.Series(iris.target)

In [83]:
X_train, X_test, Y_train, Y_test = train_test_split(data, target, test_size=0.2, random_state=42)

In [84]:
X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((120, 4), (120,), (30, 4), (30,))

In [96]:
def objective(trial):
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000), # default=100
        'learning_rate': trial.suggest_float('learning_rate', 0.005, 0.5), # default 0.1
        
        "num_leaves": trial.suggest_int("num_leaves", 20, 3000),    # default=31
        "max_depth": trial.suggest_int("max_depth", 3, 12),        # default=-1
        "min_data_in_leaf": trial.suggest_int("min_data_in_leaf", 1, 50), # default=20
        
        'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 10.0),
        'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 10.0),
        
        "min_gain_to_split": trial.suggest_float("min_gain_to_split", 0, 15),
        'random_state': 42,
    }
    # 设置不打印信息
    bst = LGBMClassifier(**params, verbose=-1)
    bst.fit(X_train, Y_train)
    accuracy = accuracy_score(Y_test, bst.predict(X_test))
    
    # 减枝
    trial.report(accuracy, step=trial.number)
    
    # 如果准确率低于 90%，则剪枝
    if accuracy < 0.9:
        raise optuna.exceptions.TrialPruned()
    
    return accuracy

In [97]:
# 对 RandomSampler 而言 MedianPruner 是最好的
# 对 TPESampler 而言 HyperbandPruner 是最好的
optuna.logging.set_verbosity(optuna.logging.INFO)
study = optuna.create_study(
    study_name='LightGBM', direction='maximize', 
    sampler=optuna.samplers.TPESampler(seed=42), pruner=optuna.pruners.HyperbandPruner()
)

[I 2023-12-04 20:43:01,207] A new study created in memory with name: LightGBM


In [98]:
study.optimize(objective, n_trials = 100, show_progress_bar = True, n_jobs=4)

  0%|          | 0/100 [00:00<?, ?it/s]

[I 2023-12-04 20:43:03,636] Trial 0 finished with value: 0.9666666666666667 and parameters: {'n_estimators': 254, 'learning_rate': 0.33061976188412806, 'num_leaves': 2617, 'max_depth': 3, 'min_data_in_leaf': 32, 'reg_alpha': 6.8751244354837695, 'reg_lambda': 6.754810785441291, 'min_gain_to_split': 1.5415589643245498}. Best is trial 0 with value: 0.9666666666666667.
[I 2023-12-04 20:43:03,703] Trial 2 finished with value: 0.9666666666666667 and parameters: {'n_estimators': 316, 'learning_rate': 0.052775231201794955, 'num_leaves': 1203, 'max_depth': 4, 'min_data_in_leaf': 10, 'reg_alpha': 6.4213543819819705, 'reg_lambda': 2.1137586731107705, 'min_gain_to_split': 12.471546821693167}. Best is trial 0 with value: 0.9666666666666667.
[I 2023-12-04 20:43:03,882] Trial 3 finished with value: 1.0 and parameters: {'n_estimators': 662, 'learning_rate': 0.4443548022067311, 'num_leaves': 1657, 'max_depth': 6, 'min_data_in_leaf': 7, 'reg_alpha': 1.5827802898554655, 'reg_lambda': 0.6171302463175055, 

In [99]:
best_params = study.best_params
best_value = study.best_value
print(f'Best params: {best_params}')
print(f'Best value: {best_value}\n')

Best params: {'n_estimators': 662, 'learning_rate': 0.4443548022067311, 'num_leaves': 1657, 'max_depth': 6, 'min_data_in_leaf': 7, 'reg_alpha': 1.5827802898554655, 'reg_lambda': 0.6171302463175055, 'min_gain_to_split': 10.6951908148363}
Best value: 1.0


In [100]:
Y_pred = LGBMClassifier(**best_params, verbose=-1).fit(X_train, Y_train).predict(X_test)

print("Accuracy : %.4g" % accuracy_score(Y_test, Y_pred))
print("Precision : %.4g" % precision_score(Y_test, Y_pred, average='macro'))
print("Recall : %.4g" % recall_score(Y_test, Y_pred, average='macro'))
print("F1 : %.4g" % f1_score(Y_test, Y_pred, average='macro'))
print("Cohen's kappa : %.4g" % cohen_kappa_score(Y_test, Y_pred))

Accuracy : 1
Precision : 1
Recall : 1
F1 : 1
Cohen's kappa : 1


In [101]:
print('\n clasification report:\n', classification_report(Y_test, Y_pred))


 clasification report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


In [102]:
# save log
study.trials_dataframe().to_csv('./Result/study.csv', index=False)