In [7]:

import numpy as np
import pandas as pd
from pandas import read_csv
from sklearn.model_selection import*
import optuna
from lightgbm import LGBMClassifier
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn import preprocessing
# load dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
dataset = read_csv(url, header=None)
data = dataset.values
x_data, y_data = data[:, :-1],data[:, -1]
x_data = pd.DataFrame(x_data.astype('float64'))
#Label Encoding Target
le = preprocessing.LabelEncoder()
le.fit(y_data)
y_data = le.transform(y_data)
y_data = pd.DataFrame(y_data.astype('float64'))

def objective(trial):
    params_optuna = {
        'device' : 'gpu',
        'objective': 'binary',
        #'metric': 'cross_entropy',
        'metric': 'binary_logloss',
        'lambda_l1': trial.suggest_float('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_float('lambda_l2', 1e-8, 10.0),
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'max_bin' : trial.suggest_int('max_bin', 150, 512),
        #"boosting" : trial.suggest_categorical("boosting", ["gbdt"]),
        "boosting" : trial.suggest_categorical("boosting", ["gbdt", "rf", "dart"]),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 1.0),
        'num_iterations': trial.suggest_int('num_iterations', 50, 300),
        #'n_jobs': -1,
        #'is_unbalance': 'false',
    }
    #cv
    cv = StratifiedKFold(n_splits=3, random_state=42, shuffle=True)
    # set so cv to shuffle for train_test split like, e.g. for big datasets
    # cv = ShuffleSplit(n_splits=1, test_size=0.2)
    scores = []
    for train_index,test_index in cv.split(x_data,y_data):
        x_train, y_train = x_data.iloc[train_index],y_data.iloc[train_index]
        x_val, y_val = x_data.iloc[test_index],y_data.iloc[test_index]
    
        classifier = LGBMClassifier(**params_optuna, n_jobs=2, verbose=-1)
        classifier.fit(x_train, y_train)
        # predict probs for logloss
        y_pred = classifier.predict_proba(x_val)[:,1]       
        score = log_loss(y_val, y_pred.astype(np.float64))
        # predict for accuracy
        #y_pred = classifier.predict(x_val)
        #score = accuracy_score(y_val, y_pred.astype(np.float64))
        scores.append(score)
    
    return np.mean(scores)


study = optuna.create_study(study_name="LGB_optuna", direction="minimize")
# Suppress INFO logs by setting the logging level to WARNING
optuna.logging.set_verbosity(optuna.logging.ERROR)
study.optimize(objective, n_trials=100)
study_df_xgb = study.trials_dataframe()
print(study.best_value)
print(study.best_params)
lgb_params = study.best_params   


0.3611105922841189
{'lambda_l1': 0.5413824961820222, 'lambda_l2': 2.4185591357880414, 'num_leaves': 249, 'feature_fraction': 0.9176354752287577, 'bagging_fraction': 0.8229906222437912, 'bagging_freq': 7, 'min_child_samples': 30, 'max_bin': 462, 'boosting': 'gbdt', 'learning_rate': 0.22537390267355806, 'num_iterations': 116}


In [8]:
## Hyperparameter Feature Importance
from optuna.visualization import plot_param_importances
plot_param_importances(study)

In [9]:
from optuna.visualization import plot_parallel_coordinate
plot_parallel_coordinate(study)

In [10]:
from optuna.visualization import plot_slice
plot_slice(study)

In [11]:

from optuna.visualization import plot_optimization_history
plot_optimization_history(study)

In [12]:
from optuna.visualization import plot_contour
plot_contour(study, params=['bagging_fraction', 'bagging_freq'])