In [1]:
# import libraries
import pandas as pd
import numpy as np
import optuna
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer

#close warnings
import warnings
warnings.filterwarnings('ignore')

In [2]:
# load dataset
data = load_breast_cancer()

# Convert data's properties to a DataFrame
df = pd.DataFrame(data.data, columns=data.feature_names)

# View the first few lines
print(df.head()) 

   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst radius  worst texture  worst perimeter  \
0           

In [3]:
# Separating the data set into training and testing sets
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
study = optuna.create_study(direction="maximize")

[I 2024-04-22 10:43:46,144] A new study created in memory with name: no-name-f4ecee55-c4b6-4387-8d26-b3d86bd32021


In [5]:
# Defining a target function
def objective(trial):
  # Determine hyperparameter values
  learning_rate = trial.suggest_float("learning_rate", 0.01, 0.1)
  num_leaves = trial.suggest_int("num_leaves", 2, 256)
  max_depth = trial.suggest_int("max_depth", -1, 50)
  min_child_samples = trial.suggest_int("min_child_samples", 5, 100)
  subsample = trial.suggest_float("subsample", 0.5, 1.0)
  colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1.0)
  n_estimators = trial.suggest_int("n_estimators", 100, 1000)
  
  # Create and train the model
  model = lgb.LGBMClassifier(
  learning_rate=learning_rate,
  num_leaves=num_leaves,
  max_depth=max_depth,
  min_child_samples=min_child_samples,
  subsample=subsample,
  colsample_bytree=colsample_bytree,
  n_estimators=n_estimators,
  random_state=42
  )
  model.fit(X_train, y_train)
  
  # Evaluate the model and return the metric
  y_pred = model.predict(X_test)
  accuracy = accuracy_score(y_test, y_pred)
  return accuracy

# Run the study and review the results
study.optimize(objective, n_trials=20)
print("Best trial:")
print(" Value: {}".format(study.best_trial.value))
print(" Params: {}".format(study.best_trial.params))

[I 2024-04-22 10:43:47,885] Trial 0 finished with value: 0.9736842105263158 and parameters: {'learning_rate': 0.09546003643702863, 'num_leaves': 188, 'max_depth': 0, 'min_child_samples': 17, 'subsample': 0.6794241506069079, 'colsample_bytree': 0.6313640248593912, 'n_estimators': 837}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-04-22 10:43:48,185] Trial 1 finished with value: 0.9649122807017544 and parameters: {'learning_rate': 0.010523565467553331, 'num_leaves': 243, 'max_depth': 48, 'min_child_samples': 68, 'subsample': 0.6890749488348794, 'colsample_bytree': 0.6085217231914155, 'n_estimators': 509}. Best is trial 0 with value: 0.9736842105263158.
[I 2024-04-22 10:43:49,151] Trial 2 finished with value: 0.9649122807017544 and parameters: {'learning_rate': 0.03356945508143604, 'num_leaves': 226, 'max_depth': 36, 'min_child_samples': 13, 'subsample': 0.832775692766962, 'colsample_bytree': 0.5893478292965865, 'n_estimators': 349}. Best is trial 0 with value: 0.97368421052631

Best trial:
 Value: 0.9736842105263158
 Params: {'learning_rate': 0.09546003643702863, 'num_leaves': 188, 'max_depth': 0, 'min_child_samples': 17, 'subsample': 0.6794241506069079, 'colsample_bytree': 0.6313640248593912, 'n_estimators': 837}
