In [1]:
## Import the required packages ##
from sklearn.metrics import recall_score, roc_auc_score, precision_score, f1_score
import optuna
import numpy as np

# Classifiers
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from xgboost.sklearn import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

In [3]:
# Placeholders for the training and test sets.
# Each name is bound to the np.ndarray *class* itself, not to an array:
# replace all four with real feature matrices / label vectors before
# running any of the tuning cells below.
X_train, y_train = np.ndarray, np.ndarray
X_test, y_test = np.ndarray, np.ndarray

## K-Nearest Neighbors

In [None]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a K-Nearest Neighbors classifier.

    Samples ``n_neighbors`` from [1, 10], fits the classifier on
    ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'n_neighbors': trial.suggest_int('n_neighbors', 1, 10)}

    # Fit on the training set with the suggested hyperparameters
    clf = KNeighborsClassifier(**param)
    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)

    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=40)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## Decision Tree

In [None]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a decision tree classifier.

    Samples ``max_depth`` and ``min_samples_leaf`` from [1, 10] plus the
    split ``criterion`` and ``splitter``, fits the tree on
    ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'max_depth': trial.suggest_int('max_depth', 1, 10),
             'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
             'criterion': trial.suggest_categorical('criterion', ['log_loss', 'gini', 'entropy']),
             'splitter': trial.suggest_categorical('splitter', ['random', 'best'])}

    # Seed the estimator so a given set of hyperparameters always yields the
    # same score (same seed the LightGBM and CatBoost cells use)
    clf = DecisionTreeClassifier(**param, random_state=1)
    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)

    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## Extra Trees

In [None]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of an extra-trees classifier.

    Samples ``n_estimators`` from [50, 300], ``max_depth`` and
    ``min_samples_leaf`` from [1, 10] plus the split ``criterion``, fits
    the ensemble on ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial. Each Optuna parameter name matches the
    # estimator keyword so the study's best params can be passed straight
    # back to ExtraTreesClassifier.
    param = {'n_estimators': trial.suggest_int('n_estimators', 50, 300),
             'max_depth': trial.suggest_int('max_depth', 1, 10),
             'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
             'criterion': trial.suggest_categorical('criterion', ['log_loss', 'gini', 'entropy'])}

    # Seed the estimator so a given set of hyperparameters always yields the
    # same score (same seed the LightGBM and CatBoost cells use)
    clf = ExtraTreesClassifier(**param, random_state=1)

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## Random Forest

In [81]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a random forest classifier.

    Samples ``max_depth`` from [1, 20], ``min_samples_leaf`` from [1, 10],
    the split ``criterion`` and ``n_estimators`` from [100, 1000], fits the
    forest on ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'max_depth': trial.suggest_int('max_depth', 1, 20),
             'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 10),
             'criterion': trial.suggest_categorical('criterion', ['log_loss', 'gini', 'entropy']),
             'n_estimators': trial.suggest_int('n_estimators', 100, 1000)}

    # Seed the estimator so a given set of hyperparameters always yields the
    # same score (same seed the LightGBM and CatBoost cells use)
    clf = RandomForestClassifier(**param, random_state=1)

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## AdaBoost

In [99]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of an AdaBoost classifier.

    Samples ``n_estimators`` from [25, 500] and ``learning_rate`` from
    [0.1, 10], fits the ensemble on ``X_train``/``y_train`` and scores its
    predictions on ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'n_estimators': trial.suggest_int('n_estimators', 25, 500),
             'learning_rate': trial.suggest_float('learning_rate', 0.1, 10)}

    # Seed the estimator so a given set of hyperparameters always yields the
    # same score (same seed the LightGBM and CatBoost cells use)
    clf = AdaBoostClassifier(**param, random_state=1)

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## Gradient Boosting

In [103]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a gradient boosting classifier.

    Samples ``max_depth`` from [1, 30], ``n_estimators`` from [50, 1000],
    the split ``criterion`` and ``learning_rate`` from [0.1, 5], fits the
    model on ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with recall.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The recall on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'max_depth': trial.suggest_int('max_depth', 1, 30),
             'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
             'criterion': trial.suggest_categorical('criterion', ['friedman_mse', 'squared_error']),
             'learning_rate': trial.suggest_float('learning_rate', 0.1, 5)}

    # Seed the estimator so a given set of hyperparameters always yields the
    # same score (same seed the LightGBM and CatBoost cells use)
    clf = GradientBoostingClassifier(**param, random_state=1)

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = recall_score(y_true=y_test, y_pred=y_pred)

    # Report the trial under the metric's real name (recall, not accuracy)
    print(f"Model Recall: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## XGBoost

In [51]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of an XGBoost classifier.

    Samples ``max_depth``, ``gamma``, ``min_child_weight``,
    ``scale_pos_weight``, ``subsample`` and ``learning_rate``, fits the
    model on ``X_train``/``y_train`` and scores it on
    ``X_test``/``y_test`` with ROC AUC.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The ROC AUC on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'max_depth': trial.suggest_int('max_depth', 1, 30),
             'gamma': trial.suggest_float('gamma', 0.1, 10),
             'min_child_weight': trial.suggest_int('min_child_weight', 1, 5),
             'scale_pos_weight': trial.suggest_int('scale_pos_weight', 1, 5),
             'subsample': trial.suggest_float('subsample', 0.1, 0.9),
             'learning_rate': trial.suggest_float('learning_rate', 0.1, 5)}

    # Seed the estimator: with subsample < 1 the row sampling is random, so
    # an unseeded model would score the same hyperparameters differently
    clf = XGBClassifier(**param, random_state=1)

    clf.fit(X_train, y_train)
    # ROC AUC ranks samples by score, so feed it the positive-class
    # probability rather than hard 0/1 predictions (which collapse the
    # curve to a single operating point).
    # Assumes a binary target, as scale_pos_weight implies -- TODO confirm.
    y_score = clf.predict_proba(X_test)[:, 1]
    # Metric being optimized
    metric = roc_auc_score(y_test, y_score)

    # Report the trial under the metric's real name (ROC AUC, not accuracy)
    print(f"Model ROC AUC: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=200)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## LightGBM

In [288]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a LightGBM classifier.

    Samples ``num_leaves``, ``max_depth``, ``learning_rate``,
    ``n_estimators``, ``subsample_for_bin`` and ``scale_pos_weight``, fits
    the model (binary objective, ``random_state=1``) on
    ``X_train``/``y_train`` and scores its predictions on
    ``X_test``/``y_test`` with F1.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The F1 score on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'num_leaves': trial.suggest_int('num_leaves', 2, 50),
             'max_depth': trial.suggest_int('max_depth', 1, 30), # -1
             'learning_rate': trial.suggest_float('learning_rate', 0.01, 5),
             'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
             'subsample_for_bin': trial.suggest_int('subsample_for_bin', 50, 180),
             'scale_pos_weight': trial.suggest_float('scale_pos_weight', 0.1, 8)}

    # Fit on the training set with the suggested hyperparameters
    clf = LGBMClassifier(**param, random_state=1, objective='binary')

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = f1_score(y_test, y_pred)

    # Report the trial under the metric's real name (F1, not accuracy)
    print(f"Model F1: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))

## CatBoost

In [44]:
# Define the objective function you want to optimize
def objective(trial):
    """Score one Optuna trial of a CatBoost classifier.

    Samples ``iterations`` from [2, 100], ``depth`` from [1, 16],
    ``learning_rate`` from [0.001, 5] and ``l2_leaf_reg`` from [1, 9],
    fits the model (``random_state=1``) on ``X_train``/``y_train`` and
    scores its predictions on ``X_test``/``y_test`` with F1.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        The F1 score on the test set; the study below maximizes it.

    NOTE(review): the score is computed on the test set, so the study
    selects hyperparameters on the same data it reports on. Consider a
    separate validation split or cross-validation for model selection.
    """
    # Search space for this trial
    param = {'iterations': trial.suggest_int('iterations', 2, 100),
             'depth': trial.suggest_int('depth', 1, 16), # -1
             'learning_rate': trial.suggest_float('learning_rate', 0.001, 5),
             'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 9)}

    # Fit on the training set with the suggested hyperparameters
    clf = CatBoostClassifier(**param, random_state=1, custom_metric=['F1', 'AUC'])

    clf.fit(X_train, y_train)
    # Predict the held-out set
    y_pred = clf.predict(X_test)
    # Metric being optimized
    metric = f1_score(y_test, y_pred)

    # Report the trial under the metric's real name (F1, not accuracy)
    print(f"Model F1: {round(metric, 6)}")
    print(f"Model Parameters: {param}")
    return metric

# Maximize the objective with a seeded TPE sampler
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.TPESampler(seed=1))
study.optimize(objective, n_trials=100)

In [None]:
# Summary of the module-level `study` (whichever tuning cell ran last).
# Total number of trials recorded by the study
print("Number of finished trials: {}".format(len(study.trials)))

# Header for the best-trial summary
print("Best trial:")
# Bind the best trial at module level so the lines below can read it
trial = study.best_trial
# Objective value reached by the best trial
print("  Value: {}".format(trial.value))
# Hyperparameters of the best trial, one per line
print("  Params: ")
for key in trial.params:
    value = trial.params[key]
    print("    {}: {}".format(key, value))