In [3]:
import optuna
import numpy as np
import pandas as pd
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score

# Load the dataset
iris = load_iris()
X = iris.data
y = iris.target

# Split the data into training and validation sets
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)



In [8]:
def objective(trial):
    # Define hyperparameters to tune
    n_estimators = trial.suggest_int('n_estimators', 50, 200)
    max_depth = trial.suggest_int('max_depth', 2, 10)
    learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.1)

    # Choose bagging or boosting algorithm
    classifier_name = trial.suggest_categorical('classifier', ['adaboost', 'gradientboost', 'randomforest'])

    # Instantiate the classifier
    if classifier_name == 'bagging':
        pass
        # classifier = BaggingClassifier(n_estimators=n_estimators, max_depth=max_depth)
    elif classifier_name == 'adaboost':
        classifier = AdaBoostClassifier(n_estimators=n_estimators, learning_rate=learning_rate)
    elif classifier_name == 'gradientboost':
        classifier = GradientBoostingClassifier(n_estimators=n_estimators, max_depth=max_depth, learning_rate=learning_rate)
    else:  # Random Forest
        classifier = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)

    # Train and evaluate the classifier
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_valid)
    accuracy = accuracy_score(y_valid, y_pred)

    return accuracy



In [9]:
# Run the optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)



[32m[I 2023-05-27 09:41:47,791][0m A new study created in memory with name: no-name-f11e9490-97f8-4c19-9c13-d99df9bb60ab[0m
  learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.1)
[32m[I 2023-05-27 09:41:48,058][0m Trial 0 finished with value: 1.0 and parameters: {'n_estimators': 150, 'max_depth': 5, 'learning_rate': 0.02164298934256047, 'classifier': 'gradientboost'}. Best is trial 0 with value: 1.0.[0m
  learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.1)
[32m[I 2023-05-27 09:41:48,186][0m Trial 1 finished with value: 1.0 and parameters: {'n_estimators': 151, 'max_depth': 9, 'learning_rate': 0.0056667845439341115, 'classifier': 'randomforest'}. Best is trial 0 with value: 1.0.[0m
  learning_rate = trial.suggest_loguniform('learning_rate', 0.001, 0.1)
[32m[I 2023-05-27 09:41:48,465][0m Trial 2 finished with value: 1.0 and parameters: {'n_estimators': 190, 'max_depth': 3, 'learning_rate': 0.005178838834930987, 'classifier': 'gradientboost'}.

In [10]:

# Print the best hyperparameters and score
best_params = study.best_params
best_score = study.best_value
print('Best Parameters:', best_params)
print('Best Score:', best_score)

Best Parameters: {'n_estimators': 150, 'max_depth': 5, 'learning_rate': 0.02164298934256047, 'classifier': 'gradientboost'}
Best Score: 1.0


In [11]:
# Define the best hyperparameters obtained from Optuna
best_params = {
    'n_estimators': 150,
    'max_depth': 6,
    'learning_rate': 0.05,
    'classifier': 'gradientboost'  # Use the best classifier obtained from Optuna
}

# Instantiate the best classifier with the best hyperparameters
if best_params['classifier'] == 'bagging':
    pass
    # classifier = BaggingClassifier(n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth'])
elif best_params['classifier'] == 'adaboost':
    classifier = AdaBoostClassifier(n_estimators=best_params['n_estimators'], learning_rate=best_params['learning_rate'])
elif best_params['classifier'] == 'gradientboost':
    classifier = GradientBoostingClassifier(n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth'], learning_rate=best_params['learning_rate'])
else:  # Random Forest
    classifier = RandomForestClassifier(n_estimators=best_params['n_estimators'], max_depth=best_params['max_depth'])

# Train the classifier on the entire dataset
classifier.fit(X, y)

# Generate new data for prediction (example)
new_data = np.array([[5.1, 3.5, 1.4, 0.2], [6.3, 2.9, 5.6, 1.8]])

# Make predictions on the new data
predictions = classifier.predict(new_data)

# Print the predictions
print('Predictions:', predictions)


Predictions: [0 2]
