<a href="https://colab.research.google.com/github/SujeetSaxena/AI-ML/blob/main/Hyperparameter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
pip install optuna




In [6]:
import optuna
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load dataset
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Objective function to optimize
def objective(trial):
    # Suggest hyperparameters to tune
    max_depth = trial.suggest_int('max_depth', 1, 10)  # Integer range
    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)

    # Train a Decision Tree with suggested hyperparameters
    model = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy  # Optuna will maximize this value

# Create and run an Optuna study
study = optuna.create_study(direction='maximize')  # 'maximize' for accuracy
study.optimize(objective, n_trials=50)  # Run 50 trials

# Print the best result
print("Best hyperparameters:", study.best_params)
print("Best accuracy:", study.best_value)


[I 2025-03-08 05:48:07,375] A new study created in memory with name: no-name-2b0ad004-62a7-41b0-a25b-620993f5aa84
[I 2025-03-08 05:48:07,399] Trial 0 finished with value: 0.9666666666666667 and parameters: {'max_depth': 2, 'min_samples_split': 14}. Best is trial 0 with value: 0.9666666666666667.
[I 2025-03-08 05:48:07,410] Trial 1 finished with value: 1.0 and parameters: {'max_depth': 10, 'min_samples_split': 18}. Best is trial 1 with value: 1.0.
[I 2025-03-08 05:48:07,417] Trial 2 finished with value: 0.9666666666666667 and parameters: {'max_depth': 2, 'min_samples_split': 2}. Best is trial 1 with value: 1.0.
[I 2025-03-08 05:48:07,423] Trial 3 finished with value: 1.0 and parameters: {'max_depth': 6, 'min_samples_split': 4}. Best is trial 1 with value: 1.0.
[I 2025-03-08 05:48:07,430] Trial 4 finished with value: 0.9666666666666667 and parameters: {'max_depth': 2, 'min_samples_split': 11}. Best is trial 1 with value: 1.0.
[I 2025-03-08 05:48:07,437] Trial 5 finished with value: 1.0 a

Best hyperparameters: {'max_depth': 10, 'min_samples_split': 18}
Best accuracy: 1.0


Let's look at how Bayesian Optimization can be applied to tune hyperparameters.

In [None]:
pip install scikit-optimize


Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.1.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.1.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.1.0 scikit-optimize-0.10.2


In [None]:
from skopt import gp_minimize
from skopt.space import Integer
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load dataset
data = load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the objective function to minimize (Bayesian optimization minimizes by default)
def objective(params):
    max_depth, min_samples_split = params

    # Train a Decision Tree with the hyperparameters
    model = DecisionTreeClassifier(max_depth=max_depth, min_samples_split=min_samples_split, random_state=42)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Return the negative accuracy (since we want to minimize)
    return -accuracy

# Define the search space
space = [
    Integer(1, 10, name='max_depth'),          # Integer range for max_depth
    Integer(2, 20, name='min_samples_split')   # Integer range for min_samples_split
]

# Run Bayesian Optimization
res = gp_minimize(objective, space, n_calls=50, random_state=42)

# Print the best results
print("Best hyperparameters:")
print("max_depth:", res.x[0])
print("min_samples_split:", res.x[1])
print("Best accuracy:", -res.fun)




Best hyperparameters:
max_depth: 8
min_samples_split: 5
Best accuracy: 1.0
