[Reference](https://medium.com/@chenycy/accelerate-your-machine-learning-skills-with-hyperparameter-tuning-00dfd27e261c)

In [7]:
!pip install optuna
!pip install optuna-integration

Collecting optuna-integration
  Downloading optuna_integration-3.6.0-py3-none-any.whl (93 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m93.4/93.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: optuna-integration
Successfully installed optuna-integration-3.6.0


In [8]:
import optuna
from sklearn.datasets import load_iris
from sklearn.svm import SVC

# Estimator to tune; only the regularisation strength C is searched.
clf = SVC()
param_distributions = {
    # Log-uniform sampling: C spans twenty orders of magnitude.
    "C": optuna.distributions.FloatDistribution(1e-10, 1e10, log=True)
}
# A fixed seed makes the sequence of sampled trials repeatable across runs.
optuna_search = optuna.integration.OptunaSearchCV(
    clf, param_distributions, random_state=42
)
X, y = load_iris(return_X_y=True)
optuna_search.fit(X, y)
# Predictions on the same data the search was fitted on: a smoke test,
# not an estimate of generalisation performance.
y_pred = optuna_search.predict(X)

  optuna_search = optuna.integration.OptunaSearchCV(clf, param_distributions)
[I 2024-06-26 23:08:44,922] A new study created in memory with name: no-name-6819e165-1abc-400d-bf56-166b037daba3
[I 2024-06-26 23:08:44,947] Trial 0 finished with value: 0.9266666666666667 and parameters: {'C': 0.03028706732111826}. Best is trial 0 with value: 0.9266666666666667.
[I 2024-06-26 23:08:44,968] Trial 1 finished with value: 0.9266666666666667 and parameters: {'C': 1.3317070172045463e-09}. Best is trial 0 with value: 0.9266666666666667.
[I 2024-06-26 23:08:44,984] Trial 2 finished with value: 0.9800000000000001 and parameters: {'C': 3.3779618556336124}. Best is trial 2 with value: 0.9800000000000001.
[I 2024-06-26 23:08:45,010] Trial 3 finished with value: 0.9400000000000001 and parameters: {'C': 1267373463.957981}. Best is trial 2 with value: 0.9800000000000001.
[I 2024-06-26 23:08:45,035] Trial 4 finished with value: 0.9266666666666667 and parameters: {'C': 0.0005335274801604334}. Best is trial 

In [10]:
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier


# Candidate values for every DecisionTreeClassifier hyperparameter to search
dt_param_grid = dict(
    max_depth=[None, 5, 10, 15],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    min_weight_fraction_leaf=[0.0, 0.1, 0.2],
    max_features=[None, 'sqrt', 'log2'],
    criterion=['gini', 'entropy'],
    splitter=['best', 'random'],
)

# Base estimator with default settings; the grid above supplies the variations
dt_classifier = DecisionTreeClassifier()


# Candidate values for every GradientBoostingClassifier hyperparameter to search
gb_param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 5, 7],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
    # Share of the training samples drawn for each individual base learner
    subsample=[0.8, 0.9, 1.0],
    # How many features are examined when looking for the best split
    max_features=['sqrt', 'log2', None],
    # Single-valued entry: pins the seed so every candidate is reproducible
    random_state=[42],
)

# Base estimator with default settings; the grid above supplies the variations
gb_classifier = GradientBoostingClassifier()

# Define the hyperparameter candidates for RandomForestClassifier
rf_param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    # 'auto' was deprecated in scikit-learn 1.1 and removed in 1.3 (for
    # classifiers it was only an alias of 'sqrt'). None = consider all
    # features, matching the options offered in the other tree grids here.
    'max_features': [None, 'sqrt', 'log2'],
    'bootstrap': [True, False],
    'criterion': ['gini', 'entropy']
}

# Base estimator with default settings; the candidates above supply the variations
rf_classifier = RandomForestClassifier()

# Candidate values for the Multinomial Naive Bayes hyperparameters
nb_param_grid = dict(
    alpha=[0.1, 0.5, 1.0],      # strength of additive smoothing
    fit_prior=[True, False],    # learn class priors from the data, or not
)

# Base Multinomial Naive Bayes estimator with default settings
nb_classifier = MultinomialNB()

# Candidate values for the k-nearest-neighbours hyperparameters
knn_param_grid = dict(
    n_neighbors=[3, 5, 7],                                  # neighbours consulted per query
    weights=['uniform', 'distance'],                        # how neighbour votes are weighted
    # NOTE(review): 'p' is the Minkowski power; none of the metrics listed
    # below is 'minkowski', so this axis may only duplicate fits - confirm.
    p=[1, 2],
    algorithm=['auto', 'ball_tree', 'kd_tree', 'brute'],    # neighbour lookup strategy
    leaf_size=[10, 20, 30],                                 # leaf size for the tree-based lookups
    metric=['euclidean', 'manhattan', 'chebyshev'],         # distance function
)

# Base KNN estimator with default settings
knn_classifier = KNeighborsClassifier()

# Candidate values for the support vector classifier hyperparameters
svm_param_grid = dict(
    C=[0.1, 1, 10],                                # regularisation strength
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],   # kernel family
    degree=[2, 3, 4],                              # polynomial degree (relevant to 'poly' only)
    gamma=['scale', 'auto', 0.1, 1],               # kernel coefficient for 'rbf', 'poly' and 'sigmoid'
    coef0=[0.0, 1.0, 2.0],                         # constant term inside the kernel function
)

# Base SVM estimator with default settings
svm_classifier = SVC()

# Exhaustive grid search demonstrated on the SVM, scored with 5-fold cross-validation.
# X and y are taken from the kernel namespace (the iris data when cells run in order).
grid_search = GridSearchCV(svm_classifier, svm_param_grid, cv=5)
grid_search.fit(X, y)

best_svm_params = grid_search.best_params_
print("Best Hyperparameters:", best_svm_params)

Best Hyperparameters: {'C': 0.1, 'coef0': 0.0, 'degree': 2, 'gamma': 'auto', 'kernel': 'poly'}


In [12]:
# from sklearn.model_selection import RandomizedSearchCV

# randomized_search = RandomizedSearchCV(estimator=svm_classifier, param_distributions=svm_param_grid, cv=5)

# randomized_search.fit(X, y)

# print("Best Hyperparameters:", randomized_search.best_params_)

In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from hyperopt import fmin, tpe, hp
from hyperopt import space_eval
from hyperopt.pyll import scope
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

# Sample dataset for the neural-network search (swap in your own data here).
# Note: this rebinds X and y, which earlier cells bound to the iris data.
digits = load_digits()
X = digits.data
y = digits.target

# Define the objective function for neural network hyperparameter tuning
def objective(params):
    """Train one candidate network and return its final validation loss.

    Reads the module-level ``X`` (features) and ``y`` (integer class labels).

    Parameters
    ----------
    params : dict
        One sample from the Hyperopt search space.
        Required keys: ``units``, ``num_layers``, ``activation``,
        ``optimizer`` and ``epochs``.
        Optional keys, applied only when present: ``dropout_rate``,
        ``batch_size`` and ``learning_rate``.
        An ``output_units`` entry is ignored: the softmax width must equal
        the number of classes, so it is derived from ``y`` instead.

    Returns
    -------
    float
        Validation loss after the last epoch (lower is better).
    """
    # Drop graph/layer state left over from the previous trial so memory does
    # not keep growing over the many evaluations fmin performs.
    tf.keras.backend.clear_session()

    # hp.quniform samples floats (e.g. 64.0, 2.0); layer widths and range()
    # need real integers.
    units = int(params['units'])
    num_layers = int(params['num_layers'])
    dropout_rate = float(params.get('dropout_rate', 0.0))

    # sparse_categorical_crossentropy needs one output per label id 0..max(y).
    num_classes = int(np.max(y)) + 1

    model = Sequential()
    model.add(tf.keras.Input(shape=(X.shape[1],)))

    # One initial hidden layer plus `num_layers` further hidden layers.
    for _ in range(num_layers + 1):
        model.add(Dense(units=units, activation=params['activation']))
        if dropout_rate > 0.0:
            model.add(tf.keras.layers.Dropout(dropout_rate))

    model.add(Dense(units=num_classes, activation='softmax'))

    optimizer = tf.keras.optimizers.get(params['optimizer'])
    if 'learning_rate' in params:
        optimizer.learning_rate = float(params['learning_rate'])

    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # Fixed seed: every trial is scored on the same train/validation split.
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

    batch_size = params.get('batch_size')  # None -> Keras default batch size
    history = model.fit(X_train, y_train,
                        epochs=int(params['epochs']),
                        batch_size=None if batch_size is None else int(batch_size),
                        validation_data=(X_val, y_val),
                        verbose=0)

    val_loss = history.history['val_loss'][-1]

    # Hyperopt minimizes the objective and a lower validation loss is better,
    # so the loss is returned as-is (no negation).
    return float(val_loss)

# Define the search space for neural network hyperparameters
space = {
    # hp.quniform samples floats (32.0, 64.0, ...); scope.int converts the
    # sampled value to int so it can be used as a layer width / in range().
    'units': scope.int(hp.quniform('units', 32, 512, 32)),
    'num_layers': scope.int(hp.quniform('num_layers', 1, 3, 1)),
    'activation': hp.choice('activation', ['relu', 'tanh', 'sigmoid']),
    # Not searched: the output layer needs exactly one unit per class, so this
    # is a constant derived from the labels rather than a range of 2..10.
    'output_units': int(np.max(y)) + 1,
    'optimizer': hp.choice('optimizer', ['adam', 'sgd']),
    'epochs': hp.choice('epochs', [10, 20, 30, 50]),
    'batch_size': hp.choice('batch_size', [16, 32, 64]),
    'dropout_rate': hp.uniform('dropout_rate', 0.0, 0.5),
    # loguniform bounds are exponents of e: values fall in [e**-5, e**0]
    'learning_rate': hp.loguniform('learning_rate', -5, 0),
}

# Run Hyperopt to find the best hyperparameters
best = fmin(
    fn=objective,
    space=space,
    algo=tpe.suggest,
    max_evals=50,
    # hyperopt >= 0.2.7 expects a numpy Generator here; the legacy
    # np.random.RandomState lacks the .integers() method it calls.
    rstate=np.random.default_rng(42),
)

# fmin reports hp.choice parameters as indices into their option lists;
# space_eval maps the result back to the actual hyperparameter values.
print("Best Hyperparameters:", space_eval(space, best))