<a href="https://colab.research.google.com/github/RifatMuhtasim/Data_Science_Workflow/blob/main/5.2.Optuna_Classification_Hyperparameter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    import optuna
except:
    !pip install --quiet optuna
    import optuna

## Define a simple scikit-learn model

In [None]:
# Define a simple scikit-learn model

# Single Logistic Regression
from sklearn.linear_model import LogisticRegression

def objective(X, y):
    """Return the mean 5-fold cross-validation score of a default LogisticRegression.

    Args:
        X: Features, passed unchanged to ``cross_val_score``.
        y: Targets, passed unchanged to ``cross_val_score``.

    Returns:
        The mean of the five per-fold scores.
    """
    # Imported locally because no earlier cell imports cross_val_score;
    # without this the call below raises NameError when cells run in order.
    from sklearn.model_selection import cross_val_score

    clf = LogisticRegression()  # Default hyperparameters: the untuned baseline.

    # Train and evaluate the model on each of the 5 folds, then average.
    return cross_val_score(clf, X, y, n_jobs=-1, cv=5).mean()


# Score the baseline model. X_train / y_train are not defined in this cell;
# presumably they come from an earlier data-preparation step — TODO confirm.
result = objective(X_train, y_train)
print(f"Accuracy: {result}")

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score


def objective(X, y, model):
    """Cross-validate ``model`` on ``X`` / ``y`` and return the mean fold score.

    Args:
        X: Features, forwarded to ``cross_val_score``.
        y: Targets, forwarded to ``cross_val_score``.
        model: Estimator instance to evaluate.

    Returns:
        The mean of the scores from 5-fold cross-validation.
    """
    fold_scores = cross_val_score(model, X, y, n_jobs=-1, cv=5)
    return fold_scores.mean()


# Display name -> estimator built with its default hyperparameters.
models = {'Logistic_Regression':  LogisticRegression(),
                    'Random_Forest': RandomForestClassifier(),
                    'Decision_Tree': DecisionTreeClassifier(),
                    'XGB_Classifier': XGBClassifier(),
                    'SVM': SVC(),
                    'K_Nearest_Neighbors': KNeighborsClassifier(),
                    'GaussianNB': GaussianNB()}

# Score every candidate with the same objective and print one line per model.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
for model_name, model in models.items():
    result = objective(X_train, y_train, model=model)
    print(f"{model_name} Accuracy is: ", result)

# 1. Optuna

In [None]:
# Optuna For Logistic Regression

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def LogisticRegression_Optuna(X, y):
    """Search LogisticRegression hyperparameters with Optuna.

    Runs 100 trials; each trial samples ``penalty``, ``C`` and ``solver``
    and is scored by the mean of a 5-fold cross-validation on ``X`` / ``y``.
    The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        # Sampling order is kept as penalty -> C -> solver.
        penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
        inverse_reg = trial.suggest_float('C', 1e-5, 1e5, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])

        estimator = LogisticRegression(penalty=penalty, C=inverse_reg, solver=solver)
        fold_scores = cross_val_score(estimator, X, y, n_jobs=-1, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial

# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = LogisticRegression_Optuna(X_train, y_train)
print(f"Logistic Regression Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For XGBoost

import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def XGB_Optuna(X, y):
    """Search XGBClassifier hyperparameters with Optuna.

    Runs 100 trials; each trial is scored by the mean of a 5-fold
    cross-validation on ``X`` / ``y`` and the study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        # Keyword arguments are evaluated left to right, so the parameters
        # are sampled in the order they are listed here.
        estimator = XGBClassifier(
            n_estimators=trial.suggest_int('n_estimators', 100, 1000),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1.0),
            gamma=trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            min_child_weight=trial.suggest_int('min_child_weight', 1, 300),
            reg_alpha=trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
            reg_lambda=trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
            random_state=42,
            n_jobs=-1,
        )
        fold_scores = cross_val_score(estimator, X, y, n_jobs=-1, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial


# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = XGB_Optuna(X_train, y_train)
print(f"XGBoost Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For RandomForest

import optuna
from sklearn.ensemble import RandomForestClassifier

def RandomForest_Optuna(X, y):
    """Search RandomForestClassifier hyperparameters with Optuna.

    Runs 100 trials; each trial samples ``n_estimators`` and ``max_depth``
    and is scored by the mean of a 5-fold cross-validation on ``X`` / ``y``.
    The study maximizes that mean.

    Args:
        X: Features, forwarded to ``cross_val_score``.
        y: Targets, forwarded to ``cross_val_score``.

    Returns:
        The study's best trial (``study.best_trial``). Its ``params`` hold
        integer values that can be passed straight back to
        ``RandomForestClassifier``.
    """
    # This cell does not import cross_val_score itself; import it locally so
    # the function does not depend on an earlier cell having been run.
    from sklearn.model_selection import cross_val_score

    def objective(trial, X, y):
        params = {
            'n_estimators': trial.suggest_int("n_estimators", 2, 20),
            # Sample the depth as a log-scaled integer. The previous
            # int(suggest_float(...)) recorded a float in trial.params, which
            # was not the depth actually used and is not a valid max_depth.
            'max_depth': trial.suggest_int("max_depth", 1, 32, log=True),
            # Fixed seed so score differences between trials come from the
            # hyperparameters rather than from forest randomness.
            'random_state': 42,
        }
        clf = RandomForestClassifier(**params)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=5).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    return study.best_trial

# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = RandomForest_Optuna(X=X_train, y=y_train)
print(f"Random Forest Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For DecisionTreeClassifier

import optuna
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

def DecisionTree_Optuna(X, y):
    """Search DecisionTreeClassifier hyperparameters with Optuna.

    Runs 100 trials; each trial samples ``max_depth``, ``min_samples_split``
    and ``min_samples_leaf`` and is scored by the mean of a 5-fold
    cross-validation on ``X`` / ``y``. The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        depth = trial.suggest_int('max_depth', 3, 10)
        split_min = trial.suggest_int('min_samples_split', 2, 20)
        leaf_min = trial.suggest_int('min_samples_leaf', 1, 10)

        estimator = DecisionTreeClassifier(
            max_depth=depth,
            min_samples_split=split_min,
            min_samples_leaf=leaf_min,
            random_state=42,
        )
        fold_scores = cross_val_score(estimator, X, y, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial


# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = DecisionTree_Optuna(X=X_train, y=y_train)
print(f"DecisionTree Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For SVM

import optuna
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

def SVC_Optuna(X, y):
    """Search SVC hyperparameters with Optuna.

    Runs 100 trials; each trial samples ``C``, ``kernel``, ``gamma``,
    ``degree`` and ``coef0`` (all five on every trial, whatever the kernel)
    and is scored by the mean of a 5-fold cross-validation on ``X`` / ``y``.
    The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        # Keyword arguments are evaluated left to right, so the parameters
        # are sampled in the order they are listed here.
        estimator = SVC(
            C=trial.suggest_float('C', 1e-5, 100, log=True),
            kernel=trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
            gamma=trial.suggest_categorical('gamma', ['scale', 'auto']),
            degree=trial.suggest_int('degree', 1, 5),
            coef0=trial.suggest_float('coef0', 0.0, 10.0),
            random_state=42,
        )
        fold_scores = cross_val_score(estimator, X, y, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial

# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = SVC_Optuna(X=X_train, y=y_train)
print(f"SVC Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For KNeighborsClassifier

import optuna
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

def KNeighbors_Optuna(X, y):
    """Search KNeighborsClassifier hyperparameters with Optuna.

    Runs 100 trials; each trial samples ``n_neighbors``, ``weights``,
    ``algorithm``, ``leaf_size`` and ``p`` and is scored by the mean of a
    5-fold cross-validation on ``X`` / ``y``. The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        # Keyword arguments are evaluated left to right, so the parameters
        # are sampled in the order they are listed here.
        estimator = KNeighborsClassifier(
            n_neighbors=trial.suggest_int('n_neighbors', 1, 30),
            weights=trial.suggest_categorical('weights', ['uniform', 'distance']),
            algorithm=trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
            leaf_size=trial.suggest_int('leaf_size', 10, 50),
            p=trial.suggest_int('p', 1, 2),
            n_jobs=-1,
        )
        fold_scores = cross_val_score(estimator, X, y, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial

# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = KNeighbors_Optuna(X=X_train, y=y_train)
print(f"KNeighborsClassifier Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
import optuna
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

def GaussianNB_Optuna(X, y):
    """Search GaussianNB's ``var_smoothing`` with Optuna.

    Runs 100 trials; each trial samples ``var_smoothing`` on a log scale
    between 1e-12 and 1e-3 and is scored by the mean of a 5-fold
    cross-validation on ``X`` / ``y``. The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        smoothing = trial.suggest_float('var_smoothing', 1e-12, 1e-3, log=True)
        estimator = GaussianNB(var_smoothing=smoothing)
        fold_scores = cross_val_score(estimator, X, y, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial


# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = GaussianNB_Optuna(X=X_train, y=y_train)
print(f"GaussianNB Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For MultinomialNB

import optuna
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import cross_val_score

def MultinomialNB_Optuna(X, y):
    """Search MultinomialNB's ``alpha`` with Optuna.

    Runs 100 trials; each trial samples ``alpha`` on a log scale between
    1e-5 and 1.0 and is scored by the mean of a 5-fold cross-validation on
    ``X`` / ``y``. The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        smoothing = trial.suggest_float('alpha', 1e-5, 1.0, log=True)
        estimator = MultinomialNB(alpha=smoothing)
        fold_scores = cross_val_score(estimator, X, y, cv=5)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial

# Run the search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = MultinomialNB_Optuna(X=X_train, y=y_train)
print(f"MultinomialNB Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

# 2. Multiple Optuna Model

In [None]:
# Using Optuna on Multiple Model

import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def Optuna_Hyperparameter_tuning(X, y):
    """Let Optuna choose between RandomForest and XGBoost and tune the winner.

    Each of the 100 trials first samples the ``classifier`` family, then only
    that family's hyperparameters (prefixed ``rf_`` or ``xgb_``). The trial is
    scored by the mean of a 5-fold cross-validation on ``X`` / ``y`` and the
    study maximizes that mean.

    Args:
        X: Features, forwarded to ``cross_val_score``.
        y: Targets, forwarded to ``cross_val_score``.

    Returns:
        The study's best trial (``study.best_trial``); its ``params`` contain
        ``classifier`` plus the prefixed parameters of the chosen family.
    """
    # This cell does not import RandomForestClassifier itself; import it
    # locally so the function does not depend on an earlier cell.
    from sklearn.ensemble import RandomForestClassifier

    def objective(trial, X, y):
        classifier = trial.suggest_categorical("classifier", ['RandomForest', "XGB"])

        if classifier == "RandomForest":
            rf_params = {
                'n_estimators': trial.suggest_int("rf_n_estimators", 2, 20),
                # Sample an integer directly. int(suggest_float(3, 10))
                # recorded a float in trial.params and truncation meant a
                # depth of 10 was practically never tried.
                'max_depth': trial.suggest_int("rf_max_depth", 3, 10),
                'random_state': 42,
                'n_jobs': -1
            }
            clf = RandomForestClassifier(**rf_params)

        else:
            xgb_params = {
                'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('xgb_max_depth', 3, 10),
                'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3, log=True),
                'subsample': trial.suggest_float('xgb_subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.5, 1.0),
                'gamma': trial.suggest_float('xgb_gamma', 1e-8, 1.0, log=True),
                'min_child_weight': trial.suggest_int('xgb_min_child_weight', 1, 300),
                'reg_alpha': trial.suggest_float('xgb_reg_alpha', 1e-8, 1.0, log=True),
                'reg_lambda': trial.suggest_float('xgb_reg_lambda', 1e-8, 1.0, log=True),
                'random_state': 42,
                'n_jobs': -1
            }
            clf = XGBClassifier(**xgb_params)

        return cross_val_score(clf, X, y, n_jobs=-1, cv=5).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    return study.best_trial

# Run the combined search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = Optuna_Hyperparameter_tuning(X=X_train, y=y_train)
print(f"Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

# 3. Model Using Stratified K-Fold


```
# Perform 5-fold cross-validation with Stratified K-Fold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(clf, X, y, cv=skf)
```



In [None]:
# Optuna For Logistic Regression

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, StratifiedKFold

def LogisticRegression_Optuna(X, y):
    """Search LogisticRegression hyperparameters using shuffled stratified folds.

    Runs 100 trials; each trial samples ``penalty``, ``C`` and ``solver`` and
    is scored by the mean cross-validation score over a 5-split
    ``StratifiedKFold`` (``shuffle=True``, ``random_state=42``) on
    ``X`` / ``y``. The study maximizes that mean.

    Returns:
        The study's best trial (``study.best_trial``).
    """

    def _mean_cv_score(trial):
        # Sampling order is kept as penalty -> C -> solver.
        penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
        inverse_reg = trial.suggest_float('C', 1e-5, 1e5, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])

        estimator = LogisticRegression(penalty=penalty, C=inverse_reg, solver=solver)
        folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        fold_scores = cross_val_score(estimator, X, y, n_jobs=-1, cv=folds)
        return fold_scores.mean()

    search = optuna.create_study(direction="maximize")
    search.optimize(_mean_cv_score, n_trials=100)
    return search.best_trial

# Run the stratified search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = LogisticRegression_Optuna(X_train, y_train)
print(f"Logistic Regression Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For XGBoost Stratified

import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def XGB_Optuna(X, y):
    """Search XGBClassifier hyperparameters using shuffled stratified folds.

    Runs 100 trials; each trial is scored by the mean cross-validation score
    over a 5-split ``StratifiedKFold`` (``shuffle=True``, ``random_state=42``)
    on ``X`` / ``y``. The study maximizes that mean.

    Args:
        X: Features, forwarded to ``cross_val_score``.
        y: Targets, forwarded to ``cross_val_score``.

    Returns:
        The study's best trial (``study.best_trial``).
    """
    # This cell's imports omit StratifiedKFold; import it locally so the
    # function does not raise NameError when an earlier cell was not run.
    from sklearn.model_selection import StratifiedKFold

    def objective(trial, X, y):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
            'random_state': 42,
            'n_jobs': -1
        }

        clf = XGBClassifier(**params)
        # Same seed on every trial, so all trials are scored on the same splits.
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=skf).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    return study.best_trial


# Run the stratified search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = XGB_Optuna(X=X_train, y=y_train)
print(f"XGBoost Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

In [None]:
# Optuna For KNeighborsClassifier Stratified

import optuna
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

def KNeighbors_Optuna(X, y):
    """Search KNeighborsClassifier hyperparameters using shuffled stratified folds.

    Runs 100 trials; each trial samples ``n_neighbors``, ``weights``,
    ``algorithm``, ``leaf_size`` and ``p`` and is scored by the mean
    cross-validation score over a 5-split ``StratifiedKFold``
    (``shuffle=True``, ``random_state=42``) on ``X`` / ``y``. The study
    maximizes that mean.

    Args:
        X: Features, forwarded to ``cross_val_score``.
        y: Targets, forwarded to ``cross_val_score``.

    Returns:
        The study's best trial (``study.best_trial``).
    """
    # This cell's imports omit StratifiedKFold; import it locally so the
    # function does not raise NameError when an earlier cell was not run.
    from sklearn.model_selection import StratifiedKFold

    def objective(trial, X, y):
        params = {
            'n_neighbors': trial.suggest_int('n_neighbors', 1, 30),
            'weights': trial.suggest_categorical('weights', ['uniform', 'distance']),
            'algorithm': trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
            'leaf_size': trial.suggest_int('leaf_size', 10, 50),
            'p': trial.suggest_int('p', 1, 2),
            'n_jobs': -1
        }

        clf = KNeighborsClassifier(**params)
        # Same seed on every trial, so all trials are scored on the same splits.
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=skf).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    return study.best_trial

# Run the stratified search and report the returned trial's value and parameters.
# X_train / y_train are assumed to be defined in an earlier cell — TODO confirm.
result = KNeighbors_Optuna(X=X_train, y=y_train)
print(f"KNeighborsClassifier Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")