<a href="https://colab.research.google.com/github/RifatMuhtasim/Data_Science/blob/main/Model_Training/Optuna_Classification_Hyperparameter_Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
try:
    import optuna
except:
    !pip install --quiet optuna
    import optuna

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import sklearn.datasets
import pandas as pd

In [None]:
# Load the Iris data set and put the feature columns and the class label
# into a single DataFrame.
iris = sklearn.datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [None]:
# Number of rows per class label (shows whether the classes are balanced).
df.loc[:, 'target'].value_counts()

target
0    50
1    50
2    50
Name: count, dtype: int64

In [None]:
from sklearn.model_selection import train_test_split

# Separate the class label from the feature columns.
y = df['target']
X = df.drop(columns=['target'])

# Hold out 20% of the rows as a test split, stratified on the label,
# with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    stratify=y,
    random_state=42,
)

## Define a simple scikit-learn model

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# Baseline: a single Logistic Regression with default hyperparameters
from sklearn.linear_model import LogisticRegression

def objective(X, y):
    """Return the mean 5-fold cross-validation score of a default LogisticRegression on (X, y)."""
    estimator = LogisticRegression()
    fold_scores = cross_val_score(estimator, X, y, cv=5, n_jobs=-1)
    return fold_scores.mean()


result = objective(X_train, y_train)
print(f"Accuracy: {result}")

Accuracy: 0.9666666666666668


In [None]:
# Single Random Forest
from sklearn.ensemble import RandomForestClassifier

def objective(X, y):
    """Return the mean 5-fold cross-validation score of a default Random Forest on (X, y).

    The forest is seeded so that re-running the cell reproduces the same score.
    """
    clf = RandomForestClassifier(random_state=42)  # Fixed seed -> repeatable baseline.

    return cross_val_score(
        clf, X, y, n_jobs=-1, cv=5
    ).mean()  # Train and evaluate the model.


result = objective(X_train, y_train)
print(f"Accuracy: {result}")

Accuracy: 0.9583333333333334


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score


def objective(X, y, model):
    """Return the mean 5-fold cross-validation score of `model` on (X, y)."""
    return cross_val_score(model, X, y, n_jobs=-1, cv=5).mean()


# Baseline candidates with otherwise default hyperparameters.
# The tree-based estimators are seeded so the comparison below is repeatable
# from one run of the notebook to the next.
models = {
    'Logistic_Regression': LogisticRegression(),
    'Random_Forest': RandomForestClassifier(random_state=42),
    'Decision_Tree': DecisionTreeClassifier(random_state=42),
    'XGB_Classifier': XGBClassifier(random_state=42),
    'SVM': SVC(),
    'K_Nearest_Neighbors': KNeighborsClassifier(),
    'GaussianNB': GaussianNB(),
}

# Score every candidate on the training split with the same CV protocol.
for model_name, model in models.items():
    result = objective(X_train, y_train, model=model)
    print(f"{model_name} Accuracy is: ", result)

Logistic_Regression Accuracy is:  0.9666666666666668
Random_Forest Accuracy is:  0.95
Decision_Tree Accuracy is:  0.9416666666666668
XGB_Classifier Accuracy is:  0.95
SVM Accuracy is:  0.975
K_Nearest_Neighbors Accuracy is:  0.975
GaussianNB Accuracy is:  0.9583333333333334


# Optuna

In [None]:
# Optuna For Logistic Regression

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def LogisticRegression_Optuna(X, y):
    """Tune LogisticRegression with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). Each trial samples the penalty, C (log scale) and the solver.
    The returned trial exposes the best score as `.value` and the sampled
    settings as `.params`.
    """
    def objective(trial):
        # Sampling order matches the hyperparameter names reported by Optuna.
        penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
        C = trial.suggest_float('C', 1e-5, 1e5, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])

        model = LogisticRegression(penalty=penalty, C=C, solver=solver)
        fold_scores = cross_val_score(model, X, y, n_jobs=-1, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial

result = LogisticRegression_Optuna(X_train, y_train)
print(f"Logistic Regression Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:35:55,946] A new study created in memory with name: no-name-7377d351-24bc-494d-80e1-e0bf958dddf4
[I 2024-05-07 21:35:55,994] Trial 0 finished with value: 0.3333333333333333 and parameters: {'penalty': 'l2', 'C': 0.00024144186851515707, 'solver': 'liblinear'}. Best is trial 0 with value: 0.3333333333333333.
[I 2024-05-07 21:35:56,121] Trial 1 finished with value: 0.9583333333333334 and parameters: {'penalty': 'l1', 'C': 39.83157898951783, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9583333333333334.
[I 2024-05-07 21:35:56,174] Trial 2 finished with value: 0.95 and parameters: {'penalty': 'l2', 'C': 0.8079432552425845, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9583333333333334.
[I 2024-05-07 21:35:56,295] Trial 3 finished with value: 0.9583333333333334 and parameters: {'penalty': 'l1', 'C': 319.56858102396626, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9583333333333334.
[I 2024-05-07 21:35:56,414] Trial 4 finished with value: 0.9583333333

Logistic Regression Accuracy: 0.9833333333333334
Best Hyperparameters: {'penalty': 'l2', 'C': 271.90842962887945, 'solver': 'saga'}


In [None]:
# Optuna For XGBoost

import optuna
from xgboost import XGBClassifier
from sklearn.model_selection import cross_val_score

def XGB_Optuna(X, y):
    """Tune XGBClassifier with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). The returned trial exposes the best score as `.value` and the
    sampled hyperparameters as `.params` (the fixed `random_state` and
    `n_jobs` are not sampled and therefore not part of `.params`).
    """
    def objective(trial):
        # NOTE(review): in the logged runs, trials with a large
        # min_child_weight (e.g. 225, 242) score 0.333, i.e. chance level for
        # three balanced classes. The upper bound of 300 looks too large for
        # this data set -- consider narrowing it.
        search_space = dict(
            n_estimators=trial.suggest_int('n_estimators', 100, 1000),
            max_depth=trial.suggest_int('max_depth', 3, 10),
            learning_rate=trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('subsample', 0.5, 1.0),
            colsample_bytree=trial.suggest_float('colsample_bytree', 0.5, 1.0),
            gamma=trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            min_child_weight=trial.suggest_int('min_child_weight', 1, 300),
            reg_alpha=trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
            reg_lambda=trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
            random_state=42,
            n_jobs=-1,
        )

        booster = XGBClassifier(**search_space)
        fold_scores = cross_val_score(booster, X, y, n_jobs=-1, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial


result = XGB_Optuna(X_train, y_train)
print(f"XGBoost Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:36:09,752] A new study created in memory with name: no-name-11f7444d-8a43-4e77-a140-982af15d8603
[I 2024-05-07 21:36:11,293] Trial 0 finished with value: 0.3333333333333333 and parameters: {'n_estimators': 819, 'max_depth': 10, 'learning_rate': 0.01887957020861126, 'subsample': 0.5924080520472457, 'colsample_bytree': 0.5441981806924261, 'gamma': 0.04860929015739589, 'min_child_weight': 225, 'reg_alpha': 1.9486898014788065e-07, 'reg_lambda': 1.770966012558957e-08}. Best is trial 0 with value: 0.3333333333333333.
[I 2024-05-07 21:36:12,113] Trial 1 finished with value: 0.3333333333333333 and parameters: {'n_estimators': 462, 'max_depth': 8, 'learning_rate': 0.012481870141794735, 'subsample': 0.6182921282661313, 'colsample_bytree': 0.627857875706148, 'gamma': 0.002189115550558356, 'min_child_weight': 242, 'reg_alpha': 5.1491841259925435e-08, 'reg_lambda': 0.00010271270158538885}. Best is trial 0 with value: 0.3333333333333333.
[I 2024-05-07 21:36:12,701] Trial 2 finished 

XGBoost Accuracy: 0.9583333333333334
Best Hyperparameters: {'n_estimators': 694, 'max_depth': 10, 'learning_rate': 0.02182719160256513, 'subsample': 0.6837846507104118, 'colsample_bytree': 0.694925594026333, 'gamma': 0.786424172461476, 'min_child_weight': 3, 'reg_alpha': 4.883031908778155e-06, 'reg_lambda': 1.0010996820701067e-08}


In [None]:
# Optuna For RandomForest

import optuna
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def RandomForest_Optuna(X, y):
    """Tune RandomForestClassifier with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). Each trial samples `n_estimators` and `max_depth`. The returned
    trial exposes the best score as `.value` and the sampled settings as
    `.params`.
    """
    def objective(trial, X, y):
        params = {
            'n_estimators': trial.suggest_int("n_estimators", 2, 20),
            # Sample the depth as an integer (log scale) so the value stored in
            # `trial.params` is exactly what the model was trained with.
            # Truncating a sampled float made Optuna report depths such as
            # 3.63 that cannot be passed back to the estimator as-is.
            'max_depth': trial.suggest_int("max_depth", 1, 32, log=True),
            # Fixed seed: without it the same hyperparameters give different
            # scores between trials, which makes the objective noisy.
            'random_state': 42
        }
        clf = RandomForestClassifier(**params)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=5).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial= trial, X= X, y= y), n_trials=100)
    trial = study.best_trial
    return trial

result = RandomForest_Optuna(X=X_train, y=y_train)
print(f"Random Forest Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:37:36,604] A new study created in memory with name: no-name-236d7fe0-72a8-4979-b5a2-329d728149bd
[I 2024-05-07 21:37:36,820] Trial 0 finished with value: 0.9416666666666667 and parameters: {'n_estimators': 14, 'max_depth': 17.765015390318435}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:37:36,924] Trial 1 finished with value: 0.95 and parameters: {'n_estimators': 4, 'max_depth': 4.278210512565028}. Best is trial 1 with value: 0.95.
[I 2024-05-07 21:37:37,112] Trial 2 finished with value: 0.95 and parameters: {'n_estimators': 12, 'max_depth': 4.129302008599562}. Best is trial 1 with value: 0.95.
[I 2024-05-07 21:37:37,354] Trial 3 finished with value: 0.9416666666666667 and parameters: {'n_estimators': 20, 'max_depth': 2.0360321712383618}. Best is trial 1 with value: 0.95.
[I 2024-05-07 21:37:37,468] Trial 4 finished with value: 0.95 and parameters: {'n_estimators': 5, 'max_depth': 6.028984708520022}. Best is trial 1 with value: 0.95.
[I 2024-05-07 

Random Forest Accuracy: 0.9666666666666668
Best Hyperparameters: {'n_estimators': 12, 'max_depth': 3.63314859152777}


In [None]:
# Optuna For DecisionTreeClassifier

import optuna
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score

def DecisionTree_Optuna(X, y):
    """Tune DecisionTreeClassifier with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). Each trial samples `max_depth`, `min_samples_split` and
    `min_samples_leaf`; the tree itself is seeded with random_state=42.
    """
    def objective(trial):
        depth = trial.suggest_int('max_depth', 3, 10)
        split_min = trial.suggest_int('min_samples_split', 2, 20)
        leaf_min = trial.suggest_int('min_samples_leaf', 1, 10)

        tree = DecisionTreeClassifier(
            max_depth=depth,
            min_samples_split=split_min,
            min_samples_leaf=leaf_min,
            random_state=42,
        )
        fold_scores = cross_val_score(tree, X, y, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial


result = DecisionTree_Optuna(X=X_train, y=y_train)
print(f"DecisionTree Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:37:48,801] A new study created in memory with name: no-name-bb333ddb-2245-4cc5-b936-00dd8f2c4994
[I 2024-05-07 21:37:48,831] Trial 0 finished with value: 0.925 and parameters: {'max_depth': 6, 'min_samples_split': 10, 'min_samples_leaf': 5}. Best is trial 0 with value: 0.925.
[I 2024-05-07 21:37:48,870] Trial 1 finished with value: 0.9333333333333333 and parameters: {'max_depth': 3, 'min_samples_split': 13, 'min_samples_leaf': 3}. Best is trial 1 with value: 0.9333333333333333.
[I 2024-05-07 21:37:48,895] Trial 2 finished with value: 0.9333333333333333 and parameters: {'max_depth': 10, 'min_samples_split': 7, 'min_samples_leaf': 2}. Best is trial 1 with value: 0.9333333333333333.
[I 2024-05-07 21:37:48,921] Trial 3 finished with value: 0.9333333333333333 and parameters: {'max_depth': 8, 'min_samples_split': 13, 'min_samples_leaf': 4}. Best is trial 1 with value: 0.9333333333333333.
[I 2024-05-07 21:37:48,946] Trial 4 finished with value: 0.925 and parameters: {'max_dep

DecisionTree Accuracy: 0.9333333333333333
Best Hyperparameters: {'max_depth': 3, 'min_samples_split': 13, 'min_samples_leaf': 3}


In [None]:
# Optuna For SVM

import optuna
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

def SVC_Optuna(X, y):
    """Tune SVC with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). Each trial samples C (log scale), the kernel, gamma, degree and
    coef0; the estimator is created with random_state=42.
    """
    def objective(trial):
        # NOTE(review): degree and coef0 are sampled for every kernel. Per the
        # scikit-learn SVC documentation they only matter for some kernels, so
        # sampling them conditionally may be worth considering -- confirm.
        search_space = dict(
            C=trial.suggest_float('C', 1e-5, 100, log=True),
            kernel=trial.suggest_categorical('kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
            gamma=trial.suggest_categorical('gamma', ['scale', 'auto']),
            degree=trial.suggest_int('degree', 1, 5),
            coef0=trial.suggest_float('coef0', 0.0, 10.0),
            random_state=42,
        )

        svm = SVC(**search_space)
        fold_scores = cross_val_score(svm, X, y, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial

result = SVC_Optuna(X=X_train, y=y_train)
print(f"SVC Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:37:54,180] A new study created in memory with name: no-name-518d90f3-f3fb-4c0d-8f7e-a2f6eea4764d
[I 2024-05-07 21:37:54,212] Trial 0 finished with value: 0.3333333333333333 and parameters: {'C': 17.329597586599803, 'kernel': 'sigmoid', 'gamma': 'auto', 'degree': 5, 'coef0': 7.733447813629483}. Best is trial 0 with value: 0.3333333333333333.
[I 2024-05-07 21:37:54,241] Trial 1 finished with value: 0.9083333333333334 and parameters: {'C': 3.1063379753380045e-05, 'kernel': 'poly', 'gamma': 'auto', 'degree': 1, 'coef0': 8.09469651407515}. Best is trial 1 with value: 0.9083333333333334.
[I 2024-05-07 21:37:54,270] Trial 2 finished with value: 0.95 and parameters: {'C': 7.4473923725616515, 'kernel': 'poly', 'gamma': 'auto', 'degree': 3, 'coef0': 6.813149603253792}. Best is trial 2 with value: 0.95.
[I 2024-05-07 21:37:54,297] Trial 3 finished with value: 0.9666666666666668 and parameters: {'C': 7.800074580297807, 'kernel': 'linear', 'gamma': 'auto', 'degree': 3, 'coef0': 3.4

SVC Accuracy: 0.9833333333333334
Best Hyperparameters: {'C': 0.7762913315081683, 'kernel': 'poly', 'gamma': 'scale', 'degree': 2, 'coef0': 3.2953991880842466}


In [None]:
# Optuna For KNeighborsClassifier

import optuna
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score

def KNeighbors_Optuna(X, y):
    """Tune KNeighborsClassifier with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). Each trial samples `n_neighbors`, `weights`, `algorithm`,
    `leaf_size` and `p`; `n_jobs=-1` is fixed and not part of `.params`.
    """
    def objective(trial):
        search_space = dict(
            n_neighbors=trial.suggest_int('n_neighbors', 1, 30),
            weights=trial.suggest_categorical('weights', ['uniform', 'distance']),
            algorithm=trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
            leaf_size=trial.suggest_int('leaf_size', 10, 50),
            p=trial.suggest_int('p', 1, 2),
            n_jobs=-1,
        )

        knn = KNeighborsClassifier(**search_space)
        fold_scores = cross_val_score(knn, X, y, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial

result = KNeighbors_Optuna(X=X_train, y=y_train)
print(f"KNeighborsClassifier Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:37:58,700] A new study created in memory with name: no-name-dff78f05-926d-424b-a09c-1a323ac77041
[I 2024-05-07 21:37:58,820] Trial 0 finished with value: 0.9666666666666666 and parameters: {'n_neighbors': 8, 'weights': 'uniform', 'algorithm': 'brute', 'leaf_size': 25, 'p': 2}. Best is trial 0 with value: 0.9666666666666666.
[I 2024-05-07 21:37:58,946] Trial 1 finished with value: 0.975 and parameters: {'n_neighbors': 10, 'weights': 'uniform', 'algorithm': 'ball_tree', 'leaf_size': 46, 'p': 1}. Best is trial 1 with value: 0.975.
[I 2024-05-07 21:37:59,038] Trial 2 finished with value: 0.975 and parameters: {'n_neighbors': 20, 'weights': 'distance', 'algorithm': 'ball_tree', 'leaf_size': 36, 'p': 2}. Best is trial 1 with value: 0.975.
[I 2024-05-07 21:37:59,135] Trial 3 finished with value: 0.975 and parameters: {'n_neighbors': 5, 'weights': 'uniform', 'algorithm': 'kd_tree', 'leaf_size': 18, 'p': 2}. Best is trial 1 with value: 0.975.
[I 2024-05-07 21:37:59,169] Trial 4

KNeighborsClassifier Accuracy: 0.9833333333333334
Best Hyperparameters: {'n_neighbors': 6, 'weights': 'uniform', 'algorithm': 'brute', 'leaf_size': 50, 'p': 2}


In [None]:
import optuna
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score

def GaussianNB_Optuna(X, y):
    """Tune GaussianNB with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). The only sampled hyperparameter is `var_smoothing`, drawn on a
    log scale between 1e-12 and 1e-3.
    """
    def objective(trial):
        smoothing = trial.suggest_float('var_smoothing', 1e-12, 1e-3, log=True)

        nb = GaussianNB(var_smoothing=smoothing)
        fold_scores = cross_val_score(nb, X, y, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial


result = GaussianNB_Optuna(X=X_train, y=y_train)
print(f"GaussianNB Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:38:10,554] A new study created in memory with name: no-name-eaea05de-478f-4934-b6dc-6922b24c33bb
[I 2024-05-07 21:38:10,578] Trial 0 finished with value: 0.9583333333333334 and parameters: {'var_smoothing': 3.482672219099085e-10}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:10,606] Trial 1 finished with value: 0.9583333333333334 and parameters: {'var_smoothing': 2.2808453233157421e-07}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:10,631] Trial 2 finished with value: 0.9583333333333334 and parameters: {'var_smoothing': 5.387250106593735e-10}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:10,658] Trial 3 finished with value: 0.9583333333333334 and parameters: {'var_smoothing': 1.7834595386561944e-09}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:10,682] Trial 4 finished with value: 0.9583333333333334 and parameters: {'var_smoothing': 3.2852417275772026e-11}. Best is trial 0 with 

GaussianNB Accuracy: 0.9583333333333334
Best Hyperparameters: {'var_smoothing': 3.482672219099085e-10}


In [None]:
# Optuna For Multinomial NB

import optuna
from sklearn.naive_bayes import MultinomialNB
from sklearn.model_selection import cross_val_score

def MultinomialNB_Optuna(X, y):
    """Tune MultinomialNB with Optuna and return the best trial.

    Runs 100 trials that maximize the mean 5-fold cross-validation score on
    (X, y). The only sampled hyperparameter is `alpha`, drawn on a log scale
    between 1e-5 and 1.0.
    """
    def objective(trial):
        smoothing = trial.suggest_float('alpha', 1e-5, 1.0, log=True)

        nb = MultinomialNB(alpha=smoothing)
        fold_scores = cross_val_score(nb, X, y, cv=5)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial

result = MultinomialNB_Optuna(X=X_train, y=y_train)
print(f"MultinomialNB Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:38:13,653] A new study created in memory with name: no-name-097a420a-0732-4886-9d10-d76c319c2597
[I 2024-05-07 21:38:13,693] Trial 0 finished with value: 0.9416666666666667 and parameters: {'alpha': 0.0029521739209855064}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:38:13,724] Trial 1 finished with value: 0.9416666666666667 and parameters: {'alpha': 0.333029830203611}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:38:13,754] Trial 2 finished with value: 0.9416666666666667 and parameters: {'alpha': 1.1020954857327466e-05}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:38:13,780] Trial 3 finished with value: 0.9416666666666667 and parameters: {'alpha': 1.0580875466157702e-05}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:38:13,805] Trial 4 finished with value: 0.9416666666666667 and parameters: {'alpha': 0.21128307103891142}. Best is trial 0 with value: 0.9416666666666667.
[I 2024-05-07 21:38:

MultinomialNB Accuracy: 0.95
Best Hyperparameters: {'alpha': 0.613592375529841}


# Multiple Models

In [None]:
# Using Optuna on Multiple Models

import optuna
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def Optuna_Hyperparameter_tuning(X, y):
    """Search over both the model family and its hyperparameters with Optuna.

    Every trial first picks a classifier ('RandomForest' or 'XGB') and then
    samples only that classifier's hyperparameters, which are stored under
    `rf_`- / `xgb_`-prefixed names. Runs 100 trials that maximize the mean
    5-fold cross-validation score on (X, y) and returns the best trial; its
    `.params` hold the chosen classifier and the sampled settings.
    """
    def objective(trial, X, y):
        classifier = trial.suggest_categorical("classifier", ['RandomForest', "XGB"])

        if classifier == "RandomForest":
            rf_params = {
                'n_estimators' : trial.suggest_int("rf_n_estimators", 2, 20),
                # Sample the depth as an integer so `trial.params` records the
                # value the forest was actually trained with (a truncated float
                # was reported as e.g. 7.6 before).
                'max_depth': trial.suggest_int("rf_max_depth", 3, 10),
                'random_state': 42,
                'n_jobs': -1
            }
            clf = RandomForestClassifier(**rf_params)

        else:
            xgb_params = {
                'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 1000),
                'max_depth': trial.suggest_int('xgb_max_depth', 3, 10),
                'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3, log=True),
                'subsample': trial.suggest_float('xgb_subsample', 0.5, 1.0),
                'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.5, 1.0),
                'gamma': trial.suggest_float('xgb_gamma', 1e-8, 1.0, log=True),
                'min_child_weight': trial.suggest_int('xgb_min_child_weight', 1, 300),
                'reg_alpha': trial.suggest_float('xgb_reg_alpha', 1e-8, 1.0, log=True),
                'reg_lambda': trial.suggest_float('xgb_reg_lambda', 1e-8, 1.0, log=True),
                'random_state': 42,
                'n_jobs': -1
            }
            clf = XGBClassifier(**xgb_params)

        return cross_val_score(clf, X, y, n_jobs=-1, cv=5).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    trial = study.best_trial
    return trial

result = Optuna_Hyperparameter_tuning(X=X_train, y=y_train)
print(f"Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:38:17,784] A new study created in memory with name: no-name-91b69aa7-c434-44f2-a39d-d59ae176c253
[I 2024-05-07 21:38:18,001] Trial 0 finished with value: 0.9583333333333334 and parameters: {'classifier': 'RandomForest', 'rf_n_estimators': 14, 'rf_max_depth': 7.4319859550198535}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:18,812] Trial 1 finished with value: 0.3333333333333333 and parameters: {'classifier': 'XGB', 'xgb_n_estimators': 840, 'xgb_max_depth': 9, 'xgb_learning_rate': 0.04315225904464256, 'xgb_subsample': 0.610968191344435, 'xgb_colsample_bytree': 0.5294700850108377, 'xgb_gamma': 3.5577351248623125e-05, 'xgb_min_child_weight': 20, 'xgb_reg_alpha': 0.062032410349030884, 'xgb_reg_lambda': 7.852899691401548e-05}. Best is trial 0 with value: 0.9583333333333334.
[I 2024-05-07 21:38:19,532] Trial 2 finished with value: 0.3333333333333333 and parameters: {'classifier': 'XGB', 'xgb_n_estimators': 733, 'xgb_max_depth': 5, 'xgb_learning_rate': 

Accuracy: 0.9666666666666668
Best Hyperparameters: {'classifier': 'RandomForest', 'rf_n_estimators': 15, 'rf_max_depth': 7.606556023554221}


# Stratified K-Fold


```
# Perform 5-fold cross-validation with Stratified K-Fold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(clf, X, y, cv=skf)
```



In [None]:
# Optuna For Logistic Regression (Stratified K-Fold)

import optuna
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, StratifiedKFold

def LogisticRegression_Optuna(X, y):
    """Tune LogisticRegression with Optuna using shuffled stratified 5-fold CV.

    Runs 100 trials that maximize the mean cross-validation score on (X, y).
    Every trial is scored on the same folds because the splitter is seeded
    with random_state=42. Returns the best trial.
    """
    # Same configuration for every trial, so the splitter is built once.
    folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    def objective(trial):
        penalty = trial.suggest_categorical('penalty', ['l1', 'l2'])
        C = trial.suggest_float('C', 1e-5, 1e5, log=True)
        solver = trial.suggest_categorical('solver', ['liblinear', 'saga'])

        model = LogisticRegression(penalty=penalty, C=C, solver=solver)
        fold_scores = cross_val_score(model, X, y, n_jobs=-1, cv=folds)
        return fold_scores.mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100)
    return study.best_trial

result = LogisticRegression_Optuna(X_train, y_train)
print(f"Logistic Regression Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:38:45,105] A new study created in memory with name: no-name-8d0bfb5b-7ab9-492f-bff1-b3c77141ed5f
[I 2024-05-07 21:38:45,156] Trial 0 finished with value: 0.6916666666666667 and parameters: {'penalty': 'l2', 'C': 3.3255236460650755e-05, 'solver': 'saga'}. Best is trial 0 with value: 0.6916666666666667.
[I 2024-05-07 21:38:45,207] Trial 1 finished with value: 0.9666666666666668 and parameters: {'penalty': 'l1', 'C': 2024.0962365545251, 'solver': 'saga'}. Best is trial 1 with value: 0.9666666666666668.
[I 2024-05-07 21:38:45,246] Trial 2 finished with value: 0.3333333333333333 and parameters: {'penalty': 'l1', 'C': 0.00045482745555488775, 'solver': 'saga'}. Best is trial 1 with value: 0.9666666666666668.
[I 2024-05-07 21:38:45,355] Trial 3 finished with value: 0.9583333333333334 and parameters: {'penalty': 'l1', 'C': 2818.7649490593208, 'solver': 'liblinear'}. Best is trial 1 with value: 0.9666666666666668.
[I 2024-05-07 21:38:45,401] Trial 4 finished with value: 0.983333

Logistic Regression Accuracy: 0.9833333333333334
Best Hyperparameters: {'penalty': 'l2', 'C': 0.16866854825402958, 'solver': 'saga'}


In [None]:
# Optuna For XGBoost Stratified

import optuna
from xgboost import XGBClassifier
# StratifiedKFold is used below; import it here so the cell does not depend on
# an earlier cell having imported it.
from sklearn.model_selection import cross_val_score, StratifiedKFold

def XGB_Optuna(X, y):
    """Tune XGBClassifier with Optuna using shuffled stratified 5-fold CV.

    Runs 100 trials that maximize the mean cross-validation score on (X, y).
    The splitter is seeded with random_state=42, so every trial is scored on
    the same folds. Returns the best trial; its `.value` is the best score and
    `.params` the sampled hyperparameters.
    """
    def objective(trial, X, y):
        params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 1000),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
            'gamma': trial.suggest_float('gamma', 1e-8, 1.0, log=True),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 300),
            'reg_alpha': trial.suggest_float('reg_alpha', 1e-8, 1.0, log=True),
            'reg_lambda': trial.suggest_float('reg_lambda', 1e-8, 1.0, log=True),
            'random_state': 42,
            'n_jobs': -1
        }

        clf = XGBClassifier(**params)
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=skf).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    trial = study.best_trial
    return trial


result = XGB_Optuna(X=X_train, y=y_train)
print(f"XGBoost Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:38:50,896] A new study created in memory with name: no-name-e943d9bd-6e9f-49ac-bd12-cd96e60ea692
[I 2024-05-07 21:38:51,269] Trial 0 finished with value: 0.3333333333333333 and parameters: {'n_estimators': 344, 'max_depth': 7, 'learning_rate': 0.18974226806252006, 'subsample': 0.9178136939615446, 'colsample_bytree': 0.9864146849202264, 'gamma': 3.1297879823106584e-05, 'min_child_weight': 87, 'reg_alpha': 0.00011459792898900966, 'reg_lambda': 4.665027097815945e-06}. Best is trial 0 with value: 0.3333333333333333.
[I 2024-05-07 21:38:52,029] Trial 1 finished with value: 0.95 and parameters: {'n_estimators': 725, 'max_depth': 3, 'learning_rate': 0.05289258572765257, 'subsample': 0.6778040099487413, 'colsample_bytree': 0.8972106705203525, 'gamma': 3.458317290621975e-07, 'min_child_weight': 4, 'reg_alpha': 7.095552750398122e-07, 'reg_lambda': 0.027154761291847677}. Best is trial 1 with value: 0.95.
[I 2024-05-07 21:38:52,254] Trial 2 finished with value: 0.3333333333333333 

XGBoost Accuracy: 0.9583333333333334
Best Hyperparameters: {'n_estimators': 441, 'max_depth': 6, 'learning_rate': 0.010155754107149745, 'subsample': 0.8738369053842003, 'colsample_bytree': 0.8368350659038509, 'gamma': 8.545817077175639e-06, 'min_child_weight': 1, 'reg_alpha': 8.740761789690457e-07, 'reg_lambda': 3.1411662526203097e-05}


In [None]:
# Optuna For KNeighborsClassifier (Stratified K-Fold)

import optuna
from sklearn.neighbors import KNeighborsClassifier
# StratifiedKFold is used below; import it here so the cell does not depend on
# an earlier cell having imported it.
from sklearn.model_selection import cross_val_score, StratifiedKFold

def KNeighbors_Optuna(X, y):
    """Tune KNeighborsClassifier with Optuna using shuffled stratified 5-fold CV.

    Runs 100 trials that maximize the mean cross-validation score on (X, y).
    The splitter is seeded with random_state=42, so every trial is scored on
    the same folds. Returns the best trial; `n_jobs` is fixed and therefore
    not part of its `.params`.
    """
    def objective(trial, X, y):
        params = {
            'n_neighbors': trial.suggest_int('n_neighbors', 1, 30),
            'weights': trial.suggest_categorical('weights', ['uniform', 'distance']),
            'algorithm': trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute']),
            'leaf_size': trial.suggest_int('leaf_size', 10, 50),
            'p': trial.suggest_int('p', 1, 2),
            'n_jobs': -1
        }

        clf = KNeighborsClassifier(**params)
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        return cross_val_score(clf, X, y, n_jobs=-1, cv=skf).mean()

    study = optuna.create_study(direction="maximize")
    study.optimize(lambda trial: objective(trial=trial, X=X, y=y), n_trials=100)
    trial = study.best_trial
    return trial

result = KNeighbors_Optuna(X=X_train, y=y_train)
print(f"KNeighborsClassifier Accuracy: {result.value}")
print(f"Best Hyperparameters: {result.params}")

[I 2024-05-07 21:39:50,632] A new study created in memory with name: no-name-b5ccd384-6d5e-4b58-93f8-497198163110
[I 2024-05-07 21:39:50,712] Trial 0 finished with value: 0.975 and parameters: {'n_neighbors': 19, 'weights': 'uniform', 'algorithm': 'kd_tree', 'leaf_size': 11, 'p': 2}. Best is trial 0 with value: 0.975.
[I 2024-05-07 21:39:50,801] Trial 1 finished with value: 0.95 and parameters: {'n_neighbors': 18, 'weights': 'uniform', 'algorithm': 'auto', 'leaf_size': 12, 'p': 1}. Best is trial 0 with value: 0.975.
[I 2024-05-07 21:39:50,870] Trial 2 finished with value: 0.975 and parameters: {'n_neighbors': 14, 'weights': 'distance', 'algorithm': 'auto', 'leaf_size': 36, 'p': 2}. Best is trial 0 with value: 0.975.
[I 2024-05-07 21:39:50,951] Trial 3 finished with value: 0.9583333333333334 and parameters: {'n_neighbors': 15, 'weights': 'distance', 'algorithm': 'kd_tree', 'leaf_size': 40, 'p': 1}. Best is trial 0 with value: 0.975.
[I 2024-05-07 21:39:51,022] Trial 4 finished with valu

KNeighborsClassifier Accuracy: 0.9833333333333334
Best Hyperparameters: {'n_neighbors': 17, 'weights': 'distance', 'algorithm': 'kd_tree', 'leaf_size': 21, 'p': 2}


In [None]:
from sklearn.metrics import accuracy_score

# Refit K-Nearest Neighbors on the whole training split with the best
# hyperparameters found by the stratified Optuna search in the previous cell.
# Using `result.params` directly avoids hand-copied values drifting away from
# what the search reported. `result` must still hold that cell's best trial
# when this cell runs.
final_model = KNeighborsClassifier(**result.params)
final_model.fit(X_train, y_train)

y_train_pred = final_model.predict(X_train)
y_val_pred = final_model.predict(X_test)  # Predictions on the held-out test split.

# Calculate Accuracy
train_accuracy = accuracy_score(y_train, y_train_pred)
val_accuracy = accuracy_score(y_test, y_val_pred)

print("K-Nearest Neighbors: ")
print("Training Accuracy:", train_accuracy)
print("Validation Accuracy:", val_accuracy)

K-Nearest Neighbors: 
Training Accuracy: 1.0
Validation Accuracy: 0.9666666666666667
