# **Optuna Objective Function - Classification**

In [None]:
def objective_classification(trial):
    """Optuna objective: pick a classifier, sample its hyperparameters, and
    return the mean 3-fold cross-validated accuracy.

    The sampled estimator is placed behind the shared ``preprocessor`` in a
    ``Pipeline`` and scored on ``X_train`` / ``y_train`` (names defined
    outside this function).

    Args:
        trial: Optuna trial used to sample the model name and hyperparameters.

    Returns:
        Mean accuracy over the 3 cross-validation folds.

    Raises:
        ValueError: If the sampled model name has no matching branch.
    """
    model_name = trial.suggest_categorical(
        "model",
        [
            "Logistic Regression", "Random Forest", "DecisionTreeClassifier",
            "Xgboost", "SVM", "GradientBoosting",
            "LightGBM", "CatBoost", "KNN",
            "Linear", "Lasso", "Ridge", "ElasticNet"
        ]
    )

    # NOTE: every branch below belongs to ONE if/elif chain. Starting a second
    # chain would let its trailing branch overwrite a model built earlier.

    # Logistic Regression (max_iter fixed at 5000 so the slower solvers converge)
    if model_name == "Logistic Regression":
        c = trial.suggest_float("lr_C", 0.1, 100, log=True)
        solver = trial.suggest_categorical("lr_solver", ["liblinear", "saga", "lbfgs", "newton-cg"])
        # Each solver supports a different set of penalties, so the penalty
        # parameter gets a solver-specific name and choice list.
        if solver in ["lbfgs", "newton-cg"]:
            penalty = trial.suggest_categorical("lr_penalty_lbfgs_nc", ["l2", None])
        elif solver == "liblinear":
            penalty = trial.suggest_categorical("lr_penalty_liblinear", ["l1", "l2"])
        else:  # saga
            penalty = trial.suggest_categorical("lr_penalty_saga", ["l1", "l2", "elasticnet", None])
        # l1_ratio is only sampled for the elasticnet penalty.
        l1_ratio = trial.suggest_float("lr_l1_ratio", 0.0, 1.0) if penalty == "elasticnet" else None
        model = LogisticRegression(
            C=c, solver=solver, penalty=penalty,
            l1_ratio=l1_ratio, max_iter=5000, random_state=42
        )

    # Random Forest
    elif model_name == "Random Forest":
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int("rf_n_estimators", 50, 300),
            max_depth=trial.suggest_int("rf_max_depth", 3, 20),
            min_samples_split=trial.suggest_int("rf_min_samples_split", 2, 10),
            min_samples_leaf=trial.suggest_int("rf_min_samples_leaf", 1, 10),
            bootstrap=trial.suggest_categorical("rf_bootstrap", [True, False]),
            random_state=42
        )

    # Decision Tree
    elif model_name == "DecisionTreeClassifier":
        max_depth = trial.suggest_int("dtc_max_depth", 2, 20)
        criterion = trial.suggest_categorical("dtc_criterion", ["gini", "entropy", "log_loss"])
        model = DecisionTreeClassifier(
            max_depth=max_depth,
            criterion=criterion,
            random_state=42
        )

    # XGBoost
    elif model_name == "Xgboost":
        model = XGBClassifier(
            learning_rate=trial.suggest_float("xgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("xgb_max_depth", 2, 12),
            n_estimators=trial.suggest_int("xgb_n_estimators", 50, 500),
            subsample=trial.suggest_float("xgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            gamma=trial.suggest_float("xgb_gamma", 0, 5),
            min_child_weight=trial.suggest_int("xgb_min_child_weight", 1, 10),
            reg_alpha=trial.suggest_float("xgb_reg_alpha", 0.0, 5.0),
            reg_lambda=trial.suggest_float("xgb_reg_lambda", 0.0, 5.0),
            random_state=42, use_label_encoder=False, eval_metric="logloss"
        )

    # SVM
    elif model_name == "SVM":
        model = SVC(
            C=trial.suggest_float("svm_C", 0.1, 100, log=True),
            kernel=trial.suggest_categorical("svm_kernel", ["linear", "rbf", "poly", "sigmoid"]),
            gamma=trial.suggest_categorical("svm_gamma", ["scale", "auto"]),
            probability=True, random_state=42
        )

    # Gradient Boosting
    elif model_name == "GradientBoosting":
        model = GradientBoostingClassifier(
            n_estimators=trial.suggest_int("gb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("gb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("gb_max_depth", 2, 10),
            subsample=trial.suggest_float("gb_subsample", 0.5, 1.0),
            min_samples_split=trial.suggest_int("gb_min_samples_split", 2, 20),
            min_samples_leaf=trial.suggest_int("gb_min_samples_leaf", 1, 20),
            random_state=42
        )

    # LightGBM
    elif model_name == "LightGBM":
        model = LGBMClassifier(
            n_estimators=trial.suggest_int("lgb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("lgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("lgb_max_depth", -1, 20),
            num_leaves=trial.suggest_int("lgb_num_leaves", 20, 300),
            subsample=trial.suggest_float("lgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            random_state=42
        )

    # CatBoost
    elif model_name == "CatBoost":
        model = CatBoostClassifier(
            iterations=trial.suggest_int("cat_iterations", 100, 500),
            depth=trial.suggest_int("cat_depth", 3, 10),
            learning_rate=trial.suggest_float("cat_learning_rate", 0.01, 0.3, log=True),
            l2_leaf_reg=trial.suggest_float("cat_l2_leaf_reg", 1.0, 10.0),
            verbose=0, random_state=42
        )

    # KNN
    elif model_name == "KNN":
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int("knn_n_neighbors", 3, 30),
            weights=trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            p=trial.suggest_int("knn_p", 1, 2)
        )

    # "Linear": unpenalized logistic regression (only some solvers allow penalty=None)
    elif model_name == "Linear":
        solver = trial.suggest_categorical("linear_solver", ["lbfgs", "newton-cg", "sag", "saga"])
        model = LogisticRegression(
            penalty=None, solver=solver,
            max_iter=5000, random_state=42
        )

    # "Lasso": L1-penalized logistic regression
    elif model_name == "Lasso":
        solver = trial.suggest_categorical("lasso_solver", ["liblinear", "saga"])
        model = LogisticRegression(
            penalty="l1", solver=solver,
            C=trial.suggest_float("lasso_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    # "Ridge": L2-penalized logistic regression
    elif model_name == "Ridge":
        solver = trial.suggest_categorical("ridge_solver", ["lbfgs", "newton-cg", "sag", "saga", "liblinear"])
        model = LogisticRegression(
            penalty="l2", solver=solver,
            C=trial.suggest_float("ridge_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    # "ElasticNet": elastic-net logistic regression (saga is the only solver used)
    elif model_name == "ElasticNet":
        model = LogisticRegression(
            penalty="elasticnet", solver="saga",
            l1_ratio=trial.suggest_float("elastic_l1_ratio", 0.0, 1.0),
            C=trial.suggest_float("elastic_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    else:
        # Fail loudly instead of silently training a default model.
        raise ValueError(f"Unknown model name: {model_name!r}")

    # ---------------- Pipeline ----------------
    pipeline = Pipeline([
        ("preprocessor", preprocessor),
        ("classifier", model)
    ])

    # Use n_jobs=1 to avoid Windows parallelism errors
    scores = cross_val_score(pipeline, X_train, y_train, cv=3, scoring="accuracy", n_jobs=1)
    return scores.mean()


# **Optuna Objective Function - Regression**

In [None]:
def objective_regression(trial):
    """Optuna objective: pick a regressor, sample its hyperparameters, and
    return the mean 3-fold cross-validated R^2.

    The sampled estimator is placed behind the shared ``preprocessor`` in a
    ``Pipeline`` and scored on ``X_train`` / ``y_train`` (names defined
    outside this function).

    Args:
        trial: Optuna trial used to sample the model name and hyperparameters.

    Returns:
        Mean R^2 over the 3 cross-validation folds.

    Raises:
        ValueError: If the sampled model name has no matching branch.
    """
    model_name = trial.suggest_categorical(
        "model",
        [
            "Random Forest", "DecisionTreeRegressor", "Xgboost", "SVM", "GradientBoosting",
            "LightGBM", "CatBoost", "KNN",
            "Linear", "Lasso", "Ridge", "ElasticNet"
        ]
    )

    if model_name == "Random Forest":
        model = RandomForestRegressor(
            n_estimators=trial.suggest_int("rf_n_estimators", 50, 300),
            max_depth=trial.suggest_int("rf_max_depth", 3, 20),
            min_samples_split=trial.suggest_int("rf_min_samples_split", 2, 10),
            min_samples_leaf=trial.suggest_int("rf_min_samples_leaf", 1, 10),
            bootstrap=trial.suggest_categorical("rf_bootstrap", [True, False]),
            random_state=42
        )

    elif model_name == "DecisionTreeRegressor":
        max_depth = trial.suggest_int("dtr_max_depth", 2, 20)
        # NOTE(review): "poisson" presumably needs a non-negative target — confirm for this dataset.
        criterion = trial.suggest_categorical("dtr_criterion", ["squared_error", "friedman_mse", "absolute_error", "poisson"])
        model = DecisionTreeRegressor(
            max_depth=max_depth,
            criterion=criterion,
            random_state=42
        )

    elif model_name == "Xgboost":
        model = XGBRegressor(
            learning_rate=trial.suggest_float("xgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("xgb_max_depth", 2, 12),
            n_estimators=trial.suggest_int("xgb_n_estimators", 50, 500),
            subsample=trial.suggest_float("xgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            gamma=trial.suggest_float("xgb_gamma", 0, 5),
            reg_alpha=trial.suggest_float("xgb_reg_alpha", 0.0, 5.0),
            reg_lambda=trial.suggest_float("xgb_reg_lambda", 0.0, 5.0),
            random_state=42
        )

    elif model_name == "SVM":
        model = SVR(
            C=trial.suggest_float("svm_C", 0.1, 100, log=True),
            kernel=trial.suggest_categorical("svm_kernel", ["linear", "rbf", "poly", "sigmoid"]),
            gamma=trial.suggest_categorical("svm_gamma", ["scale", "auto"])
        )

    elif model_name == "GradientBoosting":
        model = GradientBoostingRegressor(
            n_estimators=trial.suggest_int("gb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("gb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("gb_max_depth", 2, 10),
            subsample=trial.suggest_float("gb_subsample", 0.5, 1.0),
            min_samples_split=trial.suggest_int("gb_min_samples_split", 2, 20),
            min_samples_leaf=trial.suggest_int("gb_min_samples_leaf", 1, 20),
            random_state=42
        )

    elif model_name == "LightGBM":
        model = LGBMRegressor(
            n_estimators=trial.suggest_int("lgb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("lgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("lgb_max_depth", -1, 20),
            num_leaves=trial.suggest_int("lgb_num_leaves", 20, 300),
            subsample=trial.suggest_float("lgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            random_state=42
        )

    elif model_name == "CatBoost":
        model = CatBoostRegressor(
            iterations=trial.suggest_int("cat_iterations", 100, 500),
            depth=trial.suggest_int("cat_depth", 3, 10),
            learning_rate=trial.suggest_float("cat_learning_rate", 0.01, 0.3, log=True),
            l2_leaf_reg=trial.suggest_float("cat_l2_leaf_reg", 1.0, 10.0),
            verbose=0, random_state=42
        )

    elif model_name == "KNN":
        model = KNeighborsRegressor(
            n_neighbors=trial.suggest_int("knn_n_neighbors", 3, 30),
            weights=trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            p=trial.suggest_int("knn_p", 1, 2)
        )

    elif model_name == "Linear":
        model = LinearRegression()

    elif model_name == "Lasso":
        model = Lasso(
            alpha=trial.suggest_float("lasso_alpha", 0.0001, 1.0, log=True),
            max_iter=5000, random_state=42
        )

    elif model_name == "Ridge":
        model = Ridge(
            alpha=trial.suggest_float("ridge_alpha", 0.0001, 10.0, log=True),
            max_iter=5000, random_state=42
        )

    elif model_name == "ElasticNet":
        model = ElasticNet(
            alpha=trial.suggest_float("elastic_alpha", 0.0001, 1.0, log=True),
            l1_ratio=trial.suggest_float("elastic_l1_ratio", 0.0, 1.0),
            max_iter=5000, random_state=42
        )

    else:
        # Fail loudly instead of silently training a default model.
        raise ValueError(f"Unknown model name: {model_name!r}")

    # ---------------- Pipeline ----------------
    pipeline = Pipeline([
        ("preprocessor", preprocessor),
        ("classifier", model)
    ])

    # Score the whole pipeline (not the bare estimator) so preprocessing is
    # fitted inside each CV fold, as in the classification objective.
    score = cross_val_score(pipeline, X_train, y_train, cv=3, scoring="r2").mean()
    return score



In [None]:
# ===============================
# 6. Run Optuna Study
# ===============================
# The objectives return a score to be maximized, hence direction="maximize".
study = optuna.create_study(direction="maximize")
# NOTE(review): `objective` is not defined in the visible cells — presumably it
# should be `objective_classification` or `objective_regression`; confirm.
study.optimize(objective, n_trials=30)  # Increase trials for better results

print("Best Params:", study.best_params)
# The "model" entry holds the model name sampled by the objective.
best_model_name = study.best_params["model"]


In [None]:
# Work on a copy so that later edits (e.g. popping "model") only change this dict.
best_params_rf = study.best_params.copy()

In [None]:
# Alternative: remove the "model" entry from the copied params and keep its
# value as the model name; the entries left in best_params_rf are hyperparameters.

best_model_name = best_params_rf.pop("model")

# 🔑 Rule of Thumb:

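- If one objective tunes several models, the best params also contain the `"model"` choice → call `.pop("model")` so that only hyperparameters remain.

- If the objective tunes a single model, there is no `"model"` key → don’t call `.pop("model")` (it would raise `KeyError`); just assign the model name manually.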

In [None]:
# ===============================
# 7. Train Best Model on Full Training Data
# ===============================
# Rebuild the winning estimator from the tuned parameters in best_params_rf.
# Only the RandomForest case is shown here; adapt the estimator and the
# parameter keys to whatever best_model_name turned out to be.
# Each .get() falls back to a default when the key is absent.

best_model = Pipeline([
    ("preprocessor", preprocessor),
    (
        "classifier",
        RandomForestClassifier(
            bootstrap=best_params_rf.get("rf_bootstrap", True),
            min_samples_leaf=best_params_rf.get("rf_min_samples_leaf", 1),
            min_samples_split=best_params_rf.get("rf_min_samples_split", 2),
            max_depth=best_params_rf.get("rf_max_depth", None),
            n_estimators=best_params_rf.get("rf_n_estimators", 100),
            n_jobs=-1,
            random_state=42,
        ),
    ),
])




# Best Hyperparameters Choice

In [None]:
import optuna
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier

def objective_classification(trial):
    """Optuna objective for classification.

    Samples a model name, then that model's hyperparameters, builds the
    estimator, and returns its mean 3-fold cross-validated accuracy on the
    module-level ``X`` / ``y``.

    Args:
        trial: Optuna trial that supplies the sampled values.

    Returns:
        Mean of the per-fold accuracy scores.
    """
    chosen = trial.suggest_categorical("model", [
        "LogisticRegression", "Ridge", "RandomForest", "DecisionTree",
        "GradientBoosting", "XGBoost", "LightGBM", "CatBoost", "KNN"
    ])

    # Hyperparameters are sampled inline, in the same order as keyword
    # arguments appear, so the sequence of suggest calls per branch is fixed.
    if chosen == "LogisticRegression":
        reg_strength = trial.suggest_float("lr_C", 0.001, 10, log=True)
        lr_solver = trial.suggest_categorical("lr_solver", ["liblinear", "saga", "lbfgs", "newton-cg"])

        # solver -> (parameter name, allowed penalties); any solver not listed
        # here is treated as saga.
        newton_like = ("lr_penalty_lbfgs_nc", ["l2", None])
        penalty_space = {
            "lbfgs": newton_like,
            "newton-cg": newton_like,
            "liblinear": ("lr_penalty_liblinear", ["l1", "l2"]),
        }
        penalty_key, penalty_choices = penalty_space.get(
            lr_solver, ("lr_penalty_saga", ["l1", "l2", "elasticnet", None])
        )
        lr_penalty = trial.suggest_categorical(penalty_key, penalty_choices)

        # The L1/L2 mix is only sampled for the elasticnet penalty.
        if lr_penalty == "elasticnet":
            mix = trial.suggest_float("lr_l1_ratio", 0.0, 1.0)
        else:
            mix = None

        model = LogisticRegression(
            C=reg_strength, solver=lr_solver, penalty=lr_penalty,
            l1_ratio=mix, max_iter=5000, random_state=42
        )

    elif chosen == "Ridge":
        model = RidgeClassifier(
            alpha=trial.suggest_float("alpha", 0.001, 10, log=True),
            random_state=42,
        )

    elif chosen == "RandomForest":
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            max_depth=trial.suggest_int("max_depth", 3, 20),
            n_jobs=-1, random_state=42,
        )

    elif chosen == "DecisionTree":
        model = DecisionTreeClassifier(
            max_depth=trial.suggest_int("max_depth", 3, 20),
            criterion=trial.suggest_categorical("criterion", ["gini", "entropy"]),
            random_state=42,
        )

    elif chosen == "GradientBoosting":
        model = GradientBoostingClassifier(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            max_depth=trial.suggest_int("max_depth", 3, 10),
            random_state=42,
        )

    elif chosen == "XGBoost":
        model = XGBClassifier(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            max_depth=trial.suggest_int("max_depth", 3, 10),
            eval_metric="logloss",
            random_state=42, n_jobs=-1, use_label_encoder=False,
        )

    elif chosen == "LightGBM":
        model = LGBMClassifier(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            num_leaves=trial.suggest_int("num_leaves", 20, 80),
            random_state=42, n_jobs=-1,
        )

    elif chosen == "CatBoost":
        model = CatBoostClassifier(
            depth=trial.suggest_int("depth", 4, 10),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            iterations=trial.suggest_int("iterations", 100, 300, step=50),
            verbose=0, random_state=42,
        )

    elif chosen == "KNN":
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int("n_neighbors", 3, 25),
            weights=trial.suggest_categorical("weights", ["uniform", "distance"]),
        )

    # 3-fold cross-validated accuracy, folds evaluated in parallel
    fold_scores = cross_val_score(model, X, y, cv=3, scoring="accuracy", n_jobs=-1)
    return np.mean(fold_scores)


In [None]:
from sklearn.linear_model import Ridge, Lasso, ElasticNet, LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor

def objective_regression(trial):
    """Optuna objective for regression.

    Samples a model name, then that model's hyperparameters, builds the
    estimator, and returns its mean 3-fold cross-validated R^2 on the
    module-level ``X`` / ``y``.

    Args:
        trial: Optuna trial that supplies the sampled values.

    Returns:
        Mean of the per-fold R^2 scores.
    """
    chosen = trial.suggest_categorical("model", [
        "LinearRegression", "Ridge", "Lasso", "ElasticNet",
        "RandomForest", "DecisionTree", "GradientBoosting",
        "XGBoost", "LightGBM", "CatBoost", "KNN"
    ])

    # Hyperparameters are sampled inline, in the same order as keyword
    # arguments appear, so the sequence of suggest calls per branch is fixed.
    if chosen == "LinearRegression":
        model = LinearRegression()

    elif chosen == "Ridge":
        model = Ridge(
            alpha=trial.suggest_float("alpha", 0.001, 10, log=True),
            random_state=42,
        )

    elif chosen == "Lasso":
        model = Lasso(
            alpha=trial.suggest_float("alpha", 0.001, 10, log=True),
            max_iter=3000, random_state=42,
        )

    elif chosen == "ElasticNet":
        model = ElasticNet(
            alpha=trial.suggest_float("alpha", 0.001, 10, log=True),
            l1_ratio=trial.suggest_float("l1_ratio", 0.0, 1.0),
            max_iter=3000, random_state=42,
        )

    elif chosen == "RandomForest":
        model = RandomForestRegressor(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            max_depth=trial.suggest_int("max_depth", 3, 20),
            n_jobs=-1, random_state=42,
        )

    elif chosen == "DecisionTree":
        model = DecisionTreeRegressor(
            max_depth=trial.suggest_int("max_depth", 3, 20),
            criterion=trial.suggest_categorical("criterion", ["squared_error", "friedman_mse"]),
            random_state=42,
        )

    elif chosen == "GradientBoosting":
        model = GradientBoostingRegressor(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            max_depth=trial.suggest_int("max_depth", 3, 10),
            random_state=42,
        )

    elif chosen == "XGBoost":
        model = XGBRegressor(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            max_depth=trial.suggest_int("max_depth", 3, 10),
            random_state=42, n_jobs=-1,
        )

    elif chosen == "LightGBM":
        model = LGBMRegressor(
            n_estimators=trial.suggest_int("n_estimators", 100, 300, step=50),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            num_leaves=trial.suggest_int("num_leaves", 20, 80),
            random_state=42, n_jobs=-1,
        )

    elif chosen == "CatBoost":
        model = CatBoostRegressor(
            depth=trial.suggest_int("depth", 4, 10),
            learning_rate=trial.suggest_float("learning_rate", 0.01, 0.3),
            iterations=trial.suggest_int("iterations", 100, 300, step=50),
            verbose=0, random_state=42,
        )

    elif chosen == "KNN":
        model = KNeighborsRegressor(
            n_neighbors=trial.suggest_int("n_neighbors", 3, 25),
            weights=trial.suggest_categorical("weights", ["uniform", "distance"]),
        )

    # 3-fold cross-validated R^2, folds evaluated in parallel
    fold_scores = cross_val_score(model, X, y, cv=3, scoring="r2", n_jobs=-1)
    return np.mean(fold_scores)


In [None]:
# Using optuna to find the best parameters for LightGBM

def lgb_objective(trial):
    lgb_params = {
    'learning_rate' : trial.suggest_float('learning_rate' , 0.01 , 0.1 , log=True),
    'num_leaves' : trial.suggest_int('num_leaves' , 5 , 50),
    'max_depth' : trial.suggest_int('max_depth' , 3 , 15),
    'reg_alpha' : trial.suggest_float('reg_alpha' , 0.01 , 0.1 , log=True),
    'reg_lambda' : trial.suggest_float('reg_lambda' , 0.01 , 0.1 , log=True),
    'subsample' : trial.suggest_float('subsample' , 0 , 1)
    }