# **Optuna Objective Function - Classification**

In [None]:
def objective_classification(trial):
    """Optuna objective for classification: choose a model family and tune it.

    The trial first picks a model family by name, then samples that family's
    hyper-parameters (each under a family-specific parameter name), builds the
    estimator, and scores it with 3-fold cross-validated accuracy.

    The training data is read from the names ``X_train`` and ``y_train``,
    which are not defined in this function and must exist in the enclosing
    scope.

    Args:
        trial: Optuna trial object; its ``suggest_categorical``,
            ``suggest_float`` and ``suggest_int`` methods are used.

    Returns:
        Mean accuracy over the 3 cross-validation folds.

    Raises:
        ValueError: If the sampled model name has no matching branch (only
            possible if the candidate list is extended without adding one).
    """
    model_name = trial.suggest_categorical(
        "model",
        [
            "Logistic Regression", "Random Forest", "Xgboost", "SVM", "GradientBoosting",
            "LightGBM", "CatBoost", "KNN",
            "Linear", "Lasso", "Ridge", "ElasticNet"
        ]
    )

    # Logistic Regression
    if model_name == "Logistic Regression":
        c = trial.suggest_float("lr_C", 0.1, 100, log=True)
        solver = trial.suggest_categorical("lr_solver", ["liblinear", "saga", "lbfgs", "newton-cg"])
        # Each solver supports a different set of penalties, so the penalty is
        # sampled under a solver-specific parameter name; this keeps every
        # Optuna parameter's set of choices fixed across trials.
        if solver in ["lbfgs", "newton-cg"]:
            penalty = trial.suggest_categorical("lr_penalty_lbfgs_nc", ["l2", None])
        elif solver == "liblinear":
            penalty = trial.suggest_categorical("lr_penalty_liblinear", ["l1", "l2"])
        else:  # saga
            penalty = trial.suggest_categorical("lr_penalty_saga", ["l1", "l2", "elasticnet", None])
        # l1_ratio is only sampled when the elastic-net penalty is chosen.
        l1_ratio = trial.suggest_float("lr_l1_ratio", 0.0, 1.0) if penalty == "elasticnet" else None
        # max_iter is fixed at 5000 rather than tuned.
        model = LogisticRegression(
            C=c, solver=solver, penalty=penalty,
            l1_ratio=l1_ratio, max_iter=5000, random_state=42
        )

    # Random Forest
    elif model_name == "Random Forest":
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int("rf_n_estimators", 50, 300),
            max_depth=trial.suggest_int("rf_max_depth", 3, 20),
            min_samples_split=trial.suggest_int("rf_min_samples_split", 2, 10),
            min_samples_leaf=trial.suggest_int("rf_min_samples_leaf", 1, 10),
            bootstrap=trial.suggest_categorical("rf_bootstrap", [True, False]),
            random_state=42
        )

    # XGBoost
    elif model_name == "Xgboost":
        model = XGBClassifier(
            learning_rate=trial.suggest_float("xgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("xgb_max_depth", 2, 12),
            n_estimators=trial.suggest_int("xgb_n_estimators", 50, 500),
            subsample=trial.suggest_float("xgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            gamma=trial.suggest_float("xgb_gamma", 0, 5),
            min_child_weight=trial.suggest_int("xgb_min_child_weight", 1, 10),
            reg_alpha=trial.suggest_float("xgb_reg_alpha", 0.0, 5.0),
            reg_lambda=trial.suggest_float("xgb_reg_lambda", 0.0, 5.0),
            random_state=42, use_label_encoder=False, eval_metric="logloss"
        )

    # SVM
    elif model_name == "SVM":
        model = SVC(
            C=trial.suggest_float("svm_C", 0.1, 100, log=True),
            kernel=trial.suggest_categorical("svm_kernel", ["linear", "rbf", "poly", "sigmoid"]),
            gamma=trial.suggest_categorical("svm_gamma", ["scale", "auto"]),
            probability=True, random_state=42
        )

    # Gradient Boosting
    elif model_name == "GradientBoosting":
        model = GradientBoostingClassifier(
            n_estimators=trial.suggest_int("gb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("gb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("gb_max_depth", 2, 10),
            subsample=trial.suggest_float("gb_subsample", 0.5, 1.0),
            min_samples_split=trial.suggest_int("gb_min_samples_split", 2, 20),
            min_samples_leaf=trial.suggest_int("gb_min_samples_leaf", 1, 20),
            random_state=42
        )

    # LightGBM
    elif model_name == "LightGBM":
        model = LGBMClassifier(
            n_estimators=trial.suggest_int("lgb_n_estimators", 50, 500),
            learning_rate=trial.suggest_float("lgb_learning_rate", 0.01, 0.3, log=True),
            max_depth=trial.suggest_int("lgb_max_depth", -1, 20),
            num_leaves=trial.suggest_int("lgb_num_leaves", 20, 300),
            subsample=trial.suggest_float("lgb_subsample", 0.5, 1.0),
            colsample_bytree=trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            random_state=42
        )

    # CatBoost
    elif model_name == "CatBoost":
        model = CatBoostClassifier(
            iterations=trial.suggest_int("cat_iterations", 100, 500),
            depth=trial.suggest_int("cat_depth", 3, 10),
            learning_rate=trial.suggest_float("cat_learning_rate", 0.01, 0.3, log=True),
            l2_leaf_reg=trial.suggest_float("cat_l2_leaf_reg", 1.0, 10.0),
            verbose=0, random_state=42
        )

    # KNN
    elif model_name == "KNN":
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int("knn_n_neighbors", 3, 30),
            weights=trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            p=trial.suggest_int("knn_p", 1, 2)
        )

    # "Linear": logistic regression with no penalty (only certain solvers allowed)
    elif model_name == "Linear":
        solver = trial.suggest_categorical("linear_solver", ["lbfgs", "newton-cg", "sag", "saga"])
        model = LogisticRegression(
            penalty=None, solver=solver,
            max_iter=5000, random_state=42
        )

    # Lasso (L1)
    elif model_name == "Lasso":
        solver = trial.suggest_categorical("lasso_solver", ["liblinear", "saga"])
        model = LogisticRegression(
            penalty="l1", solver=solver,
            C=trial.suggest_float("lasso_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    # Ridge (L2)
    elif model_name == "Ridge":
        solver = trial.suggest_categorical("ridge_solver", ["lbfgs", "newton-cg", "sag", "saga", "liblinear"])
        model = LogisticRegression(
            penalty="l2", solver=solver,
            C=trial.suggest_float("ridge_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    # ElasticNet (only saga)
    elif model_name == "ElasticNet":
        model = LogisticRegression(
            penalty="elasticnet", solver="saga",
            l1_ratio=trial.suggest_float("elastic_l1_ratio", 0.0, 1.0),
            C=trial.suggest_float("elastic_C", 0.01, 10, log=True),
            max_iter=5000, random_state=42
        )

    else:
        # Fail loudly instead of silently training the wrong model if a new
        # name is added to the candidate list without a matching branch.
        raise ValueError(f"Unsupported model name: {model_name!r}")

    score = cross_val_score(model, X_train, y_train, cv=3, scoring="accuracy").mean()
    return score


# **Optuna Objective Function - Regression**

In [None]:
def objective_regression(trial):
    """Optuna objective for regression: choose a model family and tune it.

    The trial picks a regressor family by name, samples that family's
    hyper-parameters, builds the estimator, and scores it with 3-fold
    cross-validated R^2.

    The training data is read from the names ``X_train`` and ``y_train``,
    which are not defined in this function and must exist in the enclosing
    scope.

    Args:
        trial: Optuna trial object; its ``suggest_categorical``,
            ``suggest_float`` and ``suggest_int`` methods are used.

    Returns:
        Mean R^2 over the 3 cross-validation folds.
    """
    candidates = [
        "Random Forest", "Xgboost", "SVM", "GradientBoosting",
        "LightGBM", "CatBoost", "KNN",
        "Linear", "Lasso", "Ridge", "ElasticNet"
    ]
    model_name = trial.suggest_categorical("model", candidates)

    # Each branch collects the sampled hyper-parameters into a keyword dict
    # (sampled in the order listed) and then instantiates the estimator.
    if model_name == "Random Forest":
        kwargs = {
            "n_estimators": trial.suggest_int("rf_n_estimators", 50, 300),
            "max_depth": trial.suggest_int("rf_max_depth", 3, 20),
            "min_samples_split": trial.suggest_int("rf_min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("rf_min_samples_leaf", 1, 10),
            "bootstrap": trial.suggest_categorical("rf_bootstrap", [True, False]),
            "random_state": 42,
        }
        model = RandomForestRegressor(**kwargs)

    elif model_name == "Xgboost":
        kwargs = {
            "learning_rate": trial.suggest_float("xgb_learning_rate", 0.01, 0.3, log=True),
            "max_depth": trial.suggest_int("xgb_max_depth", 2, 12),
            "n_estimators": trial.suggest_int("xgb_n_estimators", 50, 500),
            "subsample": trial.suggest_float("xgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            "gamma": trial.suggest_float("xgb_gamma", 0, 5),
            "reg_alpha": trial.suggest_float("xgb_reg_alpha", 0.0, 5.0),
            "reg_lambda": trial.suggest_float("xgb_reg_lambda", 0.0, 5.0),
            "random_state": 42,
        }
        model = XGBRegressor(**kwargs)

    elif model_name == "SVM":
        kwargs = {
            "C": trial.suggest_float("svm_C", 0.1, 100, log=True),
            "kernel": trial.suggest_categorical("svm_kernel", ["linear", "rbf", "poly", "sigmoid"]),
            "gamma": trial.suggest_categorical("svm_gamma", ["scale", "auto"]),
        }
        model = SVR(**kwargs)

    elif model_name == "GradientBoosting":
        kwargs = {
            "n_estimators": trial.suggest_int("gb_n_estimators", 50, 500),
            "learning_rate": trial.suggest_float("gb_learning_rate", 0.01, 0.3, log=True),
            "max_depth": trial.suggest_int("gb_max_depth", 2, 10),
            "subsample": trial.suggest_float("gb_subsample", 0.5, 1.0),
            "min_samples_split": trial.suggest_int("gb_min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("gb_min_samples_leaf", 1, 20),
            "random_state": 42,
        }
        model = GradientBoostingRegressor(**kwargs)

    elif model_name == "LightGBM":
        kwargs = {
            "n_estimators": trial.suggest_int("lgb_n_estimators", 50, 500),
            "learning_rate": trial.suggest_float("lgb_learning_rate", 0.01, 0.3, log=True),
            "max_depth": trial.suggest_int("lgb_max_depth", -1, 20),
            "num_leaves": trial.suggest_int("lgb_num_leaves", 20, 300),
            "subsample": trial.suggest_float("lgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            "random_state": 42,
        }
        model = LGBMRegressor(**kwargs)

    elif model_name == "CatBoost":
        kwargs = {
            "iterations": trial.suggest_int("cat_iterations", 100, 500),
            "depth": trial.suggest_int("cat_depth", 3, 10),
            "learning_rate": trial.suggest_float("cat_learning_rate", 0.01, 0.3, log=True),
            "l2_leaf_reg": trial.suggest_float("cat_l2_leaf_reg", 1.0, 10.0),
            "verbose": 0,
            "random_state": 42,
        }
        model = CatBoostRegressor(**kwargs)

    elif model_name == "KNN":
        kwargs = {
            "n_neighbors": trial.suggest_int("knn_n_neighbors", 3, 30),
            "weights": trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            "p": trial.suggest_int("knn_p", 1, 2),
        }
        model = KNeighborsRegressor(**kwargs)

    elif model_name == "Linear":
        # Ordinary least squares has nothing to tune here.
        model = LinearRegression()

    elif model_name == "Lasso":
        kwargs = {
            "alpha": trial.suggest_float("lasso_alpha", 0.0001, 1.0, log=True),
            "max_iter": 5000,
            "random_state": 42,
        }
        model = Lasso(**kwargs)

    elif model_name == "Ridge":
        kwargs = {
            "alpha": trial.suggest_float("ridge_alpha", 0.0001, 10.0, log=True),
            "max_iter": 5000,
            "random_state": 42,
        }
        model = Ridge(**kwargs)

    else:  # ElasticNet
        kwargs = {
            "alpha": trial.suggest_float("elastic_alpha", 0.0001, 1.0, log=True),
            "l1_ratio": trial.suggest_float("elastic_l1_ratio", 0.0, 1.0),
            "max_iter": 5000,
            "random_state": 42,
        }
        model = ElasticNet(**kwargs)

    fold_scores = cross_val_score(model, X_train, y_train, cv=3, scoring="r2")
    return fold_scores.mean()