In [None]:
import optuna
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier


# ==========================
# Random Forest
# ==========================
def objective_rf(trial):
    """Optuna objective: validation accuracy of a RandomForestClassifier.

    Six forest hyperparameters are sampled from ``trial``; the model is fitted
    on the module-level ``X_train``/``y_train`` and scored on
    ``X_valid``/``y_valid``. Those globals are not defined in this section and
    must exist before the study is run.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    max_depth = trial.suggest_int("max_depth", 3, 30)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])
    bootstrap = trial.suggest_categorical("bootstrap", [True, False])

    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        bootstrap=bootstrap,
        random_state=42,
    )
    forest.fit(X_train, y_train)
    return accuracy_score(y_valid, forest.predict(X_valid))


# ==========================
# XGBoost
# ==========================
def objective_xgb(trial):
    """Optuna objective: validation accuracy of an XGBClassifier.

    Samples seven boosting hyperparameters from ``trial``, fits on the
    module-level ``X_train``/``y_train`` and scores on ``X_valid``/``y_valid``
    (globals that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    max_depth = trial.suggest_int("max_depth", 3, 15)
    subsample = trial.suggest_float("subsample", 0.5, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1.0)
    gamma = trial.suggest_float("gamma", 0, 10)
    min_child_weight = trial.suggest_int("min_child_weight", 1, 10)

    booster = xgb.XGBClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        min_child_weight=min_child_weight,
        random_state=42,
        use_label_encoder=False,
        eval_metric="logloss",
    )
    booster.fit(X_train, y_train)
    return accuracy_score(y_valid, booster.predict(X_valid))


# ==========================
# LightGBM
# ==========================
def objective_lgb(trial):
    """Optuna objective: validation accuracy of an LGBMClassifier.

    Samples nine LightGBM hyperparameters from ``trial``, fits on the
    module-level ``X_train``/``y_train`` and scores on ``X_valid``/``y_valid``
    (globals that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    params = {
        "num_leaves": trial.suggest_int("num_leaves", 20, 200),
        "max_depth": trial.suggest_int("max_depth", -1, 20),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0, 10),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 10),
    }
    # LightGBM only performs row subsampling when subsample_freq > 0; with the
    # default of 0 the tuned "subsample" value would have no effect at all.
    model = lgb.LGBMClassifier(**params, subsample_freq=1, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return accuracy_score(y_valid, preds)


# ==========================
# CatBoost
# ==========================
def objective_cat(trial):
    """Optuna objective: validation accuracy of a CatBoostClassifier.

    Samples five CatBoost hyperparameters from ``trial``, fits silently
    (``verbose=0``) on the module-level ``X_train``/``y_train`` and scores on
    ``X_valid``/``y_valid`` (globals that must be defined before the study is
    run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    iterations = trial.suggest_int("iterations", 100, 1000)
    depth = trial.suggest_int("depth", 3, 12)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    l2_leaf_reg = trial.suggest_float("l2_leaf_reg", 1, 10)
    border_count = trial.suggest_int("border_count", 32, 255)

    cat_model = CatBoostClassifier(
        iterations=iterations,
        depth=depth,
        learning_rate=learning_rate,
        l2_leaf_reg=l2_leaf_reg,
        border_count=border_count,
        random_state=42,
        verbose=0,
    )
    cat_model.fit(X_train, y_train)
    return accuracy_score(y_valid, cat_model.predict(X_valid))


# ==========================
# KNN
# ==========================
def objective_knn(trial):
    """Optuna objective: validation accuracy of a KNeighborsClassifier.

    Samples the neighbour count, the weighting scheme and the ``p`` parameter
    from ``trial``, fits on the module-level ``X_train``/``y_train`` and scores
    on ``X_valid``/``y_valid`` (globals that must be defined before the study
    is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    neighbor_count = trial.suggest_int("n_neighbors", 1, 50)
    weighting = trial.suggest_categorical("weights", ["uniform", "distance"])
    power = trial.suggest_int("p", 1, 2)

    knn = KNeighborsClassifier(
        n_neighbors=neighbor_count,
        weights=weighting,
        p=power,
    )
    knn.fit(X_train, y_train)
    return accuracy_score(y_valid, knn.predict(X_valid))


# ==========================
# Logistic Regression
# ==========================
def objective_logreg(trial):
    """Optuna objective: validation accuracy of a LogisticRegression model.

    Samples penalty, ``C`` and solver from ``trial`` (plus ``l1_ratio`` for the
    elastic-net penalty), fits on the module-level ``X_train``/``y_train`` and
    scores on ``X_valid``/``y_valid`` (globals that must be defined before the
    study is run).

    Penalty and solver are sampled independently, but scikit-learn supports
    only some pairings. Unsupported pairings are pruned instead of being
    passed to ``fit``, where they would raise and abort the whole study.

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).

    Raises:
        optuna.TrialPruned: If the sampled solver does not support the sampled
            penalty.
    """
    penalty = trial.suggest_categorical("penalty", ["l1", "l2", "elasticnet", "none"])
    C = trial.suggest_float("C", 0.001, 10, log=True)
    solver = trial.suggest_categorical("solver", ["liblinear", "saga", "lbfgs"])

    # Penalties each solver accepts, per the scikit-learn LogisticRegression docs.
    supported_penalties = {
        "liblinear": {"l1", "l2"},
        "saga": {"l1", "l2", "elasticnet", "none"},
        "lbfgs": {"l2", "none"},
    }
    if penalty not in supported_penalties[solver]:
        raise optuna.TrialPruned(
            f"solver={solver!r} does not support penalty={penalty!r}"
        )

    params = {
        # Current scikit-learn spells "no penalty" as None; the string "none"
        # is rejected by recent releases (None requires scikit-learn >= 1.2).
        "penalty": None if penalty == "none" else penalty,
        "C": C,
        "solver": solver,
    }
    # elasticnet requires l1_ratio
    if penalty == "elasticnet":
        params["l1_ratio"] = trial.suggest_float("l1_ratio", 0, 1)

    model = LogisticRegression(max_iter=2000, random_state=42, **params)
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return accuracy_score(y_valid, preds)


# ==========================
# Decision Tree
# ==========================
def objective_dt(trial):
    """Optuna objective: validation accuracy of a DecisionTreeClassifier.

    Samples four tree hyperparameters from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and scores on ``X_valid``/``y_valid`` (globals that
    must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    max_depth = trial.suggest_int("max_depth", 3, 30)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])

    tree = DecisionTreeClassifier(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42,
    )
    tree.fit(X_train, y_train)
    return accuracy_score(y_valid, tree.predict(X_valid))


In [None]:
# Run a 50-trial study that maximizes the value returned by objective_rf
# (validation accuracy), then print the best hyperparameters found.
study = optuna.create_study(direction="maximize")
study.optimize(objective_rf, n_trials=50)
print("Best RF params:", study.best_params)


In [None]:
import optuna
import numpy as np
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor


# ==========================
# Random Forest Regressor
# ==========================
def objective_rf_reg(trial):
    """Optuna objective: validation MSE of a RandomForestRegressor.

    Samples six forest hyperparameters from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and evaluates on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    max_depth = trial.suggest_int("max_depth", 3, 30)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])
    bootstrap = trial.suggest_categorical("bootstrap", [True, False])

    forest = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        bootstrap=bootstrap,
        random_state=42,
    )
    forest.fit(X_train, y_train)
    return mean_squared_error(y_valid, forest.predict(X_valid))


# ==========================
# XGBoost Regressor
# ==========================
def objective_xgb_reg(trial):
    """Optuna objective: validation MSE of an XGBRegressor.

    Samples seven boosting hyperparameters from ``trial``, fits on the
    module-level ``X_train``/``y_train`` and evaluates on
    ``X_valid``/``y_valid`` (globals that must be defined before the study is
    run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    n_estimators = trial.suggest_int("n_estimators", 100, 1000)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    max_depth = trial.suggest_int("max_depth", 3, 15)
    subsample = trial.suggest_float("subsample", 0.5, 1.0)
    colsample_bytree = trial.suggest_float("colsample_bytree", 0.5, 1.0)
    gamma = trial.suggest_float("gamma", 0, 10)
    min_child_weight = trial.suggest_int("min_child_weight", 1, 10)

    booster = xgb.XGBRegressor(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        subsample=subsample,
        colsample_bytree=colsample_bytree,
        gamma=gamma,
        min_child_weight=min_child_weight,
        random_state=42,
    )
    booster.fit(X_train, y_train)
    return mean_squared_error(y_valid, booster.predict(X_valid))


# ==========================
# LightGBM Regressor
# ==========================
def objective_lgb_reg(trial):
    """Optuna objective: validation MSE of an LGBMRegressor.

    Samples nine LightGBM hyperparameters from ``trial``, fits on the
    module-level ``X_train``/``y_train`` and evaluates on
    ``X_valid``/``y_valid`` (globals that must be defined before the study is
    run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    params = {
        "num_leaves": trial.suggest_int("num_leaves", 20, 200),
        "max_depth": trial.suggest_int("max_depth", -1, 20),
        "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.3),
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "min_child_samples": trial.suggest_int("min_child_samples", 5, 50),
        "subsample": trial.suggest_float("subsample", 0.5, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
        "reg_alpha": trial.suggest_float("reg_alpha", 0, 10),
        "reg_lambda": trial.suggest_float("reg_lambda", 0, 10),
    }
    # LightGBM only performs row subsampling when subsample_freq > 0; with the
    # default of 0 the tuned "subsample" value would have no effect at all.
    model = lgb.LGBMRegressor(**params, subsample_freq=1, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return mean_squared_error(y_valid, preds)


# ==========================
# CatBoost Regressor
# ==========================
def objective_cat_reg(trial):
    """Optuna objective: validation MSE of a CatBoostRegressor.

    Samples five CatBoost hyperparameters from ``trial``, fits silently
    (``verbose=0``) on the module-level ``X_train``/``y_train`` and evaluates
    on ``X_valid``/``y_valid`` (globals that must be defined before the study
    is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    iterations = trial.suggest_int("iterations", 100, 1000)
    depth = trial.suggest_int("depth", 3, 12)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.3)
    l2_leaf_reg = trial.suggest_float("l2_leaf_reg", 1, 10)
    border_count = trial.suggest_int("border_count", 32, 255)

    cat_model = CatBoostRegressor(
        iterations=iterations,
        depth=depth,
        learning_rate=learning_rate,
        l2_leaf_reg=l2_leaf_reg,
        border_count=border_count,
        random_state=42,
        verbose=0,
    )
    cat_model.fit(X_train, y_train)
    return mean_squared_error(y_valid, cat_model.predict(X_valid))


# ==========================
# KNN Regressor
# ==========================
def objective_knn_reg(trial):
    """Optuna objective: validation MSE of a KNeighborsRegressor.

    Samples the neighbour count, the weighting scheme and the ``p`` parameter
    from ``trial``, fits on the module-level ``X_train``/``y_train`` and
    evaluates on ``X_valid``/``y_valid`` (globals that must be defined before
    the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    neighbor_count = trial.suggest_int("n_neighbors", 1, 50)
    weighting = trial.suggest_categorical("weights", ["uniform", "distance"])
    power = trial.suggest_int("p", 1, 2)

    knn = KNeighborsRegressor(
        n_neighbors=neighbor_count,
        weights=weighting,
        p=power,
    )
    knn.fit(X_train, y_train)
    return mean_squared_error(y_valid, knn.predict(X_valid))

# ==========================
# Decision Tree Regressor
# ==========================
def objective_dt_reg(trial):
    """Optuna objective: validation MSE of a DecisionTreeRegressor.

    Samples four tree hyperparameters from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and evaluates on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    max_depth = trial.suggest_int("max_depth", 3, 30)
    min_samples_split = trial.suggest_int("min_samples_split", 2, 20)
    min_samples_leaf = trial.suggest_int("min_samples_leaf", 1, 10)
    max_features = trial.suggest_categorical("max_features", ["sqrt", "log2", None])

    tree = DecisionTreeRegressor(
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42,
    )
    tree.fit(X_train, y_train)
    return mean_squared_error(y_valid, tree.predict(X_valid))



# ==========================
# Linear Regression
# ==========================
def objective_lin_reg(trial):
    """Optuna objective: validation MSE of a plain LinearRegression.

    No hyperparameters are sampled, so ``trial`` is accepted only to match the
    objective signature. The model is fitted on the module-level
    ``X_train``/``y_train`` and evaluated on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial (unused).

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    baseline = LinearRegression()
    baseline.fit(X_train, y_train)
    return mean_squared_error(y_valid, baseline.predict(X_valid))


# ==========================
# Ridge Regression
# ==========================
def objective_ridge(trial):
    """Optuna objective: validation MSE of a Ridge regression.

    Samples ``alpha`` on a log scale from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and evaluates on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    alpha = trial.suggest_float("alpha", 0.0001, 10, log=True)
    ridge = Ridge(alpha=alpha, random_state=42, max_iter=2000)
    ridge.fit(X_train, y_train)
    return mean_squared_error(y_valid, ridge.predict(X_valid))


# ==========================
# Lasso Regression
# ==========================
def objective_lasso(trial):
    """Optuna objective: validation MSE of a Lasso regression.

    Samples ``alpha`` on a log scale from ``trial``, fits on the module-level
    ``X_train``/``y_train`` and evaluates on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    alpha = trial.suggest_float("alpha", 0.0001, 10, log=True)
    lasso = Lasso(alpha=alpha, random_state=42, max_iter=2000)
    lasso.fit(X_train, y_train)
    return mean_squared_error(y_valid, lasso.predict(X_valid))


# ==========================
# ElasticNet Regression
# ==========================
def objective_enet(trial):
    """Optuna objective: validation MSE of an ElasticNet regression.

    Samples ``alpha`` (log scale) and ``l1_ratio`` from ``trial``, fits on the
    module-level ``X_train``/``y_train`` and evaluates on
    ``X_valid``/``y_valid`` (globals that must be defined before the study is
    run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    alpha = trial.suggest_float("alpha", 0.0001, 10, log=True)
    l1_ratio = trial.suggest_float("l1_ratio", 0, 1)
    enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42, max_iter=2000)
    enet.fit(X_train, y_train)
    return mean_squared_error(y_valid, enet.predict(X_valid))


# ==========================
# Polynomial Regression
# ==========================
def objective_poly(trial):
    """Optuna objective: validation MSE of a polynomial regression pipeline.

    Samples the polynomial degree (2 to 5) from ``trial`` and builds a pipeline
    of ``PolynomialFeatures`` followed by ``LinearRegression``. The pipeline is
    fitted on the module-level ``X_train``/``y_train`` and evaluated on
    ``X_valid``/``y_valid`` (globals that must be defined before the study is
    run).

    Args:
        trial: The Optuna trial used to sample hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    poly_degree = trial.suggest_int("degree", 2, 5)
    steps = [
        ("poly", PolynomialFeatures(degree=poly_degree)),
        ("lin_reg", LinearRegression()),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)
    return mean_squared_error(y_valid, pipeline.predict(X_valid))


In [None]:
# Run a 50-trial study that minimizes the value returned by objective_rf_reg
# (validation mean squared error), then print the best hyperparameters found.
study = optuna.create_study(direction="minimize")
study.optimize(objective_rf_reg, n_trials=50)
print("Best RF Regressor params:", study.best_params)


# Unified Classification Objective

In [None]:
def objective_classification(trial):
    """Optuna objective that tunes the classifier family and its hyperparameters.

    The trial first picks one of seven classifier families, then samples that
    family's hyperparameters under prefixed names (``rf_``, ``dt_``, ``xgb_``,
    ``lgb_``, ``cat_``, ``knn_``, ``log_``) so each family has its own search
    space. The chosen model is fitted on the module-level
    ``X_train``/``y_train`` and scored on ``X_valid``/``y_valid`` (globals that
    must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample the model and hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).

    Raises:
        optuna.TrialPruned: If LogisticRegression is selected with a solver
            that does not support the sampled penalty.
    """
    model_name = trial.suggest_categorical(
        "model",
        ["RandomForest", "DecisionTree", "XGBoost",
         "LightGBM", "CatBoost", "KNN", "LogisticRegression"],
    )

    # -----------------------------
    # Random Forest
    # -----------------------------
    if model_name == "RandomForest":
        params = {
            "n_estimators": trial.suggest_int("rf_n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("rf_max_depth", 3, 30),
            "min_samples_split": trial.suggest_int("rf_min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("rf_min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("rf_max_features", ["sqrt", "log2", None]),
            "bootstrap": trial.suggest_categorical("rf_bootstrap", [True, False]),
        }
        model = RandomForestClassifier(**params, random_state=42)

    # -----------------------------
    # Decision Tree
    # -----------------------------
    elif model_name == "DecisionTree":
        params = {
            "max_depth": trial.suggest_int("dt_max_depth", 3, 30),
            "min_samples_split": trial.suggest_int("dt_min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("dt_min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("dt_max_features", ["sqrt", "log2", None]),
        }
        model = DecisionTreeClassifier(**params, random_state=42)

    # -----------------------------
    # XGBoost
    # -----------------------------
    elif model_name == "XGBoost":
        params = {
            "n_estimators": trial.suggest_int("xgb_n_estimators", 100, 1000),
            "learning_rate": trial.suggest_float("xgb_learning_rate", 0.01, 0.3),
            "max_depth": trial.suggest_int("xgb_max_depth", 3, 15),
            "subsample": trial.suggest_float("xgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            "gamma": trial.suggest_float("xgb_gamma", 0, 10),
            "min_child_weight": trial.suggest_int("xgb_min_child_weight", 1, 10),
        }
        model = xgb.XGBClassifier(**params, random_state=42, use_label_encoder=False, eval_metric="logloss")

    # -----------------------------
    # LightGBM
    # -----------------------------
    elif model_name == "LightGBM":
        params = {
            "num_leaves": trial.suggest_int("lgb_num_leaves", 20, 200),
            "max_depth": trial.suggest_int("lgb_max_depth", -1, 20),
            "learning_rate": trial.suggest_float("lgb_learning_rate", 0.01, 0.3),
            "n_estimators": trial.suggest_int("lgb_n_estimators", 100, 1000),
            "min_child_samples": trial.suggest_int("lgb_min_child_samples", 5, 50),
            "subsample": trial.suggest_float("lgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            "reg_alpha": trial.suggest_float("lgb_reg_alpha", 0, 10),
            "reg_lambda": trial.suggest_float("lgb_reg_lambda", 0, 10),
        }
        # LightGBM only performs row subsampling when subsample_freq > 0; with
        # the default of 0 the tuned "subsample" value would have no effect.
        model = lgb.LGBMClassifier(**params, subsample_freq=1, random_state=42)

    # -----------------------------
    # CatBoost
    # -----------------------------
    elif model_name == "CatBoost":
        params = {
            "iterations": trial.suggest_int("cat_iterations", 100, 1000),
            "depth": trial.suggest_int("cat_depth", 3, 12),
            "learning_rate": trial.suggest_float("cat_learning_rate", 0.01, 0.3),
            "l2_leaf_reg": trial.suggest_float("cat_l2_leaf_reg", 1, 10),
            "border_count": trial.suggest_int("cat_border_count", 32, 255),
        }
        model = CatBoostClassifier(**params, random_state=42, verbose=0)

    # -----------------------------
    # KNN
    # -----------------------------
    elif model_name == "KNN":
        params = {
            "n_neighbors": trial.suggest_int("knn_n_neighbors", 1, 50),
            "weights": trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            "p": trial.suggest_int("knn_p", 1, 2),
        }
        model = KNeighborsClassifier(**params)

    # -----------------------------
    # Logistic Regression
    # -----------------------------
    elif model_name == "LogisticRegression":
        penalty = trial.suggest_categorical("log_penalty", ["l1", "l2", "elasticnet", "none"])
        C = trial.suggest_float("log_C", 0.001, 10, log=True)
        solver = trial.suggest_categorical("log_solver", ["liblinear", "saga", "lbfgs"])

        # Penalty and solver are sampled independently, but scikit-learn only
        # supports some pairings. Prune the unsupported ones instead of
        # letting fit() raise and abort the whole study.
        supported_penalties = {
            "liblinear": {"l1", "l2"},
            "saga": {"l1", "l2", "elasticnet", "none"},
            "lbfgs": {"l2", "none"},
        }
        if penalty not in supported_penalties[solver]:
            raise optuna.TrialPruned(
                f"solver={solver!r} does not support penalty={penalty!r}"
            )

        params = {
            # Current scikit-learn spells "no penalty" as None; the string
            # "none" is rejected by recent releases (None needs >= 1.2).
            "penalty": None if penalty == "none" else penalty,
            "C": C,
            "solver": solver,
        }
        if penalty == "elasticnet":
            params["l1_ratio"] = trial.suggest_float("log_l1_ratio", 0, 1)
        model = LogisticRegression(max_iter=2000, random_state=42, **params)

    # -----------------------------
    # Fit & Evaluate
    # -----------------------------
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return accuracy_score(y_valid, preds)


# Unified Regression Objective

In [None]:
def objective_regression(trial):
    """Optuna objective that tunes the regressor family and its hyperparameters.

    The trial first picks one of eleven regressor families, then samples that
    family's hyperparameters under prefixed names (``rf_``, ``dt_``, ``xgb_``,
    ``lgb_``, ``cat_``, ``knn_``, ``ridge_``, ``lasso_``, ``enet_``, ``poly_``)
    so each family has its own search space. ``"Linear"`` has no
    hyperparameters. The chosen model is fitted on the module-level
    ``X_train``/``y_train`` and evaluated on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    Args:
        trial: The Optuna trial used to sample the model and hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    model_name = trial.suggest_categorical(
        "model",
        ["RandomForest", "DecisionTree", "XGBoost",
         "LightGBM", "CatBoost", "KNN",
         "Linear", "Ridge", "Lasso", "ElasticNet", "Polynomial"],
    )

    # -----------------------------
    # Random Forest
    # -----------------------------
    if model_name == "RandomForest":
        params = {
            "n_estimators": trial.suggest_int("rf_n_estimators", 100, 1000),
            "max_depth": trial.suggest_int("rf_max_depth", 3, 30),
            "min_samples_split": trial.suggest_int("rf_min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("rf_min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("rf_max_features", ["sqrt", "log2", None]),
            "bootstrap": trial.suggest_categorical("rf_bootstrap", [True, False]),
        }
        model = RandomForestRegressor(**params, random_state=42)

    # -----------------------------
    # Decision Tree
    # -----------------------------
    elif model_name == "DecisionTree":
        params = {
            "max_depth": trial.suggest_int("dt_max_depth", 3, 30),
            "min_samples_split": trial.suggest_int("dt_min_samples_split", 2, 20),
            "min_samples_leaf": trial.suggest_int("dt_min_samples_leaf", 1, 10),
            "max_features": trial.suggest_categorical("dt_max_features", ["sqrt", "log2", None]),
        }
        model = DecisionTreeRegressor(**params, random_state=42)

    # -----------------------------
    # XGBoost
    # -----------------------------
    elif model_name == "XGBoost":
        params = {
            "n_estimators": trial.suggest_int("xgb_n_estimators", 100, 1000),
            "learning_rate": trial.suggest_float("xgb_learning_rate", 0.01, 0.3),
            "max_depth": trial.suggest_int("xgb_max_depth", 3, 15),
            "subsample": trial.suggest_float("xgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("xgb_colsample_bytree", 0.5, 1.0),
            "gamma": trial.suggest_float("xgb_gamma", 0, 10),
            "min_child_weight": trial.suggest_int("xgb_min_child_weight", 1, 10),
        }
        model = xgb.XGBRegressor(**params, random_state=42)

    # -----------------------------
    # LightGBM
    # -----------------------------
    elif model_name == "LightGBM":
        params = {
            "num_leaves": trial.suggest_int("lgb_num_leaves", 20, 200),
            "max_depth": trial.suggest_int("lgb_max_depth", -1, 20),
            "learning_rate": trial.suggest_float("lgb_learning_rate", 0.01, 0.3),
            "n_estimators": trial.suggest_int("lgb_n_estimators", 100, 1000),
            "min_child_samples": trial.suggest_int("lgb_min_child_samples", 5, 50),
            "subsample": trial.suggest_float("lgb_subsample", 0.5, 1.0),
            "colsample_bytree": trial.suggest_float("lgb_colsample_bytree", 0.5, 1.0),
            "reg_alpha": trial.suggest_float("lgb_reg_alpha", 0, 10),
            "reg_lambda": trial.suggest_float("lgb_reg_lambda", 0, 10),
        }
        # LightGBM only performs row subsampling when subsample_freq > 0; with
        # the default of 0 the tuned "subsample" value would have no effect.
        model = lgb.LGBMRegressor(**params, subsample_freq=1, random_state=42)

    # -----------------------------
    # CatBoost
    # -----------------------------
    elif model_name == "CatBoost":
        params = {
            "iterations": trial.suggest_int("cat_iterations", 100, 1000),
            "depth": trial.suggest_int("cat_depth", 3, 12),
            "learning_rate": trial.suggest_float("cat_learning_rate", 0.01, 0.3),
            "l2_leaf_reg": trial.suggest_float("cat_l2_leaf_reg", 1, 10),
            "border_count": trial.suggest_int("cat_border_count", 32, 255),
        }
        model = CatBoostRegressor(**params, random_state=42, verbose=0)

    # -----------------------------
    # KNN
    # -----------------------------
    elif model_name == "KNN":
        params = {
            "n_neighbors": trial.suggest_int("knn_n_neighbors", 1, 50),
            "weights": trial.suggest_categorical("knn_weights", ["uniform", "distance"]),
            "p": trial.suggest_int("knn_p", 1, 2),
        }
        model = KNeighborsRegressor(**params)

    # -----------------------------
    # Linear Regression
    # -----------------------------
    elif model_name == "Linear":
        model = LinearRegression()

    # -----------------------------
    # Ridge
    # -----------------------------
    elif model_name == "Ridge":
        alpha = trial.suggest_float("ridge_alpha", 0.0001, 10, log=True)
        model = Ridge(alpha=alpha, random_state=42, max_iter=2000)

    # -----------------------------
    # Lasso
    # -----------------------------
    elif model_name == "Lasso":
        alpha = trial.suggest_float("lasso_alpha", 0.0001, 10, log=True)
        model = Lasso(alpha=alpha, random_state=42, max_iter=2000)

    # -----------------------------
    # ElasticNet
    # -----------------------------
    elif model_name == "ElasticNet":
        alpha = trial.suggest_float("enet_alpha", 0.0001, 10, log=True)
        l1_ratio = trial.suggest_float("enet_l1_ratio", 0, 1)
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42, max_iter=2000)

    # -----------------------------
    # Polynomial Regression
    # -----------------------------
    elif model_name == "Polynomial":
        degree = trial.suggest_int("poly_degree", 2, 5)
        model = Pipeline([
            ("poly", PolynomialFeatures(degree=degree)),
            ("lin_reg", LinearRegression()),
        ])

    # -----------------------------
    # Fit & Evaluate
    # -----------------------------
    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return mean_squared_error(y_valid, preds)


- This version is for machines with a little less processing power

# Unified Classification Objective (Essential Hyperparameters)

In [None]:
def objective_classification(trial):
    """Lightweight Optuna objective over classifier families.

    The trial picks a classifier family and samples a small set of
    hyperparameters for it, using narrower ranges than the full unified
    objective. The chosen model is fitted on the module-level
    ``X_train``/``y_train`` and scored on ``X_valid``/``y_valid`` (globals that
    must be defined before the study is run).

    ``"SVM"`` is part of the model choices. Previously the function carried two
    ``elif model_name == "SVM"`` branches that could never run: the name was
    missing from the choice list and the second branch duplicated the
    condition of the first. The smaller of the two search spaces is kept here.

    Args:
        trial: The Optuna trial used to sample the model and hyperparameters.

    Returns:
        Accuracy on the validation split (higher is better).
    """
    model_name = trial.suggest_categorical(
        "model",
        ["RandomForest", "DecisionTree", "XGBoost",
         "LightGBM", "CatBoost", "KNN", "LogisticRegression", "SVM"],
    )

    if model_name == "RandomForest":
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int("rf_n_estimators", 100, 200),
            max_depth=trial.suggest_int("rf_max_depth", 5, 15),
            random_state=42
        )

    elif model_name == "DecisionTree":
        model = DecisionTreeClassifier(
            max_depth=trial.suggest_int("dt_max_depth", 5, 15),
            min_samples_leaf=trial.suggest_int("dt_min_samples_leaf", 1, 5),
            criterion=trial.suggest_categorical("dt_criterion", ["gini", "entropy"]),
            random_state=42
        )

    elif model_name == "XGBoost":
        model = xgb.XGBClassifier(
            n_estimators=trial.suggest_int("xgb_n_estimators", 100, 200),
            max_depth=trial.suggest_int("xgb_max_depth", 3, 7),
            learning_rate=trial.suggest_float("xgb_learning_rate", 0.05, 0.2),
            random_state=42,
            use_label_encoder=False,
            eval_metric="logloss"
        )

    elif model_name == "LightGBM":
        model = lgb.LGBMClassifier(
            n_estimators=trial.suggest_int("lgb_n_estimators", 100, 200),
            max_depth=trial.suggest_int("lgb_max_depth", 3, 10),
            num_leaves=trial.suggest_int("lgb_num_leaves", 20, 50),
            learning_rate=trial.suggest_float("lgb_learning_rate", 0.05, 0.15),
            random_state=42
        )

    elif model_name == "CatBoost":
        model = CatBoostClassifier(
            iterations=trial.suggest_int("cat_iterations", 100, 200),
            depth=trial.suggest_int("cat_depth", 3, 6),
            learning_rate=trial.suggest_float("cat_learning_rate", 0.05, 0.2),
            random_state=42,
            verbose=0
        )

    elif model_name == "KNN":
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int("knn_n_neighbors", 3, 15)
        )

    elif model_name == "LogisticRegression":
        model = LogisticRegression(
            C=trial.suggest_float("log_C", 0.1, 1.0),
            penalty="l2",
            solver="lbfgs",
            max_iter=2000,
            random_state=42
        )

    elif model_name == "SVM":
        # Imported here because SVC is not among the imports that precede this
        # cell; a later cell imports it, but this one must work on its own.
        from sklearn.svm import SVC

        # probability=True is left out on purpose: it is only needed for
        # predict_proba(), and this objective only ever calls predict().
        model = SVC(
            C=trial.suggest_float("svm_C", 0.1, 10.0, log=True),
            kernel=trial.suggest_categorical("svm_kernel", ["linear", "rbf"]),
            gamma=trial.suggest_categorical("svm_gamma", ["scale", "auto"]),
            random_state=42
        )

    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return accuracy_score(y_valid, preds)


# Unified Regression Objective (Essential Hyperparameters)

In [None]:
def objective_regression(trial):
    """Lightweight Optuna objective over regressor families.

    The trial picks a regressor family and samples a small set of
    hyperparameters for it, using narrower ranges than the full unified
    objective. The chosen model is fitted on the module-level
    ``X_train``/``y_train`` and evaluated on ``X_valid``/``y_valid`` (globals
    that must be defined before the study is run).

    ``"SVR"`` is part of the model choices. Previously the function carried two
    ``elif model_name == "SVR"`` branches that could never run: the name was
    missing from the choice list and the second branch duplicated the
    condition of the first. The smaller of the two search spaces is kept here.

    Args:
        trial: The Optuna trial used to sample the model and hyperparameters.

    Returns:
        Mean squared error on the validation split (lower is better).
    """
    model_name = trial.suggest_categorical(
        "model",
        ["RandomForest", "DecisionTree", "XGBoost",
         "LightGBM", "CatBoost", "KNN",
         "Linear", "Ridge", "Lasso", "ElasticNet", "Polynomial", "SVR"],
    )

    if model_name == "RandomForest":
        model = RandomForestRegressor(
            n_estimators=trial.suggest_int("rf_n_estimators", 100, 200),
            max_depth=trial.suggest_int("rf_max_depth", 5, 15),
            random_state=42
        )

    elif model_name == "DecisionTree":
        model = DecisionTreeRegressor(
            max_depth=trial.suggest_int("dt_max_depth", 5, 15),
            min_samples_leaf=trial.suggest_int("dt_min_samples_leaf", 1, 5),
            criterion=trial.suggest_categorical("dt_criterion", ["squared_error", "friedman_mse", "absolute_error"]),
            random_state=42
        )

    elif model_name == "XGBoost":
        model = xgb.XGBRegressor(
            n_estimators=trial.suggest_int("xgb_n_estimators", 100, 200),
            max_depth=trial.suggest_int("xgb_max_depth", 3, 7),
            learning_rate=trial.suggest_float("xgb_learning_rate", 0.05, 0.2),
            random_state=42
        )

    elif model_name == "LightGBM":
        model = lgb.LGBMRegressor(
            n_estimators=trial.suggest_int("lgb_n_estimators", 100, 200),
            max_depth=trial.suggest_int("lgb_max_depth", 3, 10),
            num_leaves=trial.suggest_int("lgb_num_leaves", 20, 50),
            learning_rate=trial.suggest_float("lgb_learning_rate", 0.05, 0.15),
            random_state=42
        )

    elif model_name == "CatBoost":
        model = CatBoostRegressor(
            iterations=trial.suggest_int("cat_iterations", 100, 200),
            depth=trial.suggest_int("cat_depth", 3, 6),
            learning_rate=trial.suggest_float("cat_learning_rate", 0.05, 0.2),
            random_state=42,
            verbose=0
        )

    elif model_name == "KNN":
        model = KNeighborsRegressor(
            n_neighbors=trial.suggest_int("knn_n_neighbors", 3, 15)
        )

    elif model_name == "Linear":
        model = LinearRegression()

    elif model_name == "Ridge":
        model = Ridge(
            alpha=trial.suggest_float("ridge_alpha", 0.1, 1.0),
            random_state=42,
            max_iter=2000
        )

    elif model_name == "Lasso":
        model = Lasso(
            alpha=trial.suggest_float("lasso_alpha", 0.1, 1.0),
            random_state=42,
            max_iter=2000
        )

    elif model_name == "ElasticNet":
        model = ElasticNet(
            alpha=trial.suggest_float("enet_alpha", 0.1, 1.0),
            l1_ratio=trial.suggest_float("enet_l1_ratio", 0, 1),
            random_state=42,
            max_iter=2000
        )

    elif model_name == "Polynomial":
        degree = trial.suggest_int("poly_degree", 2, 3)
        model = Pipeline([
            ("poly", PolynomialFeatures(degree=degree)),
            ("lin_reg", LinearRegression())
        ])

    elif model_name == "SVR":
        # Imported here because SVR is not among the imports that precede this
        # cell.
        from sklearn.svm import SVR

        model = SVR(
            C=trial.suggest_float("svr_C", 0.1, 10.0, log=True),
            kernel=trial.suggest_categorical("svr_kernel", ["linear", "rbf"]),
            gamma=trial.suggest_categorical("svr_gamma", ["scale", "auto"])
        )

    model.fit(X_train, y_train)
    preds = model.predict(X_valid)
    return mean_squared_error(y_valid, preds)


# Using Deepseek

# Classification Objective Function

In [None]:
import optuna
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import cross_val_score

def objective_classification(trial, X, y):
    """Optuna objective: pick a classifier family and tune its hyperparameters.

    The trial first samples ``'classifier'`` and then only the
    hyperparameters that belong to the chosen family. Every Optuna
    parameter name carries a per-model prefix (``rf_``, ``xgb_``, ...), so
    names never collide between families.

    Args:
        trial: Optuna trial used to sample the search space.
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Target labels forwarded to ``cross_val_score``.

    Returns:
        Mean macro-averaged F1 over 5-fold cross-validation. Higher is
        better, so the study should be created with ``direction='maximize'``.
    """
    # Let the trial suggest which model to use
    classifier_name = trial.suggest_categorical('classifier', [
        'RandomForest', 'XGBoost', 'LightGBM', 'CatBoost',
        'KNN', 'LogisticRegression', 'DecisionTree', 'SVM'
    ])

    if classifier_name == 'RandomForest':
        model = RandomForestClassifier(
            n_estimators=trial.suggest_int('rf_n_estimators', 50, 500),
            max_depth=trial.suggest_int('rf_max_depth', 3, 20),
            min_samples_split=trial.suggest_int('rf_min_samples_split', 2, 20),
            min_samples_leaf=trial.suggest_int('rf_min_samples_leaf', 1, 10),
            max_features=trial.suggest_categorical('rf_max_features', ['sqrt', 'log2', None]),
            random_state=42,
            n_jobs=-1
        )

    elif classifier_name == 'XGBoost':
        model = XGBClassifier(
            n_estimators=trial.suggest_int('xgb_n_estimators', 50, 500),
            max_depth=trial.suggest_int('xgb_max_depth', 3, 15),
            learning_rate=trial.suggest_float('xgb_learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('xgb_subsample', 0.6, 1.0),
            colsample_bytree=trial.suggest_float('xgb_colsample_bytree', 0.6, 1.0),
            reg_alpha=trial.suggest_float('xgb_reg_alpha', 0, 10),
            reg_lambda=trial.suggest_float('xgb_reg_lambda', 1, 10),
            random_state=42,
            n_jobs=-1,
            use_label_encoder=False,
            eval_metric='logloss'
        )

    elif classifier_name == 'LightGBM':
        model = LGBMClassifier(
            n_estimators=trial.suggest_int('lgb_n_estimators', 50, 500),
            num_leaves=trial.suggest_int('lgb_num_leaves', 20, 150),
            learning_rate=trial.suggest_float('lgb_learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('lgb_subsample', 0.6, 1.0),
            # LightGBM only performs row subsampling when the bagging
            # frequency is non-zero; with the default of 0 the tuned
            # `subsample` value would be silently ignored.
            subsample_freq=1,
            colsample_bytree=trial.suggest_float('lgb_colsample_bytree', 0.6, 1.0),
            reg_alpha=trial.suggest_float('lgb_reg_alpha', 0, 10),
            reg_lambda=trial.suggest_float('lgb_reg_lambda', 0, 10),
            random_state=42,
            n_jobs=-1
        )

    elif classifier_name == 'CatBoost':
        model = CatBoostClassifier(
            iterations=trial.suggest_int('cb_iterations', 50, 500),
            depth=trial.suggest_int('cb_depth', 4, 10),
            learning_rate=trial.suggest_float('cb_learning_rate', 0.01, 0.3, log=True),
            l2_leaf_reg=trial.suggest_float('cb_l2_leaf_reg', 1, 10),
            border_count=trial.suggest_int('cb_border_count', 32, 255),
            random_state=42,
            verbose=0
        )

    elif classifier_name == 'KNN':
        model = KNeighborsClassifier(
            n_neighbors=trial.suggest_int('knn_n_neighbors', 3, 50),
            weights=trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
            p=trial.suggest_int('knn_p', 1, 2),  # 1: Manhattan, 2: Euclidean
            n_jobs=-1
        )

    elif classifier_name == 'LogisticRegression':
        C = trial.suggest_float('lr_C', 0.01, 10, log=True)
        penalty = trial.suggest_categorical('lr_penalty', ['l2', 'none'])

        if penalty == 'none':
            # The Optuna label stays 'none', but scikit-learn expects the
            # Python value None (the string form was removed in 1.4).
            # C has no effect without a penalty, so it is not forwarded.
            lr_kwargs = {'penalty': None, 'solver': 'saga'}
        else:
            lr_kwargs = {'penalty': penalty, 'solver': 'lbfgs', 'C': C}

        model = LogisticRegression(
            random_state=42,
            max_iter=1000,
            n_jobs=-1,
            **lr_kwargs
        )

    elif classifier_name == 'DecisionTree':
        model = DecisionTreeClassifier(
            max_depth=trial.suggest_int('dt_max_depth', 3, 20),
            min_samples_split=trial.suggest_int('dt_min_samples_split', 2, 20),
            min_samples_leaf=trial.suggest_int('dt_min_samples_leaf', 1, 10),
            max_features=trial.suggest_categorical('dt_max_features', ['sqrt', 'log2', None]),
            random_state=42
        )

    elif classifier_name == 'SVM':
        # probability=True is deliberately not set: the f1_macro scorer only
        # needs predict(), and probability calibration would add an extra
        # internal cross-validation to every fit.
        model = SVC(
            C=trial.suggest_float('svm_C', 0.1, 10, log=True),
            kernel=trial.suggest_categorical('svm_kernel', ['linear', 'rbf', 'poly']),
            gamma=trial.suggest_categorical('svm_gamma', ['scale', 'auto']),
            random_state=42
        )

    # Use cross-validation for more robust evaluation
    score = cross_val_score(model, X, y, n_jobs=-1, cv=5, scoring='f1_macro').mean()
    return score

# Regression Objective Function

In [None]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score

def objective_regression(trial, X, y):
    """Optuna objective: pick a regressor family and tune its hyperparameters.

    The trial first samples ``'regressor'`` and then only the
    hyperparameters that belong to the chosen family. Every Optuna
    parameter name carries a per-model prefix (``rf_``, ``xgb_``, ...).

    Args:
        trial: Optuna trial used to sample the search space.
        X: Feature matrix forwarded to ``cross_val_score``.
        y: Regression targets forwarded to ``cross_val_score``.

    Returns:
        Mean R² over 5-fold cross-validation. Higher is better, so the
        study should be created with ``direction='maximize'``.
    """
    # Imported locally so this function does not depend on another
    # notebook cell having been executed first.
    from sklearn.model_selection import cross_val_score

    # Let the trial suggest which model to use
    regressor_name = trial.suggest_categorical('regressor', [
        'RandomForest', 'XGBoost', 'LightGBM', 'CatBoost',
        'KNN', 'LinearRegression', 'Ridge', 'Lasso',
        'ElasticNet', 'Polynomial', 'DecisionTree', 'SVM'
    ])

    if regressor_name == 'RandomForest':
        model = RandomForestRegressor(
            n_estimators=trial.suggest_int('rf_n_estimators', 50, 500),
            max_depth=trial.suggest_int('rf_max_depth', 3, 20),
            min_samples_split=trial.suggest_int('rf_min_samples_split', 2, 20),
            min_samples_leaf=trial.suggest_int('rf_min_samples_leaf', 1, 10),
            max_features=trial.suggest_categorical('rf_max_features', ['sqrt', 'log2', None]),
            random_state=42,
            n_jobs=-1
        )

    elif regressor_name == 'XGBoost':
        model = XGBRegressor(
            n_estimators=trial.suggest_int('xgb_n_estimators', 50, 500),
            max_depth=trial.suggest_int('xgb_max_depth', 3, 15),
            learning_rate=trial.suggest_float('xgb_learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('xgb_subsample', 0.6, 1.0),
            colsample_bytree=trial.suggest_float('xgb_colsample_bytree', 0.6, 1.0),
            reg_alpha=trial.suggest_float('xgb_reg_alpha', 0, 10),
            reg_lambda=trial.suggest_float('xgb_reg_lambda', 1, 10),
            random_state=42,
            n_jobs=-1
        )

    elif regressor_name == 'LightGBM':
        model = LGBMRegressor(
            n_estimators=trial.suggest_int('lgb_n_estimators', 50, 500),
            num_leaves=trial.suggest_int('lgb_num_leaves', 20, 150),
            learning_rate=trial.suggest_float('lgb_learning_rate', 0.01, 0.3, log=True),
            subsample=trial.suggest_float('lgb_subsample', 0.6, 1.0),
            # LightGBM only performs row subsampling when the bagging
            # frequency is non-zero; with the default of 0 the tuned
            # `subsample` value would be silently ignored.
            subsample_freq=1,
            colsample_bytree=trial.suggest_float('lgb_colsample_bytree', 0.6, 1.0),
            reg_alpha=trial.suggest_float('lgb_reg_alpha', 0, 10),
            reg_lambda=trial.suggest_float('lgb_reg_lambda', 0, 10),
            random_state=42,
            n_jobs=-1
        )

    elif regressor_name == 'CatBoost':
        model = CatBoostRegressor(
            iterations=trial.suggest_int('cb_iterations', 50, 500),
            depth=trial.suggest_int('cb_depth', 4, 10),
            learning_rate=trial.suggest_float('cb_learning_rate', 0.01, 0.3, log=True),
            l2_leaf_reg=trial.suggest_float('cb_l2_leaf_reg', 1, 10),
            border_count=trial.suggest_int('cb_border_count', 32, 255),
            random_state=42,
            verbose=0
        )

    elif regressor_name == 'KNN':
        model = KNeighborsRegressor(
            n_neighbors=trial.suggest_int('knn_n_neighbors', 3, 50),
            weights=trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
            p=trial.suggest_int('knn_p', 1, 2),  # 1: Manhattan, 2: Euclidean
            n_jobs=-1
        )

    elif regressor_name == 'LinearRegression':
        # Linear regression has no hyperparameters to tune
        model = LinearRegression(n_jobs=-1)

    elif regressor_name == 'Ridge':
        model = Ridge(
            alpha=trial.suggest_float('ridge_alpha', 0.1, 10, log=True),
            random_state=42
        )

    elif regressor_name == 'Lasso':
        model = Lasso(
            alpha=trial.suggest_float('lasso_alpha', 0.1, 10, log=True),
            random_state=42,
            max_iter=10000
        )

    elif regressor_name == 'ElasticNet':
        model = ElasticNet(
            alpha=trial.suggest_float('enet_alpha', 0.1, 10, log=True),
            l1_ratio=trial.suggest_float('enet_l1_ratio', 0, 1),
            random_state=42,
            max_iter=10000
        )

    elif regressor_name == 'Polynomial':
        degree = trial.suggest_int('poly_degree', 2, 4)
        include_bias = trial.suggest_categorical('poly_include_bias', [True, False])

        # Polynomial feature expansion followed by ordinary least squares
        model = Pipeline([
            ('poly', PolynomialFeatures(degree=degree, include_bias=include_bias)),
            ('linear', LinearRegression())
        ])

    elif regressor_name == 'DecisionTree':
        model = DecisionTreeRegressor(
            max_depth=trial.suggest_int('dt_max_depth', 3, 20),
            min_samples_split=trial.suggest_int('dt_min_samples_split', 2, 20),
            min_samples_leaf=trial.suggest_int('dt_min_samples_leaf', 1, 10),
            max_features=trial.suggest_categorical('dt_max_features', ['sqrt', 'log2', None]),
            random_state=42
        )

    elif regressor_name == 'SVM':
        model = SVR(
            C=trial.suggest_float('svm_C', 0.1, 10, log=True),
            kernel=trial.suggest_categorical('svm_kernel', ['linear', 'rbf', 'poly']),
            gamma=trial.suggest_categorical('svm_gamma', ['scale', 'auto'])
        )

    # Use cross-validation for more robust evaluation
    score = cross_val_score(model, X, y, n_jobs=-1, cv=5, scoring='r2').mean()
    return score

In [None]:
# NOTE(review): both studies below are fed the same X_train / y_train;
# presumably only the one matching the task at hand is meant to be run —
# confirm before executing the whole cell.

# For classification
classification_study = optuna.create_study(direction='maximize')
classification_study.optimize(
    lambda trial: objective_classification(trial, X_train, y_train), n_trials=100
)

# For regression
regression_study = optuna.create_study(direction='maximize')  # maximize R² score
regression_study.optimize(
    lambda trial: objective_regression(trial, X_train, y_train), n_trials=100
)

# Each study has its own name so the classification result is not lost when
# the regression study is created. `study` still refers to the most recent
# (regression) study, as it did before.
study = regression_study

# Report the classification result as well
print("Best classification parameters:", classification_study.best_params)
print("Best classification score:", classification_study.best_value)

# Get best parameters
print("Best parameters:", study.best_params)
print("Best score:", study.best_value)

# Using Perplexity

# Optuna Objective Function Templates
# Classification Objective (if/elif block)

In [None]:
def objective_classification(trial):
    """Optuna objective: pick a classifier family and tune its hyperparameters.

    The trial samples ``'model_type'`` and then only the hyperparameters of
    that family. Every Optuna parameter name carries a per-model prefix
    (``rf_``, ``xgb_``, ``svm_``, ...): Optuna requires a given name to keep
    the same distribution kind for the whole study, and un-prefixed names
    such as ``gamma`` (float for XGBoost, categorical for SVM) would make
    the study fail as soon as both families had been sampled.

    Reads the module-level ``X`` and ``y`` as the training data.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        Mean accuracy over 3-fold cross-validation. Higher is better, so
        the study should be created with ``direction='maximize'``.
    """
    model_type = trial.suggest_categorical('model_type', [
        'random_forest', 'xgboost', 'lightgbm', 'catboost',
        'knn', 'logistic_regression', 'decision_tree', 'svm'
    ])

    if model_type == 'random_forest':
        params = {
            'n_estimators': trial.suggest_int('rf_n_estimators', 50, 500),
            'max_depth': trial.suggest_int('rf_max_depth', 2, 40),
            'min_samples_split': trial.suggest_int('rf_min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('rf_min_samples_leaf', 1, 4),
            # 'auto' was removed in scikit-learn 1.3 (for classifiers it was
            # a synonym of 'sqrt'); None means "use all features".
            'max_features': trial.suggest_categorical('rf_max_features', ['sqrt', 'log2', None]),
            'bootstrap': trial.suggest_categorical('rf_bootstrap', [True, False])
        }
        model = RandomForestClassifier(**params)
    elif model_type == 'xgboost':
        params = {
            'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 500),
            'max_depth': trial.suggest_int('xgb_max_depth', 3, 15),
            'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('xgb_subsample', 0.2, 1.0),
            'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.2, 1.0),
            'gamma': trial.suggest_float('xgb_gamma', 0.0, 1.0),
            'reg_alpha': trial.suggest_float('xgb_reg_alpha', 1e-8, 1.0, log=True),
            'reg_lambda': trial.suggest_float('xgb_reg_lambda', 1e-8, 1.0, log=True)
        }
        model = XGBClassifier(**params)
    elif model_type == 'lightgbm':
        params = {
            'num_leaves': trial.suggest_int('lgb_num_leaves', 20, 200),
            'max_depth': trial.suggest_int('lgb_max_depth', 3, 15),
            'learning_rate': trial.suggest_float('lgb_learning_rate', 0.01, 0.3),
            'bagging_fraction': trial.suggest_float('lgb_bagging_fraction', 0.4, 1.0),
            # Bagging is disabled while bagging_freq is 0 (the default), so
            # the tuned bagging_fraction would otherwise have no effect.
            'bagging_freq': 1,
            'feature_fraction': trial.suggest_float('lgb_feature_fraction', 0.4, 1.0),
            'lambda_l1': trial.suggest_float('lgb_lambda_l1', 1e-8, 10.0, log=True),
            'lambda_l2': trial.suggest_float('lgb_lambda_l2', 1e-8, 10.0, log=True),
            'min_child_samples': trial.suggest_int('lgb_min_child_samples', 5, 100)
        }
        model = LGBMClassifier(**params)
    elif model_type == 'catboost':
        params = {
            'iterations': trial.suggest_int('cb_iterations', 100, 500),
            'depth': trial.suggest_int('cb_depth', 3, 10),
            'learning_rate': trial.suggest_float('cb_learning_rate', 0.01, 0.3),
            'l2_leaf_reg': trial.suggest_float('cb_l2_leaf_reg', 1, 10),
            'border_count': trial.suggest_int('cb_border_count', 32, 128)
        }
        model = CatBoostClassifier(**params)
    elif model_type == 'knn':
        params = {
            'n_neighbors': trial.suggest_int('knn_n_neighbors', 3, 30),
            'weights': trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
            'p': trial.suggest_int('knn_p', 1, 2)
        }
        model = KNeighborsClassifier(**params)
    elif model_type == 'logistic_regression':
        solver = trial.suggest_categorical('lr_solver', ['liblinear', 'saga'])
        # Only sample penalties the chosen solver supports: liblinear handles
        # l1/l2 only, saga additionally handles elasticnet and no penalty.
        # Two parameter names are used because an Optuna categorical must
        # keep the same choices for the whole study.
        if solver == 'liblinear':
            penalty = trial.suggest_categorical('lr_liblinear_penalty', ['l1', 'l2'])
        else:
            penalty = trial.suggest_categorical(
                'lr_saga_penalty', ['l1', 'l2', 'elasticnet', 'none']
            )

        params = {
            'solver': solver,
            # scikit-learn expects None for "no penalty"; the string 'none'
            # was removed in version 1.4.
            'penalty': None if penalty == 'none' else penalty,
            'max_iter': trial.suggest_int('lr_max_iter', 100, 1000)
        }
        if penalty != 'none':
            # C is ignored when no penalty is applied, so only tune it here.
            params['C'] = trial.suggest_float('lr_C', 0.001, 50)
        if penalty == 'elasticnet':
            # elasticnet requires an explicit L1/L2 mixing ratio.
            params['l1_ratio'] = trial.suggest_float('lr_l1_ratio', 0.0, 1.0)
        model = LogisticRegression(**params)
    elif model_type == 'decision_tree':
        params = {
            'max_depth': trial.suggest_int('dt_max_depth', 2, 40),
            'min_samples_split': trial.suggest_int('dt_min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('dt_min_samples_leaf', 1, 20),
            'criterion': trial.suggest_categorical('dt_criterion', ['gini', 'entropy'])
        }
        model = DecisionTreeClassifier(**params)
    elif model_type == 'svm':
        params = {
            'C': trial.suggest_float('svm_C', 0.01, 100),
            'kernel': trial.suggest_categorical('svm_kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
            'gamma': trial.suggest_categorical('svm_gamma', ['scale', 'auto'])
        }
        model = SVC(**params)

    score = cross_val_score(model, X, y, cv=3, scoring='accuracy').mean()
    return score


# Regression Objective (adds linear, ridge, lasso, elasticnet, polynomial)

In [None]:
def objective_regression(trial):
    """Optuna objective: pick a regressor family and tune its hyperparameters.

    The trial samples ``'model_type'`` and then only the hyperparameters of
    that family. Every Optuna parameter name carries a per-model prefix
    (``rf_``, ``xgb_``, ``svm_``, ...): Optuna requires a given name to keep
    the same distribution kind for the whole study, and un-prefixed names
    such as ``gamma`` (float for XGBoost, categorical for SVM) would make
    the study fail as soon as both families had been sampled.

    Reads the module-level ``X`` and ``y`` as the training data.

    Args:
        trial: Optuna trial used to sample the search space.

    Returns:
        Mean of scikit-learn's ``neg_mean_squared_error`` score over 3-fold
        cross-validation. The value is the negated MSE, so values closer to
        zero are better and the study should use ``direction='maximize'``.
    """
    model_type = trial.suggest_categorical('model_type', [
        'random_forest', 'xgboost', 'lightgbm', 'catboost', 'knn',
        'linear', 'ridge', 'lasso', 'elasticnet', 'polynomial',
        'decision_tree', 'svm'
    ])

    if model_type == 'random_forest':
        params = {
            'n_estimators': trial.suggest_int('rf_n_estimators', 50, 500),
            'max_depth': trial.suggest_int('rf_max_depth', 2, 40),
            'min_samples_split': trial.suggest_int('rf_min_samples_split', 2, 10),
            'min_samples_leaf': trial.suggest_int('rf_min_samples_leaf', 1, 4),
            # 'auto' was removed in scikit-learn 1.3; for regressors it meant
            # "use all features", which is spelled None.
            'max_features': trial.suggest_categorical('rf_max_features', ['sqrt', 'log2', None]),
            'bootstrap': trial.suggest_categorical('rf_bootstrap', [True, False])
        }
        model = RandomForestRegressor(**params)
    elif model_type == 'xgboost':
        params = {
            'n_estimators': trial.suggest_int('xgb_n_estimators', 100, 500),
            'max_depth': trial.suggest_int('xgb_max_depth', 3, 15),
            'learning_rate': trial.suggest_float('xgb_learning_rate', 0.01, 0.3),
            'subsample': trial.suggest_float('xgb_subsample', 0.2, 1.0),
            'colsample_bytree': trial.suggest_float('xgb_colsample_bytree', 0.2, 1.0),
            'gamma': trial.suggest_float('xgb_gamma', 0.0, 1.0),
            'reg_alpha': trial.suggest_float('xgb_reg_alpha', 1e-8, 1.0, log=True),
            'reg_lambda': trial.suggest_float('xgb_reg_lambda', 1e-8, 1.0, log=True)
        }
        model = XGBRegressor(**params)
    elif model_type == 'lightgbm':
        params = {
            'num_leaves': trial.suggest_int('lgb_num_leaves', 20, 200),
            'max_depth': trial.suggest_int('lgb_max_depth', 3, 15),
            'learning_rate': trial.suggest_float('lgb_learning_rate', 0.01, 0.3),
            'bagging_fraction': trial.suggest_float('lgb_bagging_fraction', 0.4, 1.0),
            # Bagging is disabled while bagging_freq is 0 (the default), so
            # the tuned bagging_fraction would otherwise have no effect.
            'bagging_freq': 1,
            'feature_fraction': trial.suggest_float('lgb_feature_fraction', 0.4, 1.0),
            'lambda_l1': trial.suggest_float('lgb_lambda_l1', 1e-8, 10.0, log=True),
            'lambda_l2': trial.suggest_float('lgb_lambda_l2', 1e-8, 10.0, log=True),
            'min_child_samples': trial.suggest_int('lgb_min_child_samples', 5, 100)
        }
        model = LGBMRegressor(**params)
    elif model_type == 'catboost':
        params = {
            'iterations': trial.suggest_int('cb_iterations', 100, 500),
            'depth': trial.suggest_int('cb_depth', 3, 10),
            'learning_rate': trial.suggest_float('cb_learning_rate', 0.01, 0.3),
            'l2_leaf_reg': trial.suggest_float('cb_l2_leaf_reg', 1, 10),
            'border_count': trial.suggest_int('cb_border_count', 32, 128)
        }
        model = CatBoostRegressor(**params)
    elif model_type == 'knn':
        params = {
            'n_neighbors': trial.suggest_int('knn_n_neighbors', 3, 30),
            'weights': trial.suggest_categorical('knn_weights', ['uniform', 'distance']),
            'p': trial.suggest_int('knn_p', 1, 2)
        }
        model = KNeighborsRegressor(**params)
    elif model_type == 'linear':
        # Ordinary least squares has nothing to tune
        model = LinearRegression()
    elif model_type == 'ridge':
        params = {
            'alpha': trial.suggest_float('ridge_alpha', 0.01, 100),
            'solver': trial.suggest_categorical(
                'ridge_solver',
                ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']
            )
        }
        model = Ridge(**params)
    elif model_type == 'lasso':
        params = {
            'alpha': trial.suggest_float('lasso_alpha', 0.01, 1.0),
            'max_iter': trial.suggest_int('lasso_max_iter', 1000, 5000)
        }
        model = Lasso(**params)
    elif model_type == 'elasticnet':
        params = {
            'alpha': trial.suggest_float('enet_alpha', 0.01, 1.0),
            'l1_ratio': trial.suggest_float('enet_l1_ratio', 0.1, 1.0),
            'max_iter': trial.suggest_int('enet_max_iter', 1000, 5000)
        }
        model = ElasticNet(**params)
    elif model_type == 'polynomial':
        degree = trial.suggest_int('poly_degree', 2, 5)
        # Built with the Pipeline class the notebook already imports; the
        # step names match what make_pipeline would have generated.
        model = Pipeline([
            ('polynomialfeatures', PolynomialFeatures(degree)),
            ('linearregression', LinearRegression())
        ])
    elif model_type == 'decision_tree':
        params = {
            'max_depth': trial.suggest_int('dt_max_depth', 2, 40),
            'min_samples_split': trial.suggest_int('dt_min_samples_split', 2, 20),
            'min_samples_leaf': trial.suggest_int('dt_min_samples_leaf', 1, 20),
            # 'mse' was renamed to 'squared_error' and removed in
            # scikit-learn 1.2.
            'criterion': trial.suggest_categorical(
                'dt_criterion', ['squared_error', 'friedman_mse']
            )
        }
        model = DecisionTreeRegressor(**params)
    elif model_type == 'svm':
        params = {
            'C': trial.suggest_float('svm_C', 0.01, 100),
            'kernel': trial.suggest_categorical('svm_kernel', ['linear', 'rbf', 'poly', 'sigmoid']),
            'gamma': trial.suggest_categorical('svm_gamma', ['scale', 'auto'])
        }
        model = SVR(**params)

    score = cross_val_score(model, X, y, cv=3, scoring='neg_mean_squared_error').mean()
    return score
