<a href="https://colab.research.google.com/github/Sophinaz/HyperParameter-Tuning/blob/main/HyperParameter_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.1.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.1.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.1.0 scikit-optimize-0.10.2


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, OneHotEncoder, StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import GradientBoostingRegressor, ExtraTreesRegressor, AdaBoostRegressor
from sklearn.linear_model import Lasso, ElasticNet, SGDRegressor
from sklearn.svm import SVR
from skopt import BayesSearchCV
from sklearn.compose import ColumnTransformer

In [None]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
from ucimlrepo import fetch_ucirepo

# fetch dataset
obesity_level = fetch_ucirepo(id=544)

# data (as pandas dataframes)
X = obesity_level.data.features
y = obesity_level.data.targets


In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_pred_encoded = label_encoder.transform(y_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [None]:
y_pred_encoded

array([0, 4, 0, 0, 1, 0, 3, 5, 2, 0, 2, 4, 3, 3, 4, 6, 4, 3, 2, 6, 2, 5,
       4, 2, 1, 6, 6, 4, 4, 0, 4, 5, 6, 1, 0, 4, 2, 1, 1, 1, 1, 3, 1, 1,
       5, 0, 3, 1, 2, 3, 1, 1, 1, 5, 0, 6, 3, 3, 5, 3, 1, 4, 6, 0, 3, 2,
       1, 4, 0, 1, 6, 6, 0, 0, 5, 3, 3, 1, 4, 3, 1, 2, 3, 3, 2, 2, 1, 6,
       1, 6, 5, 3, 0, 5, 2, 0, 5, 2, 1, 3, 2, 2, 4, 2, 6, 2, 1, 4, 4, 3,
       2, 1, 2, 1, 0, 2, 2, 3, 6, 1, 4, 3, 2, 6, 1, 3, 2, 2, 4, 3, 4, 3,
       1, 2, 6, 4, 0, 4, 5, 2, 5, 6, 0, 0, 1, 3, 2, 4, 0, 2, 0, 0, 1, 3,
       2, 5, 6, 5, 0, 0, 1, 1, 2, 0, 3, 0, 2, 1, 5, 6, 1, 6, 3, 4, 2, 6,
       2, 6, 2, 5, 0, 3, 0, 5, 3, 2, 5, 4, 3, 3, 0, 4, 0, 1, 4, 5, 3, 2,
       4, 0, 3, 3, 4, 4, 1, 4, 5, 5, 2, 4, 5, 2, 0, 2, 3, 0, 5, 3, 3, 3,
       4, 6, 3, 5, 3, 6, 0, 4, 5, 5, 5, 4, 1, 5, 1, 2, 2, 2, 2, 0, 6, 6,
       5, 2, 2, 2, 1, 2, 2, 0, 5, 2, 0, 1, 1, 6, 2, 3, 2, 2, 4, 4, 5, 2,
       2, 4, 4, 1, 1, 4, 2, 4, 6, 2, 3, 2, 3, 4, 3, 1, 0, 5, 6, 4, 4, 5,
       2, 6, 0, 3, 2, 5, 1, 2, 0, 5, 3, 3, 6, 6, 1,

In [None]:
categorical_cols = ['Gender', 'family_history_with_overweight', 'FAVC', 'CAEC', 'SMOKE', 'SCC', 'CALC', 'MTRANS']
numerical_cols = ['Age', 'Height', 'Weight', 'FCVC', 'NCP', 'CH2O', 'FAF', 'TUE']

preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numerical_cols),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
])

sgd_pipe = Pipeline([
    ('preprocessor', preprocessor),
    ('model', SGDRegressor())
])

gb_pipe = Pipeline([
    ('preprocessor', preprocessor),
    ('model', GradientBoostingRegressor(random_state=42))
])

In [None]:
sgd_param_space = {
    'model__alpha': (1e-5, 1e-1, 'log-uniform'),  # Regularization strength
    'model__l1_ratio': (0, 1),                    # ElasticNet mixing (0=Ridge, 1=Lasso)
    'model__learning_rate': ['constant', 'invscaling', 'adaptive'],
    'model__eta0': (0.01, 1.0)                    # Initial learning rate
}

bayes_search = BayesSearchCV(
    sgd_pipe,
    sgd_param_space,
    n_iter=20,  # Number of Bayesian optimization iterations
    cv=3,
    scoring='neg_mean_squared_error',
    random_state=42,
    n_jobs=-1    # Use all CPU cores
)

bayes_search.fit(X_train, y_train_encoded)
best_sgd = bayes_search.best_estimator_

# Train models
best_sgd.fit(X_train, y_train_encoded)
gb_pipe.fit(X_train, y_train_encoded)

print("Best SGD params:", bayes_search.best_params_)

Best SGD params: OrderedDict([('model__alpha', 0.015783879853890563), ('model__eta0', 0.44364889457651413), ('model__l1_ratio', 1), ('model__learning_rate', 'adaptive')])


In [None]:
def evaluate(model, name):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_pred_encoded, y_pred))
    r2 = r2_score(y_pred_encoded, y_pred)
    print(f"\n{name} Results:")
    print(f"RMSE: {rmse:.3f}")
    print(f"R²: {r2:.3f}")
    return y_pred

_ = evaluate(best_sgd, "SGD Regressor")
_ = evaluate(gb_pipe, "Gradient Boosting Regressor")


SGD Regressor Results:
RMSE: 1.639
R²: 0.273

Gradient Boosting Regressor Results:
RMSE: 0.862
R²: 0.799
