<a href="https://colab.research.google.com/github/ABBAS-37405/PYTHON-AND-DATA-SCIENCE/blob/main/HyperParam_XGBoost_ML_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **XGBoost Regressor**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Read the CSV into a DataFrame; the relative path is resolved against the
# notebook's current working directory.
DATA_PATH = "healthcare_data_10000.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Step 2: Columns used as model inputs, and the column to predict.
numeric_cols = [
    'age',
    'bmi',
    'systolic_bp',
    'diastolic_bp',
    'cholesterol_level',
    'glucose_level',
    'exercise_mins_per_week',
    'alcohol_units_per_week',
    'medications_count',
]
target = 'heart_rate'

# Step 3: Separate the predictors (X) from the response (y).
X, y = df[numeric_cols], df[target]

# Step 4: Hold out 20% of the rows for testing; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train_reg, y_test_reg = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Standardize the features. The scaler is fitted on the training rows
# only and then applied to both splits, so the test rows do not influence the
# scaling statistics.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
import numpy as np

# Baseline: XGBoost regressor with hand-picked hyperparameters (no tuning),
# trained on the scaled training split.
model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(X_train_scaled, y_train_reg)

# Predictions for the held-out test features.
y_pred_reg = model.predict(X_test_scaled)

# scikit-learn metrics take (y_true, y_pred) in that order. MSE is symmetric,
# so the value is the same either way, but the conventional order matches the
# evaluation of the tuned model further down and stays correct if the metric
# is later swapped for an asymmetric one.
mse = mean_squared_error(y_test_reg, y_pred_reg)
RMSE = np.sqrt(mse)
print(RMSE)
print(mse)

10.361777831016063
107.36643981933594


In [None]:
pip install scikit-optimize



In [None]:
from skopt import BayesSearchCV

In [None]:
# Hyperparameter search space for BayesSearchCV.
#
# scikit-optimize infers the dimension type from each tuple:
#   * (low, high) with two ints          -> integer range
#   * (low, high) with any float bound   -> real-valued range
#   * a 3-tuple whose last item is not a prior name ("uniform" /
#     "log-uniform") -> a categorical list of exactly those values
# A step-style tuple such as (100, 1000, 10) is therefore NOT a range with a
# step of 10: the search would only ever try 100, 1000 and 10. Plain
# (low, high) bounds are used instead so the whole range is explored.
search_spaces = {
    "n_estimators": (100, 1000),      # integer range
    "max_depth": (3, 10),             # integer range
    "learning_rate": (0.01, 0.3),
    "subsample": (0.5, 1.0),
    "colsample_bytree": (0.5, 1.0),
    "gamma": (0.0, 5.0),              # float bounds: searched continuously, not only 0..5 integers
    "min_child_weight": (1, 10),      # integer range
}

In [None]:
# Base estimator handed to the search; only the seed is fixed here, every
# other hyperparameter comes from `search_spaces`.
xgb = XGBRegressor(random_state=42)

# Bayesian hyperparameter search over `search_spaces`.
opt = BayesSearchCV(
    estimator=xgb,
    search_spaces=search_spaces,
    scoring='neg_mean_squared_error',  # negated MSE, so a larger score is better
    n_iter=30,                         # number of parameter settings to evaluate
    cv=3,                              # 3-fold cross-validation per setting
    n_jobs=-1,                         # use all available cores
    random_state=42,
    verbose=0,
)

In [None]:
# Run the search on the scaled training split, then show the best
# hyperparameter combination it found.
opt.fit(X_train_scaled, y_train_reg)
best_params = opt.best_params_
print(best_params)

OrderedDict([('colsample_bytree', 1.0), ('gamma', 5), ('learning_rate', 0.015447726298075719), ('max_depth', 3), ('min_child_weight', 1), ('n_estimators', 10), ('subsample', 0.9838146230207931)])


In [None]:
# Score the estimator selected by the search on the held-out test split.
best_model = opt.best_estimator_
y_pred_reg = best_model.predict(X_test_scaled)

# Mean squared error and its square root (same units as the target).
mse = mean_squared_error(y_true=y_test_reg, y_pred=y_pred_reg)
rmse = np.sqrt(mse)

for label, value in (("MSE:", mse), ("RMSE:", rmse)):
    print(label, value)

MSE: 103.70429992675781
RMSE: 10.183530818275056
