Evaluation of SVR and gradient boosting regression on steel dataset

In [2]:

# Data preprocessing
import pandas as pd
import numpy as np

from sklearn.model_selection import KFold, cross_validate, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score, make_scorer
from sklearn.ensemble import GradientBoostingRegressor


# Seed shared by the CV splitter and the estimator so a re-run reproduces the
# reported numbers.
RANDOM_STATE = 67


def _rmse_metric(y_true, y_pred):
    """Return the root mean squared error between *y_true* and *y_pred*."""
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Load dataset: "tensile_strength" is the regression target, every other
# column in the CSV is used as a feature.
df = pd.read_csv("steel.csv")

X = df.drop("tensile_strength", axis=1)
y = df["tensile_strength"]

# NOTE: both scorers are built with make_scorer's default
# greater_is_better=True. That is what we want for reporting raw RMSE values
# below, but it means `rmse` must NOT be used for model selection (e.g. as the
# refit metric of GridSearchCV), because it would favour the largest error.
rmse = make_scorer(_rmse_metric)
r2 = make_scorer(r2_score)


# 10-fold cross-validation (default hyperparameters)
# Only random_state is set: gradient boosting permutes features at every
# split, so without a fixed seed tied splits can make results differ per run.
model = GradientBoostingRegressor(random_state=RANDOM_STATE)

cv = KFold(n_splits=10, shuffle=True, random_state=RANDOM_STATE)

cv_results = cross_validate(
    model,
    X, y,
    cv=cv,
    scoring={'RMSE': rmse, 'R2': r2},
    return_train_score=True
)

print("=== Default Hyperparameters ===")
# cross_validate stores per-fold scores under "<train|test>_<scorer name>";
# report the mean over the 10 folds for each metric/split combination.
for metric_name in ("RMSE", "R2"):
    for split_name in ("Train", "Test"):
        fold_scores = cv_results[f"{split_name.lower()}_{metric_name}"]
        print(f"Average {split_name} {metric_name}:", np.mean(fold_scores))

=== Default Hyperparameters ===
Average Train RMSE: 16.97074434820655
Average Test RMSE: 28.359638123865853
Average Train R2: 0.9652491804618029
Average Test R2: 0.8999945751787528
