In [3]:

# CT4101  Support Vector Regression
# Author: Ben Naughton

import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import KFold, cross_validate, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_error, r2_score

# Load the steel dataset: "tensile_strength" is the regression target and
# every other column is used as a feature.
data = pd.read_csv("steel.csv")

X = data.drop(columns=["tensile_strength"])
Y = data["tensile_strength"]

# Scale the features, then fit an RBF-kernel SVR. Because the scaler lives
# inside the pipeline it is re-fitted on the training part of every fold, so
# no statistics from the held-out part leak into training.
model = make_pipeline(
    StandardScaler(),
    SVR(kernel="rbf")
)

# Scorers reported for every run. scikit-learn maximises scores, so MAE is
# exposed as its negative and is negated again before printing.
metrics = {
    "MAE": "neg_mean_absolute_error",
    "R2": "r2"
}

# 10-fold cross-validation. The fixed seed makes the shuffled split
# reproducible and identical for the baseline, the grid search and the
# tuned evaluation below.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# --- Baseline: SVR with default hyperparameters ---------------------------
# n_jobs=-1 evaluates the folds in parallel; the scores are unaffected.
default_results = cross_validate(
    model, X, Y, cv=kfold, scoring=metrics, return_train_score=True, n_jobs=-1
)

print("Default Results")
print(f"Average Train MAE: {-default_results['train_MAE'].mean():.3f}")
print(f"Average Test MAE: {-default_results['test_MAE'].mean():.3f}")
print(f"Average Train R²: {default_results['train_R2'].mean():.3f}")
print(f"Average Test R²: {default_results['test_R2'].mean():.3f}")

# --- Hyperparameter tuning -------------------------------------------------
# Keys follow the "<step name>__<parameter>" convention; make_pipeline names
# the SVR step "svr".
param_grid = {
    "svr__C": [0.1, 1, 10, 100],
    "svr__gamma": [0.001, 0.01, 0.1, 1]
}

# Exhaustive search over the 4 x 4 grid, selecting the candidate with the
# lowest cross-validated MAE.
grid_search = GridSearchCV(
    model,
    param_grid=param_grid,
    cv=kfold,
    scoring="neg_mean_absolute_error",
    n_jobs=-1
)

grid_search.fit(X, Y)

print("Best Parameters:", grid_search.best_params_)
print(f"Best Cross-Validated MAE: {-grid_search.best_score_:.3f}")

# --- Tuned model: report both metrics on train and test folds -------------
# NOTE: the hyperparameters were selected on these same folds, so the test
# scores printed below are optimistically biased (the test MAE simply
# reproduces best_score_). An unbiased estimate would need nested
# cross-validation or a separate hold-out set.
best_svr = grid_search.best_estimator_

tuned_results = cross_validate(
    best_svr, X, Y, cv=kfold, scoring=metrics, return_train_score=True, n_jobs=-1
)
print("Tuned Results")
print(f"Average Train MAE: {-tuned_results['train_MAE'].mean():.3f}")
print(f"Average Test MAE: {-tuned_results['test_MAE'].mean():.3f}")
print(f"Average Train R²: {tuned_results['train_R2'].mean():.3f}")
print(f"Average Test R²: {tuned_results['test_R2'].mean():.3f}")




Default Results
Average Train MAE: 61.739
Average Test MAE: 62.582
Average Train R²: 0.244
Average Test R²: 0.234
Best Parameters: {'svr__C': 100, 'svr__gamma': 0.1}
Best Cross-Validated MAE: 25.395
Tuned Results
Average Train MAE: 16.844
Average Test MAE: 25.395
Average Train R²: 0.919
Average Test R²: 0.851


In [2]:


import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold, cross_validate, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Load dataset
data = pd.read_csv("steel.csv")

# Define features (X) and target variable (y): "tensile_strength" is the
# regression target and every other column is used as a feature.
X = data.drop(columns=["tensile_strength"])
y = data["tensile_strength"]

# Single-step pipeline with no scaling step. The wrapper is kept because the
# param_grid keys below address the forest through the step name that
# make_pipeline generates ("randomforestregressor").
model = make_pipeline(
    RandomForestRegressor(random_state=42)
)

# Scorers reported for every run. scikit-learn maximises scores, so MAE is
# exposed as its negative and is negated again before printing.
metrics = {
    "MAE": "neg_mean_absolute_error",
    "R2": "r2"
}

# Define 10-fold cross-validation. The fixed seed makes the shuffled split
# reproducible and identical for every evaluation in this cell.
kfold = KFold(n_splits=10, shuffle=True, random_state=42)

# --- Baseline: random forest with default hyperparameters -----------------
default_results = cross_validate(
    model, X, y, cv=kfold, scoring=metrics, return_train_score=True, n_jobs=-1
)
print("Default results")
print(f"Average Train MAE: {-default_results['train_MAE'].mean():.3f}")
print(f"Average Test MAE: {-default_results['test_MAE'].mean():.3f}")
print(f"Average Train R²: {default_results['train_R2'].mean():.3f}")
print(f"Average Test R²: {default_results['test_R2'].mean():.3f}")


# --- Hyperparameter tuning -------------------------------------------------
# Keys follow the "<step name>__<parameter>" convention.
param_grid = {
    "randomforestregressor__n_estimators": [50, 100, 200],
    "randomforestregressor__max_depth": [None, 5, 10, 20]
}

# Exhaustive search over the 3 x 4 grid, selecting the candidate with the
# lowest cross-validated MAE.
grid_search = GridSearchCV(
    model,
    param_grid=param_grid,
    cv=kfold,
    scoring="neg_mean_absolute_error",
    n_jobs=-1
)

grid_search.fit(X, y)

print("\nHyperparameter Tuning")
print("Best Parameters:", grid_search.best_params_)
print(f"Best Cross-Validated MAE: {-grid_search.best_score_:.3f}")

# --- Tuned model: report both metrics on train and test folds -------------
# NOTE: the hyperparameters were selected on these same folds, so the test
# scores printed below are optimistically biased (the test MAE simply
# reproduces best_score_). An unbiased estimate would need nested
# cross-validation or a separate hold-out set.
best_rf = grid_search.best_estimator_

tuned_results = cross_validate(
    best_rf, X, y, cv=kfold, scoring=metrics, return_train_score=True, n_jobs=-1
)

# Header typo fixed (was "Tuned resultst").
print("\nTuned results")
print(f"Average Train MAE: {-tuned_results['train_MAE'].mean():.3f}")
print(f"Average Test MAE: {-tuned_results['test_MAE'].mean():.3f}")
print(f"Average Train R²: {tuned_results['train_R2'].mean():.3f}")
print(f"Average Test R²: {tuned_results['test_R2'].mean():.3f}")

Default results
Average Train MAE: 8.135
Average Test MAE: 21.544
Average Train R²: 0.986
Average Test R²: 0.896

Hyperparameter Tuning
Best Parameters: {'randomforestregressor__max_depth': None, 'randomforestregressor__n_estimators': 200}
Best Cross-Validated MAE: 21.353

Tuned resultst
Average Train MAE: 7.969
Average Test MAE: 21.353
Average Train R²: 0.986
Average Test R²: 0.897
