# Support Vector Regressor (SVR)

Purpose: load processed data, fit SVR, evaluate with CV, tune hyperparameters with RandomizedSearchCV, and save model.


# Data Loading

In [1]:
from pathlib import Path
import pandas as pd

ROOT = Path("..")
PROC = ROOT / "data" / "train" / "housing_train_processed.csv"
df = pd.read_csv(PROC)
y = df["median_house_value"].values
X = df.drop(columns=["median_house_value"]).values
print(X.shape)


(16512, 24)


# Model Fitting

In [2]:
from sklearn.svm import SVR
model = SVR() 
model.fit(X, y)
print("Train R^2:", model.score(X, y))


Train R^2: -0.050733925111627


# Cross-Validation

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np
scores = cross_val_score(model, X, y, scoring="neg_mean_squared_error", cv=5, n_jobs=-1)
rmse = np.sqrt(-scores)
print("CV RMSE:", rmse)
print("Mean RMSE:", rmse.mean(), "±", rmse.std())


CV RMSE: [120819.36250153 116704.99969296 118727.51181441 117260.05739857
 117816.71409427]
Mean RMSE: 118265.72910034936 ± 1441.0519180898555


# Hyperparameter Tuning

In [4]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVR
from scipy.stats import loguniform
import numpy as np

param_dist = {
    "C": loguniform(1, 100),     
    "gamma": loguniform(1e-3, 1e-1),
    "kernel": ["rbf"]         
}

rs = RandomizedSearchCV(
    estimator=SVR(),
    param_distributions=param_dist,
    n_iter=8,              
    scoring="neg_mean_squared_error",
    cv=3,
    random_state=42,
    n_jobs=-1
)
rs.fit(X, y)

print("Best params:", rs.best_params_)
print("Best RMSE:", np.sqrt(-rs.best_score_))
best_model = rs.best_estimator_


Best params: {'C': np.float64(29.10635913133069), 'gamma': np.float64(0.015751320499779727), 'kernel': 'rbf'}
Best RMSE: 114323.95136713232


# Model Saving

In [5]:
import joblib, pathlib
final_model = best_model if "best_model" in locals() else model
out = pathlib.Path(".") / "svr_model.pkl"
joblib.dump(final_model, out)
out


PosixPath('svr_model.pkl')