In [1]:
# 📈 Support Vector Regression – California Housing

# 📁 1. Data Loading
from pathlib import Path

import joblib
import pandas as pd
from sklearn.model_selection import cross_val_score, GridSearchCV
from sklearn.svm import SVR

# Read the preprocessed training set, then separate the regression target
# (median house value) from the remaining columns, which serve as features.
target_column = "median_house_value"
df = pd.read_csv("../data/train/housing_train_processed.csv")
y = df[target_column]
X = df.drop(columns=[target_column])

# 🤖 2. Model Fitting
# Baseline: an RBF-kernel SVR with otherwise default hyperparameters, trained
# on the full training set. `fit` returns the estimator itself, so the
# construction and training steps can be chained.
svr_reg = SVR(kernel="rbf").fit(X, y)

# 🧪 3. Cross-Validation
# 5-fold CV of the baseline model. The scorer reports *negated* MSE
# (scikit-learn scorers are "greater is better"), so flip the sign before
# taking the square root to obtain one RMSE per fold.
scores = cross_val_score(
    estimator=svr_reg,
    X=X,
    y=y,
    cv=5,
    scoring="neg_mean_squared_error",
)
rmse_scores = (-scores) ** 0.5
average_rmse = rmse_scores.mean()

print("Cross-validation RMSE scores:", rmse_scores)
print("Average RMSE:", average_rmse)

# 🔍 4. Hyperparameter Tuning
# Exhaustive search over kernel type, regularisation strength C and the
# epsilon-tube width: 2 x 2 x 2 = 8 candidates, each evaluated with 3-fold CV
# on negated MSE. n_jobs=-1 lets scikit-learn use every available core.
param_grid = dict(
    kernel=["linear", "rbf"],
    C=[1, 10],
    epsilon=[0.1, 0.2],
)
grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    cv=3,
    n_jobs=-1,
).fit(X, y)

# With the default refit=True, best_estimator_ is the winning configuration
# retrained on all of X, y.
best_svr = grid_search.best_estimator_

print("Best parameters:", grid_search.best_params_)

# 💾 5. Model Saving
# Persist the tuned estimator with joblib. The output directory is created
# first (no-op if it already exists): joblib.dump does not create missing
# parent directories, and a FileNotFoundError at this point would discard the
# result of the grid search that just finished.
model_path = Path("../models/svr_model.pkl")
model_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(best_svr, model_path)
print("✅ Model saved to models/svr_model.pkl")

Cross-validation RMSE scores: [120715.46978119 116631.86305519 118635.52928849 117150.90794416
 117735.67595264]
Average RMSE: 118173.88920433349
Best parameters: {'C': 10, 'epsilon': 0.2, 'kernel': 'linear'}
✅ Model saved to models/svr_model.pkl
