Support Vector Regression (SVR) - California Housing

In [1]:
# Importing required libraries for our SVR regression workflow

from pathlib import Path                            # filesystem paths for saved artifacts

import joblib                                       # model persistence
import numpy as np                                  # numerical operations
import pandas as pd                                 # to work with DataFrames

from sklearn.datasets import fetch_california_housing  # regression dataset
from sklearn.metrics import mean_squared_error, r2_score # evaluation metrics
from sklearn.model_selection import train_test_split   # to split data
from sklearn.preprocessing import StandardScaler       # feature scaling
from sklearn.svm import SVR                            # Support Vector Regression model


In [2]:
# Fetch the California Housing dataset; as_frame=True returns pandas objects
data_reg = fetch_california_housing(as_frame=True)

# Features (Xr) and target (yr) in one step.
# Per the scikit-learn dataset description, the target is the median house
# value of a district, expressed in units of $100,000.
Xr, yr = data_reg.data, data_reg.target

# Preview the first 5 feature rows
Xr.head()


Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25


In [3]:
# Hold out 20% of the rows for testing; the remaining 80% are used for training.
# A fixed random_state makes the shuffle (and therefore the split) reproducible.
split_options = {"test_size": 0.2, "random_state": 42}

Xr_train, Xr_test, yr_train, yr_test = train_test_split(Xr, yr, **split_options)

# Sanity check: shapes of the train and test feature sets
(Xr_train.shape, Xr_test.shape)


((16512, 8), (4128, 8))

In [4]:
# Standardize every feature to mean=0, std=1.
# The scaler learns its statistics from the TRAINING data only, so no
# information from the test set leaks into preprocessing.
scaler_r = StandardScaler().fit(Xr_train)

# Apply the same learned scaling to both splits (never re-fit on test data)
Xr_train_scaled = scaler_r.transform(Xr_train)
Xr_test_scaled = scaler_r.transform(Xr_test)


In [5]:
# Build the (still unfitted) Support Vector Regression model.
#   kernel="rbf"   -> RBF kernel, able to capture non-linear relationships
#   C=1.0          -> inverse of regularization strength: a smaller C means
#                     stronger regularization and a smoother model
#   gamma="scale"  -> kernel coefficient derived from the data by scikit-learn,
#                     1 / (n_features * X.var())
svr = SVR(kernel="rbf", C=1.0, gamma="scale")


In [6]:
# Fit the SVR on the standardized training features and their targets.
# The fitted estimator is returned, so it is shown as this cell's output.
svr.fit(X=Xr_train_scaled, y=yr_train)


0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


In [7]:
# Predict on the test set, using features scaled with the training-set scaler
yr_pred = svr.predict(X=Xr_test_scaled)

# Peek at the first 10 predicted values
yr_pred[:10]

array([0.51647228, 1.56842925, 3.59780295, 2.49108208, 2.56880838,
       1.67166034, 2.62538359, 1.72918458, 2.31909544, 4.73053473])

In [8]:
# RMSE: square root of the mean squared error, in the same units as the target
mse = mean_squared_error(yr_test, yr_pred)
rmse = np.sqrt(mse)

# R² (coefficient of determination): share of target variance explained
r2 = r2_score(yr_test, yr_pred)

print("SVR R² Score:", r2)
print("SVR RMSE:", rmse)


SVR R² Score: 0.7275628923016776
SVR RMSE: 0.5974981438748279


In [9]:
# Persist the trained model, its scaler, and the test-set predictions.
# Paths are built relative to the current working directory rather than a
# machine-specific absolute path, so the cell runs on any machine.
OUTPUT_ROOT = Path.cwd()                 # change this to redirect all artifacts
MODELS_DIR = OUTPUT_ROOT / "Models"
PREDICTIONS_DIR = OUTPUT_ROOT / "Predictions"

# Create the output folders up front; writing into a missing folder fails.
for out_dir in (MODELS_DIR, PREDICTIONS_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

# Save the trained SVR model
joblib.dump(svr, MODELS_DIR / "svr_model.pkl")

# The SVR was fitted on standardized features, so the fitted scaler is saved
# as well; new data must go through it before being passed to the model.
joblib.dump(scaler_r, MODELS_DIR / "svr_scaler.pkl")

# Save actual vs. predicted test-set values to CSV (row index is not written)
pd.DataFrame({"Actual": yr_test, "Predicted": yr_pred}).to_csv(
    PREDICTIONS_DIR / "SVR_Predictions.csv", index=False
)
