Implementing Support Vector Regression

In [1]:
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [8]:
# Fetch the California Housing data and pull out the feature matrix / target vector
data = fetch_california_housing()
X, y = data.data, data.target

# Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.8,
    random_state=10,
)

# Standardize features (critical for SVR): learn mean/std on the training rows only,
# then apply the same transformation to both splits so no test statistics leak in
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("Training shape:", X_train.shape)
print("Test shape:", X_test.shape)

Training shape: (16512, 8)
Test shape: (4128, 8)


In [9]:
class SVR:
    """Linear epsilon-insensitive support vector regressor fit by batch subgradient descent.

    The model is ``y_hat = X @ w + b``. Each training iteration uses the whole
    training set: residuals whose absolute value exceeds ``epsilon`` contribute
    ``sign(residual) * x`` to the weight gradient, residuals inside the tube
    contribute nothing, and an L2 shrinkage term ``w / C`` is always added.

    Attributes:
        C: Regularization parameter; the shrinkage gradient is ``w / C``, so a
            larger ``C`` means weaker shrinkage.
        epsilon: Half-width of the tube inside which residuals are ignored.
        learning_rate: Step size applied to both gradients.
        max_iter: Number of full-batch updates performed by ``fit`` (there is
            no early stopping).
        w: Weight vector of shape ``(n_features,)``; ``None`` until ``fit`` runs.
        b: Bias term; ``None`` until ``fit`` runs.
    """

    def __init__(self, C=1.0, epsilon=0.1, learning_rate=0.001, max_iter=1000):
        """Store the hyperparameters.

        Raises:
            ValueError: If ``C`` is not strictly positive or ``epsilon`` is negative.
        """
        # C divides the weights in the gradient, so zero/negative values would
        # silently yield inf/NaN weights or flip the sign of the penalty.
        if C <= 0:
            raise ValueError(f"C must be > 0, got {C!r}")
        if epsilon < 0:
            raise ValueError(f"epsilon must be >= 0, got {epsilon!r}")

        self.C = C                   # Regularization parameter
        self.epsilon = epsilon       # Epsilon for epsilon-insensitive loss
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.w = None                # Weights
        self.b = None                # Bias

    def fit(self, X, y):
        """Fit ``w`` and ``b`` with ``max_iter`` full-batch subgradient steps.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like with ``n_samples`` targets; a column vector of shape
                ``(n_samples, 1)`` is flattened.

        Raises:
            ValueError: If ``X`` is not 2-D or ``X`` and ``y`` disagree on the
                number of samples.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: an (n, 1) target would otherwise broadcast the residuals
        # to an (n, n) matrix and break the boolean masking below.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent sample counts: {X.shape[0]} != {y.shape[0]}"
            )

        self.w = np.zeros(X.shape[1])
        self.b = 0.0

        for _ in range(self.max_iter):
            errors = np.dot(X, self.w) + self.b - y

            # Identify samples outside the epsilon tube
            mask = np.abs(errors) > self.epsilon
            n_outside = int(np.count_nonzero(mask))

            # The shrinkage term applies whether or not any sample violates the tube.
            grad_w = self.w / self.C
            grad_b = 0.0

            if n_outside > 0:
                signs = np.sign(errors[mask])
                # The loss gradient is averaged over the violating samples only
                # (not over all n_samples).
                grad_w = signs.dot(X[mask]) / n_outside + grad_w
                grad_b = signs.sum() / n_outside

            # Update parameters
            self.w -= self.learning_rate * grad_w
            self.b -= self.learning_rate * grad_b

    def predict(self, X):
        """Return ``X @ w + b`` for each row of ``X``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.w is None or self.b is None:
            raise RuntimeError("This SVR instance is not fitted yet; call fit() first.")
        return np.dot(np.asarray(X, dtype=float), self.w) + self.b

In [10]:
# Build the hand-written SVR with its hyperparameters spelled out one per line,
# then fit it on the standardized training split
custom_svr = SVR(
    C=1.0,
    epsilon=0.5,
    learning_rate=0.001,
    max_iter=5000,
)
custom_svr.fit(X_train, y_train)

In [11]:
# Run the fitted model over both splits (training first, then test)
y_pred_train, y_pred_test = (
    custom_svr.predict(features) for features in (X_train, X_test)
)

In [12]:
# Compute the error metrics for each split up front
train_mae = mean_absolute_error(y_train, y_pred_train)
test_mae = mean_absolute_error(y_test, y_pred_test)
train_mse = mean_squared_error(y_train, y_pred_train)
test_mse = mean_squared_error(y_test, y_pred_test)

# Report them: MAE for train/test, then MSE for train/test
print("\nCustom SVR Performance:")
print(f"Train MAE: {train_mae:.4f}")
print(f"Test MAE: {test_mae:.4f}")
print(f"Train MSE: {train_mse:.4f}")
print(f"Test MSE: {test_mse:.4f}")


Custom SVR Performance:
Train MAE: 0.6350
Test MAE: 0.6465
Train MSE: 0.7311
Test MSE: 0.7549
