### SVM Regressor on the California Housing Dataset

In [1]:
from sklearn.datasets import fetch_california_housing

# Load the California housing dataset and pull out the feature matrix (X)
# and the regression target (y) from the returned bunch.
housing = fetch_california_housing()
X, y = housing.data, housing.target

In [6]:
# Dimensions of the feature matrix, shown as (rows, columns).
X.shape

(20640, 8)

In [7]:
# Length of the target vector; it should equal the number of rows in X.
y.shape

(20640,)

In [2]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [10]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import LinearSVR

# Baseline model: standardize the features, then fit a linear SVR.
# max_iter is set explicitly to give the solver more room to converge, and
# random_state is fixed so the fit is reproducible.
lin_svr = make_pipeline(
    StandardScaler(),
    LinearSVR(dual=True, max_iter=5000, random_state=42),
)

# Show the pipeline itself. The previous last line referenced
# `get_feature_names_out` without calling it, which only printed a bound-method
# repr; calling it is not meaningful here either, because the pipeline is not
# fitted yet and its final step is a regressor, not a transformer.
lin_svr

Pipeline(steps=[('standardscaler', StandardScaler()),
                ('linearsvr',
                 LinearSVR(dual=True, max_iter=5000, random_state=42))])

In [11]:
# Fit the baseline pipeline (scaler + linear SVR) on the training split.
lin_svr.fit(X=X_train, y=y_train)

In [12]:
from sklearn.model_selection import cross_val_score

# Cross-validate the baseline on the training split. scikit-learn reports the
# *negated* MSE (so that larger is always better), hence the sign flip to get
# one ordinary MSE value per fold.
neg_mse_per_fold = cross_val_score(
    lin_svr,
    X_train,
    y_train,
    scoring="neg_mean_squared_error",
)
mse = -neg_mse_per_fold
mse.mean()

1.928806332807802

In [13]:
# Per-fold MSE. One fold (~6.7) is far worse than the rest (~0.5-1.0), which is
# what inflates the mean above; worth investigating rather than averaging away.
mse

array([0.54122564, 0.53347784, 6.71449235, 1.01647786, 0.83835797])

In [15]:
import numpy as np

# Mean of the per-fold RMSE values: take the square root fold by fold first,
# then average (this is not the same as the square root of the mean MSE).
np.mean(np.sqrt(mse))

1.1962267972428624

In [16]:
# Using RBF Kernel
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform

In [17]:
# Scale the features, then fit a kernel SVR (SVR's default kernel is RBF).
# NOTE: despite the `_clf` suffix this pipeline is a regressor, not a
# classifier; the name is kept so any other cell referring to it still works.
svm_clf = make_pipeline(StandardScaler(), SVR())

# Search space for the randomized search:
#   gamma ~ log-uniform on [0.001, 0.1]
#   C     ~ uniform on [1, 11]  -- scipy's uniform(loc, scale) spans
#                                  [loc, loc + scale], not [loc, scale]
param_distrib = {
    "svr__gamma": loguniform(0.001, 0.1),
    "svr__C": uniform(1, 10),
}

# 100 sampled candidates x 3 folds = 300 SVR fits, so this cell is slow.
# - scoring="r2" spells out the metric the search ranks candidates by; it is
#   the same metric the regressor's default scorer uses, so results match.
# - n_jobs=-1 runs the fits on all available cores; the sampled candidates are
#   still governed by random_state, so the outcome does not change.
rnd_search_cv = RandomizedSearchCV(
    svm_clf,
    param_distrib,
    n_iter=100,
    cv=3,
    scoring="r2",
    n_jobs=-1,
    random_state=42,
)
rnd_search_cv.fit(X_train, y_train)

In [18]:
# The pipeline configured with the best hyperparameters the search sampled.
rnd_search_cv.best_estimator_

In [19]:
# Mean cross-validated score of the best candidate. The search ranks candidates
# by R^2 (higher is better), so this value is not an error and is not directly
# comparable to the MSE/RMSE figures elsewhere in this notebook.
rnd_search_cv.best_score_

0.7453216639743857

In [21]:
# Re-score the tuned pipeline with cross-validation on the training split,
# this time reporting RMSE per fold (sign flipped because scikit-learn returns
# the negated value) so it can be compared with the linear baseline.
best_svr = rnd_search_cv.best_estimator_
-cross_val_score(
    best_svr,
    X_train,
    y_train,
    scoring="neg_root_mean_squared_error",
)

array([0.58835648, 0.57468589, 0.58085278, 0.57109886, 0.59853029])