In [1]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset and pull out the pieces used below:
# the feature matrix, the regression target, and the feature (column) names.
dataset = fetch_california_housing()
X_full = dataset.data
y_full = dataset.target
feature_names = dataset.feature_names

In [7]:
from sklearn.model_selection import train_test_split
# Hold out 33% of the samples as a test set; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X_full,
    y_full,
    test_size=0.33,
    random_state=42,
)

In [10]:
from sklearn.preprocessing import StandardScaler
# Learn the scaling statistics from the training split ONLY, then apply that
# same fitted scaler to both splits, so nothing about the test set leaks into
# preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [11]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Baseline: support-vector regression with a degree-2 polynomial kernel,
# trained on the scaled training split and scored by mean squared error on
# the scaled test split. `fit` returns the estimator itself, so it can be
# chained onto the constructor.
svm_poly_reg = SVR(kernel="poly", degree=2, C=100, epsilon=0.1).fit(
    X_train_scaled, y_train
)
y_pred = svm_poly_reg.predict(X_test_scaled)
print(mean_squared_error(y_test, y_pred))



1.348101122840157


### The RBF kernel performs significantly better than the degree-2 polynomial kernel (test MSE ≈ 0.32 vs. ≈ 1.35):


In [12]:
import numpy as np
# Same C and epsilon as the polynomial-kernel model, but with the RBF kernel.
# "rbf" is SVR's default kernel; it is spelled out here so the comparison
# between the two kernels is explicit.
svm_rbf_reg = SVR(kernel="rbf", C=100, epsilon=0.1)
svm_rbf_reg.fit(X_train_scaled, y_train)

# Backward-compatible alias: this cell used to bind the RBF model to the
# misleading name `svm_poly_reg`. The alias keeps any later cell that still
# uses that name pointing at the same model; drop it once no cell relies on it.
svm_poly_reg = svm_rbf_reg

# Score on the held-out test split; RMSE is in the same units as the target.
y_pred = svm_rbf_reg.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"mse{mse} RMSE {np.sqrt(mse)}")

mse0.31743482649692994 RMSE 0.5634135483789239


In [13]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Search space for the (default, RBF-kernel) SVR:
#   gamma ~ log-uniform on [0.001, 0.1]
#   C     ~ uniform on [1, 11] -- scipy's uniform(loc, scale) spans
#           [loc, loc + scale], so uniform(1, 10) is NOT capped at 10.
param_distributions = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}

# 30 sampled candidates x 3 CV folds = 90 fits.
#   random_state: fixes which candidates are sampled, so best_params_ and the
#                 score below are reproducible (same seed as the data split).
#   n_jobs=-1:    runs the independent fits on all available cores; the search
#                 result is unchanged, only the wall-clock time.
rnd_search_cv = RandomizedSearchCV(
    SVR(),
    param_distributions,
    n_iter=30,
    cv=3,
    verbose=2,
    random_state=42,
    n_jobs=-1,
)
rnd_search_cv.fit(X_train_scaled, y_train)

# Report the winning hyper-parameters, then score the refit best estimator on
# the held-out test split.
print(rnd_search_cv.best_params_)
y_pred = rnd_search_cv.best_estimator_.predict(X_test_scaled)
mse = mean_squared_error(y_test, y_pred)
print(f"mse{mse} RMSE {np.sqrt(mse)}")

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] END .....C=7.713045932283995, gamma=0.00884066642569577; total time=   6.3s
[CV] END .....C=7.713045932283995, gamma=0.00884066642569577; total time=   7.0s
[CV] END .....C=7.713045932283995, gamma=0.00884066642569577; total time=   6.7s
[CV] END ..C=2.5042228146489576, gamma=0.0012603710086651644; total time=   6.9s
[CV] END ..C=2.5042228146489576, gamma=0.0012603710086651644; total time=   6.8s
[CV] END ..C=2.5042228146489576, gamma=0.0012603710086651644; total time=   6.1s
[CV] END ...C=3.487024266376201, gamma=0.0028156969521102326; total time=   6.3s
[CV] END ...C=3.487024266376201, gamma=0.0028156969521102326; total time=   6.6s
[CV] END ...C=3.487024266376201, gamma=0.0028156969521102326; total time=   6.4s
[CV] END ..C=10.152290247045073, gamma=0.0021592992405838804; total time=   6.4s
[CV] END ..C=10.152290247045073, gamma=0.0021592992405838804; total time=   6.7s
[CV] END ..C=10.152290247045073, gamma=0.0021592