Train and fine-tune an SVM regressor on the California housing dataset. You can
 use the original dataset rather than the tweaked version we used in Chapter 2,
 which you can load using sklearn.datasets.fetch_california_housing().
 The targets represent hundreds of thousands of dollars. Since there are over
 20,000 instances, SVMs can be slow, so for hyperparameter tuning you should
 use far fewer instances (e.g., 2,000) to test many more hyperparameter
 combinations. What is your best model’s RMSE?

### Loading the Dataset

In [73]:
from sklearn.datasets import fetch_california_housing

# Load the original California housing dataset.
housing = fetch_california_housing()
# Feature matrix and target vector; the targets are expressed in $100,000s.
X, y = housing.data, housing.target


In [75]:
# List the fields available on the loaded dataset object.
print(housing.keys())

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])


### Split into test and train set

In [78]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the instances as a test set; the seed is fixed so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Training a simple LinearSVR on the training set

In [81]:
import numpy as np
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# First attempt: standardize the features, then fit a linear SVM regressor
# without setting max_iter (the solver's own iteration limit applies).
lsvr = make_pipeline(
    StandardScaler(),
    LinearSVR(dual=True, random_state=42),
)

lsvr.fit(X_train, y_train)



In [82]:
import numpy as np
from sklearn.svm import LinearSVR
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

# Same pipeline as before, but with max_iter raised to 5000 so that
# liblinear can converge.
lsvr = make_pipeline(
    StandardScaler(),
    LinearSVR(dual=True, max_iter=5000, random_state=42),
)

lsvr.fit(X_train, y_train)

### Getting the performance metrics using RMSE

In [85]:
# Prefer scikit-learn's own root_mean_squared_error. If that import fails
# (presumably on scikit-learn releases that predate the function -- confirm
# against the installed version), define an equivalent helper on top of
# mean_squared_error so the rest of the notebook can call the same name.
try:
    from sklearn.metrics import root_mean_squared_error
except ImportError:
    from sklearn.metrics import mean_squared_error

    def root_mean_squared_error(labels, predictions):
        """Return the root mean squared error between labels and predictions.

        Fallback used only when sklearn.metrics does not provide
        root_mean_squared_error.
        """
        # squared=False asks mean_squared_error for the root of the MSE.
        return mean_squared_error(labels, predictions, squared=False)

In [88]:
# Predictions on the training set itself, so the error computed from them
# is a training error rather than an estimate of generalization.
y_pred = lsvr.predict(X_train)

In [90]:
mse = root_mean_squared_error(y_train, y_pred)
mse

0.979565447829459

In [92]:
np.sqrt(mse) #Computing thr RMSE

0.9897299873346563

### Hyperparameter Tuning

In [95]:
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import loguniform, uniform

# Pipeline under tuning: feature scaling followed by a kernel SVR.
svm_reg = make_pipeline(StandardScaler(), SVR())

# Sampling distributions, keyed as "<pipeline step>__<parameter>".
param_distrib = dict(
    svr__gamma=loguniform(1e-3, 1e-1),
    svr__C=uniform(loc=1, scale=10),  # i.e. C is drawn from [1, 11]
)
rnd_search_cv = RandomizedSearchCV(
    estimator=svm_reg,
    param_distributions=param_distrib,
    n_iter=100,
    cv=3,
    random_state=42,
)
# Tune on the first 2,000 training instances only, to keep the search fast.
rnd_search_cv.fit(X_train[:2000], y_train[:2000])

In [97]:
# Show the best pipeline found by the randomized search.
rnd_search_cv.best_estimator_

In [99]:
from sklearn.model_selection import cross_val_score



In [104]:
# Cross-validated RMSE of the tuned pipeline on the full training set. The
# scorer reports negated RMSE, hence the leading minus sign.
-cross_val_score(
    estimator=rnd_search_cv.best_estimator_,
    X=X_train,
    y=y_train,
    scoring="neg_root_mean_squared_error",
)

array([0.58835648, 0.57468589, 0.58085278, 0.57109886, 0.59853029])

In [106]:
# Final check on the held-out test set using the best pipeline from the search.
# NOTE(review): the search was fit on X_train[:2000] only, so this estimator
# has presumably seen just those 2,000 instances -- confirm whether a refit on
# the full training set is wanted before reporting this number.
y_pred = rnd_search_cv.best_estimator_.predict(X_test)
rmse = root_mean_squared_error(y_test, y_pred)
rmse

0.5854732265172243