# Getting the data

In [None]:
from sklearn.datasets import fetch_california_housing

housing = fetch_california_housing()

In [3]:
housing.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])

In [4]:
x = housing['data']
y = housing['target']

In [9]:
from sklearn.model_selection import train_test_split

x_tr, x_te, y_tr, y_te = train_test_split(x, y, test_size=0.2)

In [10]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_tr_sc = scaler.fit_transform(x_tr)
x_te_sc = scaler.fit_transform(x_te)

# Trying LinearSVR

In [29]:
from sklearn.svm import LinearSVR

lin_reg = LinearSVR(random_state=42)
lin_reg.fit(x_tr_sc, y_tr)



In [30]:
y_hat = lin_reg.predict(x_tr_sc)

In [31]:
from sklearn.metrics import mean_squared_error

mean_squared_error(y_tr, y_hat)

0.9143984109631319

# Tryng SVR with RandomSearch

In [38]:
from sklearn.svm import SVR
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

param_distributions = {"gamma" : reciprocal(0.001, 0.1), 
                       "C" : uniform(1, 10)}
rnd_search_cv = RandomizedSearchCV(SVR(), param_distributions, n_iter=10,
                                   verbose=2, cv=3, random_state=42)
rnd_search_cv.fit(x_tr_sc, y_tr)

Fitting 3 folds for each of 10 candidates, totalling 30 fits
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   8.8s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   9.5s
[CV] END .....C=4.745401188473625, gamma=0.07969454818643928; total time=   9.5s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   8.3s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   8.9s
[CV] END .....C=8.31993941811405, gamma=0.015751320499779724; total time=   8.6s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   8.2s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   8.2s
[CV] END ....C=2.560186404424365, gamma=0.002051110418843397; total time=   8.1s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=   8.0s
[CV] END ....C=1.5808361216819946, gamma=0.05399484409787431; total time=   8.0s
[CV] END ....C=1.5808361216819946, gamma=0.05399

In [39]:
from sklearn.metrics import mean_squared_error
import numpy as np

y_hat = rnd_search_cv.best_estimator_.predict(x_tr_sc)
mse = mean_squared_error(y_tr, y_hat)
np.sqrt(mse)

0.5720180558657355

In [40]:
y_hat = rnd_search_cv.best_estimator_.predict(x_te_sc)
mse = mean_squared_error(y_te, y_hat)
np.sqrt(mse)

1.2927236394989867