## Import library and data and split data

In [2]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.svm import LinearSVR, SVR
from sklearn.linear_model import SGDRegressor
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [3]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split


housing = fetch_california_housing()
X = StandardScaler().fit_transform(housing["data"])
y = housing["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_train.shape

(16512, 8)

In [4]:
X_train.mean()

0.0011352721859367023

In [5]:
y_train

array([ 1.867,  0.52 ,  1.152, ...,  2.271,  2.817,  1.828])

## Start Model and Train the model prototype

### First lets try linear Model

In [6]:
lin_svr = LinearSVR(loss='squared_epsilon_insensitive', dual=False)

In [7]:
lin_svr.fit(X_train, y_train)

LinearSVR(C=1.0, dual=False, epsilon=0.0, fit_intercept=True,
     intercept_scaling=1.0, loss='squared_epsilon_insensitive',
     max_iter=1000, random_state=None, tol=0.0001, verbose=0)

In [8]:
from sklearn.metrics import mean_squared_error

In [9]:
mse_lin = mean_squared_error(y_train, lin_svr.predict(X_train))
rmse_lin = np.sqrt(mse_lin)
rmse_lin

0.72571885704145711

### Then Use kernelized search with RBFkernel

In [10]:
ker_svr = SVR()

In [11]:
ker_svr.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [12]:
mse_ker = mean_squared_error(y_train, ker_svr.predict(X_train))
rmse_ker = np.sqrt(mse_ker)
rmse_ker

0.58173193703543336

###  Seems rbf kernel is better

Let's try a grid search on this model

In [13]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

In [14]:
param_dict = {
    
}

param_dict = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rand_ker_svr=RandomizedSearchCV(ker_svr, param_dict, verbose=2, n_iter=50, scoring="neg_mean_squared_error")

Create a smaller training set for value exploration

In [15]:
small_size = 2000
X_train_small = X_train[:small_size]
y_train_small = y_train[:small_size]

Now do the randomized gird search

In [16]:
rand_ker_svr.fit(X_train_small, y_train_small)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] C=8.45150477368, gamma=0.0023411496656 ..........................
[CV] ........... C=8.45150477368, gamma=0.0023411496656, total=   0.1s
[CV] C=8.45150477368, gamma=0.0023411496656 ..........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] ........... C=8.45150477368, gamma=0.0023411496656, total=   0.1s
[CV] C=8.45150477368, gamma=0.0023411496656 ..........................
[CV] ........... C=8.45150477368, gamma=0.0023411496656, total=   0.1s
[CV] C=8.4411895693, gamma=0.0124238518199 ...........................
[CV] ............ C=8.4411895693, gamma=0.0124238518199, total=   0.1s
[CV] C=8.4411895693, gamma=0.0124238518199 ...........................
[CV] ............ C=8.4411895693, gamma=0.0124238518199, total=   0.1s
[CV] C=8.4411895693, gamma=0.0124238518199 ...........................
[CV] ............ C=8.4411895693, gamma=0.0124238518199, total=   0.1s
[CV] C=8.69827158034, gamma=0.0397416773495 ..........................
[CV] ........... C=8.69827158034, gamma=0.0397416773495, total=   0.1s
[CV] C=8.69827158034, gamma=0.0397416773495 ..........................
[CV] ........... C=8.69827158034, gamma=0.0397416773495, total=   0.1s
[CV] C=8.69827158034, gamma=0.0397416773495 ..........................
[CV] .

[CV] ........... C=5.91160840949, gamma=0.0245752934352, total=   0.1s
[CV] C=5.21175045911, gamma=0.0772734322684 ..........................
[CV] ........... C=5.21175045911, gamma=0.0772734322684, total=   0.1s
[CV] C=5.21175045911, gamma=0.0772734322684 ..........................
[CV] ........... C=5.21175045911, gamma=0.0772734322684, total=   0.1s
[CV] C=5.21175045911, gamma=0.0772734322684 ..........................
[CV] ........... C=5.21175045911, gamma=0.0772734322684, total=   0.1s
[CV] C=7.16593056565, gamma=0.00101405525107 .........................
[CV] .......... C=7.16593056565, gamma=0.00101405525107, total=   0.1s
[CV] C=7.16593056565, gamma=0.00101405525107 .........................
[CV] .......... C=7.16593056565, gamma=0.00101405525107, total=   0.1s
[CV] C=7.16593056565, gamma=0.00101405525107 .........................
[CV] .......... C=7.16593056565, gamma=0.00101405525107, total=   0.1s
[CV] C=6.08383613696, gamma=0.00192611805437 .........................
[CV] .

[CV] ............. C=1.8230692142, gamma=0.064571975659, total=   0.1s
[CV] C=1.8230692142, gamma=0.064571975659 ............................
[CV] ............. C=1.8230692142, gamma=0.064571975659, total=   0.1s
[CV] C=1.8230692142, gamma=0.064571975659 ............................
[CV] ............. C=1.8230692142, gamma=0.064571975659, total=   0.1s
[CV] C=10.8496967369, gamma=0.00104096800857 .........................
[CV] .......... C=10.8496967369, gamma=0.00104096800857, total=   0.1s
[CV] C=10.8496967369, gamma=0.00104096800857 .........................
[CV] .......... C=10.8496967369, gamma=0.00104096800857, total=   0.1s
[CV] C=10.8496967369, gamma=0.00104096800857 .........................
[CV] .......... C=10.8496967369, gamma=0.00104096800857, total=   0.1s
[CV] C=2.50482458241, gamma=0.0155651963605 ..........................
[CV] ........... C=2.50482458241, gamma=0.0155651963605, total=   0.1s
[CV] C=2.50482458241, gamma=0.0155651963605 ..........................
[CV] .

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   18.6s finished


RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=50, n_jobs=1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x117876198>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x117876748>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=2)

In [18]:
results = -rand_ker_svr.best_score_
results

0.42269990186560363

Seems much better, let's try it on a larger training set

In [19]:
best_ker_svr = rand_ker_svr.best_estimator_

In [20]:
best_ker_svr.fit(X_train, y_train)

SVR(C=10.673350180213307, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.076173478956985535, kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [21]:
mse_best_ker = mean_squared_error(y_train, best_ker_svr.predict(X_train))
rmse_best_ker = np.sqrt(mse_best_ker)
rmse_best_ker

0.5653235570794668

## Compare those different models

In [22]:
best_y_pred = best_ker_svr.predict(X_test)
lin_y_pred = lin_svr.predict(X_test)

In [23]:
test_mse_best_ker = mean_squared_error(y_test, best_y_pred)
test_rmse_best_ker = np.sqrt(test_mse_best_ker)
test_rmse_best_ker

0.57776851603657986

In [24]:
test_mse_lin_ker = mean_squared_error(y_test, lin_y_pred)
test_rmse_lin_ker = np.sqrt(test_mse_lin_ker)
test_rmse_lin_ker

0.71809070267958597

# Seems the model fits just well, and no overfitting no under fitting!

# Fin