## Import library and data and split data

In [19]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.svm import LinearSVR, SVR
from sklearn.linear_model import SGDRegressor
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [16]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split


housing = fetch_california_housing()
X = StandardScaler().fit_transform(housing["data"])
y = housing["target"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

X_train.shape

(16512, 8)

In [17]:
X_train.mean()

-0.00092779378616903081

In [18]:
y_train

array([ 5.00001,  2.282  ,  3.056  , ...,  0.922  ,  1.518  ,  0.62   ])

## Start Model and Train the model prototype

### First lets try linear Model

In [24]:
lin_svr = LinearSVR(loss='squared_epsilon_insensitive', dual=False)

In [25]:
lin_svr.fit(X_train, y_train)

LinearSVR(C=1.0, dual=False, epsilon=0.0, fit_intercept=True,
     intercept_scaling=1.0, loss='squared_epsilon_insensitive',
     max_iter=1000, random_state=None, tol=0.0001, verbose=0)

In [26]:
from sklearn.metrics import mean_squared_error

In [29]:
mse_lin = mean_squared_error(y_train, lin_svr.predict(X_train))
rmse_lin = np.sqrt(mse_lin)
rmse_lin

0.71555297827319209

### Then Use kernelized search with RBFkernel

In [30]:
ker_svr = SVR()

In [31]:
ker_svr.fit(X_train, y_train)

SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [33]:
mse_ker = mean_squared_error(y_train, ker_svr.predict(X_train))
rmse_ker = np.sqrt(mse_ker)
rmse_ker

0.57504938363002644

###  Seems rbf kernel is better

Let's try a grid search on this model

In [37]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

In [39]:
param_dict = {
    
}

param_dict = {"gamma": reciprocal(0.001, 0.1), "C": uniform(1, 10)}
rand_ker_svr=RandomizedSearchCV(ker_svr, param_dict, verbose=2, n_iter=50, scoring="neg_mean_squared_error")

Create a smaller training set for value exploration

In [40]:
small_size = 2000
X_train_small = X_train[:small_size]
y_train_small = y_train[:small_size]

Now do the randomized gird search

In [41]:
rand_ker_svr.fit(X_train_small, y_train_small)

Fitting 3 folds for each of 50 candidates, totalling 150 fits
[CV] C=9.82296394959, gamma=0.0141464620499 ..........................
[CV] ........... C=9.82296394959, gamma=0.0141464620499, total=   0.1s
[CV] C=9.82296394959, gamma=0.0141464620499 ..........................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s


[CV] ........... C=9.82296394959, gamma=0.0141464620499, total=   0.1s
[CV] C=9.82296394959, gamma=0.0141464620499 ..........................
[CV] ........... C=9.82296394959, gamma=0.0141464620499, total=   0.1s
[CV] C=1.94482789257, gamma=0.0589802173355 ..........................
[CV] ........... C=1.94482789257, gamma=0.0589802173355, total=   0.1s
[CV] C=1.94482789257, gamma=0.0589802173355 ..........................
[CV] ........... C=1.94482789257, gamma=0.0589802173355, total=   0.1s
[CV] C=1.94482789257, gamma=0.0589802173355 ..........................
[CV] ........... C=1.94482789257, gamma=0.0589802173355, total=   0.1s
[CV] C=1.77393880573, gamma=0.00113181656648 .........................
[CV] .......... C=1.77393880573, gamma=0.00113181656648, total=   0.1s
[CV] C=1.77393880573, gamma=0.00113181656648 .........................
[CV] .......... C=1.77393880573, gamma=0.00113181656648, total=   0.1s
[CV] C=1.77393880573, gamma=0.00113181656648 .........................
[CV] .

[CV] ........... C=4.82875922648, gamma=0.0189452252128, total=   0.1s
[CV] C=4.82875922648, gamma=0.0189452252128 ..........................
[CV] ........... C=4.82875922648, gamma=0.0189452252128, total=   0.1s
[CV] C=4.82875922648, gamma=0.0189452252128 ..........................
[CV] ........... C=4.82875922648, gamma=0.0189452252128, total=   0.1s
[CV] C=5.81341213795, gamma=0.0522605564537 ..........................
[CV] ........... C=5.81341213795, gamma=0.0522605564537, total=   0.1s
[CV] C=5.81341213795, gamma=0.0522605564537 ..........................
[CV] ........... C=5.81341213795, gamma=0.0522605564537, total=   0.1s
[CV] C=5.81341213795, gamma=0.0522605564537 ..........................
[CV] ........... C=5.81341213795, gamma=0.0522605564537, total=   0.1s
[CV] C=3.96816127129, gamma=0.0176230742221 ..........................
[CV] ........... C=3.96816127129, gamma=0.0176230742221, total=   0.1s
[CV] C=3.96816127129, gamma=0.0176230742221 ..........................
[CV] .

[CV] ........... C=6.62520319159, gamma=0.0998001853281, total=   0.1s
[CV] C=6.62520319159, gamma=0.0998001853281 ..........................
[CV] ........... C=6.62520319159, gamma=0.0998001853281, total=   0.1s
[CV] C=3.72631591669, gamma=0.0100813109296 ..........................
[CV] ........... C=3.72631591669, gamma=0.0100813109296, total=   0.1s
[CV] C=3.72631591669, gamma=0.0100813109296 ..........................
[CV] ........... C=3.72631591669, gamma=0.0100813109296, total=   0.1s
[CV] C=3.72631591669, gamma=0.0100813109296 ..........................
[CV] ........... C=3.72631591669, gamma=0.0100813109296, total=   0.1s
[CV] C=4.41634594057, gamma=0.00467538239874 .........................
[CV] .......... C=4.41634594057, gamma=0.00467538239874, total=   0.1s
[CV] C=4.41634594057, gamma=0.00467538239874 .........................
[CV] .......... C=4.41634594057, gamma=0.00467538239874, total=   0.1s
[CV] C=4.41634594057, gamma=0.00467538239874 .........................
[CV] .

[Parallel(n_jobs=1)]: Done 150 out of 150 | elapsed:   19.6s finished


RandomizedSearchCV(cv=None, error_score='raise',
          estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
          fit_params=None, iid=True, n_iter=50, n_jobs=1,
          param_distributions={'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x10e995d30>, 'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x10d54b0f0>},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring='neg_mean_squared_error',
          verbose=2)

In [51]:
results = -rand_ker_svr.best_score_
results

0.38351839849755115

Seems much better, let's try it on a larger training set

In [53]:
best_ker_svr = rand_ker_svr.best_estimator_

In [54]:
best_ker_svr.fit(X_train, y_train)

SVR(C=9.2045353256593661, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma=0.095407041708655715, kernel='rbf', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [55]:
mse_best_ker = mean_squared_error(y_train, best_ker_svr.predict(X_train))
rmse_best_ker = np.sqrt(mse_best_ker)
rmse_best_ker

0.55099765950237567

## Compare those different models

In [56]:
best_y_pred = best_ker_svr.predict(X_test)
lin_y_pred = lin_svr.predict(X_test)

In [58]:
test_mse_best_ker = mean_squared_error(y_test, best_y_pred)
test_rmse_best_ker = np.sqrt(test_mse_best_ker)
test_rmse_best_ker

0.59071113481912729

In [60]:
test_mse_lin_ker = mean_squared_error(y_test, lin_y_pred)
test_rmse_lin_ker = np.sqrt(test_mse_lin_ker)
test_rmse_lin_ker

1.1392903649252475

# Seems the model fits just well, and no overfitting no under fitting!

# Fin