## Cross Validated Grid Search

Let's do a Cross Validated Grid Search for the following Models
- Ridge
- Lasso
- KNN (K Nearest Neighbor)
- SVM (Support Vector Machine)
- Decision Tree

In [5]:
cd ..

/home/jovyan/Ames_Housing_Data


In [6]:
!pip --quiet install mglearn

In [7]:
import mglearn
from sklearn.linear_model import Ridge, Lasso, LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, ShuffleSplit, train_test_split
from sklearn.preprocessing import StandardScaler, scale
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
run src/load_data_2.py

In [9]:
housing_df = load_train_data()

In [10]:
clean_data(housing_df)
housing_df.shape
#housing_df.dtypes

(1423, 78)

#### One hot encode the categorical features

In [11]:
features = one_hot_encode_categorical_features(housing_df)

#### Filter just the Features selected

In [12]:
features = features[eda_selected_features()]

#### Split the dataset into train and test

In [13]:
target = housing_df["SalePrice"]
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size = .25, random_state = 42)

#### Scale the data

In [14]:
scaler = StandardScaler()
scaler.fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### (1) Grid Search CV on Ridge

In [15]:
ridge = Ridge()
ridge.get_params()

{'alpha': 1.0,
 'copy_X': True,
 'fit_intercept': True,
 'max_iter': None,
 'normalize': False,
 'random_state': None,
 'solver': 'auto',
 'tol': 0.001}

In [16]:
ridge_params = [
    { 'alpha': np.logspace(.1, 1, 22), 'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga'] }
]
n_folds = 5

In [17]:
ridge_gs = GridSearchCV(ridge, param_grid= ridge_params, cv=n_folds, n_jobs=-1)

In [18]:
ridge_gs.fit(X_train_scaled, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='auto', tol=0.001),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'alpha': array([  1.25893,   1.3895 ,   1.53361,   1.69267,   1.86822,   2.06199,
         2.27585,   2.51189,   2.77241,   3.05995,   3.37731,   3.72759,
         4.1142 ,   4.54091,   5.01187,   5.53168,   6.1054 ,   6.73863,
         7.43753,   8.20891,   9.06031,  10.     ]), 'solver': ['auto', 'svd', 'cholesky', 'lsqr', 'sparse_cg', 'sag', 'saga']}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [19]:
ridge_gs.best_score_

0.68093786461458439

In [20]:
ridge_gs.best_params_

{'alpha': 10.0, 'solver': 'saga'}

In [21]:
ridge_gs.best_estimator_

Ridge(alpha=10.0, copy_X=True, fit_intercept=True, max_iter=None,
   normalize=False, random_state=None, solver='saga', tol=0.001)

In [22]:
ridge_gs.score(X_train_scaled, y_train)

0.69239557261245754

In [23]:
ridge_gs.score(X_test_scaled, y_test)

0.71501998106573317

### (2) Grid Search CV on Lasso

In [None]:
lasso = Lasso()

In [None]:
lasso_params = [
    { 'alpha': np.logspace(.1, 1, 12) }, # last parameter represents the number of samples
    { 'alpha': np.logspace(-3, 3, 5) },
]
n_folds = 5

In [None]:
lasso_gs = GridSearchCV(lasso, param_grid= lasso_params, cv=n_folds, n_jobs=-1)

In [None]:
lasso_gs.fit(X_train_scaled, y_train)

In [None]:
lasso_gs.best_score_

In [None]:
lasso_gs.best_params_

In [None]:
lasso_gs.best_estimator_

In [None]:
y_test_pred = lasso_gs.predict(X_test_scaled)

In [None]:
lasso_gs.score(X_train_scaled, y_train)

In [None]:
lasso_gs.score(X_test_scaled, y_test)

In [None]:
#np.logspace(-4, -0.5, 30)
#np.logspace(-3, 3, 5)

### (3) Grid Search CV on KNN

In [None]:
knn = KNeighborsRegressor()

In [None]:
knn_params = [
    { 'n_neighbors': range(5, 100, 5) }  
]
n_folds = 10

In [None]:
knn_gs = GridSearchCV(knn, param_grid= knn_params, cv=n_folds, n_jobs=-1)

In [None]:
knn_gs.fit(X_train_scaled, y_train)

In [None]:
knn_gs.best_score_

In [None]:
knn_gs.best_params_

In [None]:
knn_gs.best_estimator_

In [None]:
y_test_pred = knn_gs.predict(X_test_scaled)

In [None]:
knn_gs.score(X_train_scaled, y_train)

In [None]:
knn_gs.score(X_test_scaled, y_test)

### (4) Grid Search CV on SVM

In [24]:
svm = SVR()

In [25]:
svm_params = [
    { 'C': np.logspace(-3,3,7), 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'gamma': [0.001, 0.0001], 'epsilon':[.1,.2] }
]
n_folds = 5

In [26]:
svm_gs = GridSearchCV(svm, param_grid= svm_params, cv=n_folds, n_jobs=-1)

In [27]:
svm_gs.fit(X_train_scaled, y_train)

GridSearchCV(cv=5, error_score='raise',
       estimator=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto',
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'C': array([  1.00000e-03,   1.00000e-02,   1.00000e-01,   1.00000e+00,
         1.00000e+01,   1.00000e+02,   1.00000e+03]), 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'], 'gamma': [0.001, 0.0001], 'epsilon': [0.1, 0.2]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [28]:
svm_gs.best_score_

0.67995802135361894

In [29]:
svm_gs.best_params_

{'C': 1000.0, 'epsilon': 0.1, 'gamma': 0.001, 'kernel': 'linear'}

In [30]:
svm_gs.best_estimator_

SVR(C=1000.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.001,
  kernel='linear', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

In [31]:
y_test_pred = svm_gs.predict(X_test_scaled)

In [32]:
svm_gs.score(X_train_scaled, y_train)

0.67923341444661423

In [33]:
svm_gs.score(X_test_scaled, y_test)

0.69676494229450214

### (5) Grid Search CV on DecisionTreeRegressor

In [110]:
dtree = DecisionTreeRegressor()

In [120]:
dtree_params = [
    { 'criterion': ['mse', 'friedman_mse', 'mae'], 'splitter': ['best', 'random'], 'max_depth': range(1,10,1) }  
]
n_folds = 10

In [121]:
dtree_gs = GridSearchCV(dtree, param_grid= dtree_params, cv=n_folds, n_jobs=-1)

In [122]:
dtree_gs.fit(X_train, y_train)

GridSearchCV(cv=10, error_score='raise',
       estimator=DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best'),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'criterion': ['mse', 'friedman_mse', 'mae'], 'splitter': ['best', 'random'], 'max_depth': range(1, 10)}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [123]:
dtree_gs.best_score_

0.71668452015278805

In [124]:
dtree_gs.best_params_

{'criterion': 'mse', 'max_depth': 8, 'splitter': 'best'}

In [125]:
dtree_gs.best_estimator_

DecisionTreeRegressor(criterion='mse', max_depth=8, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

In [126]:
y_test_pred = dtree_gs.predict(X_test)

In [127]:
dtree_gs.score(X_train, y_train)

0.92965213818241577

In [119]:
dtree_gs.score(X_test, y_test)

0.6818173199050338

In [None]:
y_train.mean()

In [None]:
y_test.mean()