# Grid Searching Models



In [1]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

# Note: we use Pipeline (with explicitly named steps) instead of make_pipeline,
# because the grid search below refers to the step by its name.

In [2]:
# Load the iris dataset, then pull out the feature matrix and the class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [3]:
# Create a pipeline with a single step named 'classifier'. The estimator put
# here is only a placeholder: the grid search below replaces it with each
# candidate model through the 'classifier' key.
pipe = Pipeline([('classifier', RandomForestClassifier())])

# Create space of candidate learning algorithms and their hyperparameters.
# Each dict is its own sub-grid; a key of the form 'classifier__<name>' sets
# hyperparameter <name> on whichever estimator occupies the 'classifier' step.
search_space = [
    {
        # The solver is pinned to liblinear because it supports both the 'l1'
        # and 'l2' penalties searched below; relying on the library default is
        # fragile, since newer scikit-learn defaults to a solver that rejects
        # 'l1' and those candidates would then fail to fit.
        'classifier': [LogisticRegression(solver='liblinear')],
        'classifier__penalty': ['l1', 'l2'],
        'classifier__C': np.logspace(0, 4, 10),
    },
    {
        'classifier': [RandomForestClassifier()],
        'classifier__n_estimators': [10, 100, 1000],
        'classifier__max_features': [1, 2, 3],
    },
]

In [4]:
# Pass the pipeline and the multi-model search space to GridSearchCV,
# just as you would for a single model (5-fold cross-validation, no logging).
clf = GridSearchCV(estimator=pipe, param_grid=search_space, cv=5, verbose=0)

In [5]:
# Run the search over every candidate model and hyperparameter combination
best_model = clf.fit(X, y=y)

In [6]:
# Find the winning model by looking at the 'classifier' step of the best
# pipeline; its repr also shows the hyperparameters the search selected.
best_model.best_estimator_.named_steps['classifier']


LogisticRegression(C=7.742636826811269, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l1', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)

In [7]:
# Predict class labels for the training samples using the fitted search object
best_model.predict(X=X)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [None]:
# Sketch: the same workflow wrapped in a reusable function

#def full_fit(model, hyperparams):
    #pipe(scaling, model)
    #grid(hyperparams)
    #return(whatever is needed)

### Exercise

Using either a manual search or an automated search like the example above, examine the `ames_housing.csv` file, select appropriate features, perform any necessary preprocessing, and compare the results of the following models:

- `LinearRegression`
- `Ridge`
- `Lasso`
- `DecisionTreeRegressor`
- `RandomForestRegressor`

You may want to incorporate a feature selection routine here. Additionally, try bagging and boosting with two of the models to see whether this improves performance. You can also compare the results against the `xgboost` library.

- (10 minutes Preprocessing)
- (10 minutes Feature Selection)
- (10 minutes Model Implementation)
- (5 minutes evaluation and discussion)