# 6.5 - Grid Search

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
# Load the 'titanic' example dataset through seaborn's load_dataset helper.
# The cells below treat the result as a pandas DataFrame.
titanic = sns.load_dataset('titanic')

def convert_class(in_string):
    """Translate a passenger-class label into its ordinal number.

    'First' maps to 1, 'Second' to 2 and 'Third' to 3. Any other value
    matches none of the labels and yields None.
    """
    for rank, label in enumerate(('First', 'Second', 'Third'), start=1):
        if in_string == label:
            return rank
    return None

# Encode passenger class as the ordinal 1/2/3 and sex as a 0/1 flag (1 = male).
titanic['class'] = titanic['class'].apply(convert_class)
titanic['sex'] = titanic['sex'].apply(lambda x: 1 if x == 'male' else 0)

# One-hot encode the remaining categorical columns. drop_first omits the first
# level of each column so the resulting dummy columns are not perfectly collinear.
# Each encoded column gets its own variable so that no dummy frame overwrites
# another before the concat below.
embarked_onehot = pd.get_dummies(titanic.embarked, drop_first=True)
town_onehot = pd.get_dummies(titanic.embark_town, drop_first=True)
deck_onehot = pd.get_dummies(titanic.deck, drop_first=True)
who_onehot = pd.get_dummies(titanic.who, drop_first=True)

# Append all four sets of dummy columns, then drop the source columns they replace.
titanic = pd.concat(
    [titanic, embarked_onehot, town_onehot, deck_onehot, who_onehot],
    axis='columns',
)
titanic = titanic.drop(['embarked', 'embark_town', 'deck', 'who'], axis='columns')
# NOTE(review): `alive` is presumably a text duplicate of the `survived` target,
# which would leak the label if kept as a feature — confirm against the dataset.
titanic = titanic.drop('alive', axis='columns')
titanic.head()

from sklearn.model_selection import train_test_split

# Separate the feature columns from the `survived` target.
X = titanic.drop('survived', axis='columns')
y = titanic.survived

# Hold out 30% of the rows for testing, stratified on y so both halves keep the
# same class balance. random_state is fixed so that a Restart & Run All
# reproduces the same split (and therefore the same scores downstream).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# SimpleImputer (in sklearn.impute) is the replacement for the Imputer class
# that was removed from sklearn.preprocessing.
from sklearn.impute import SimpleImputer

# Fill missing values with the per-column mean. The imputer is fit on the
# training split only, so no statistics from the test rows leak into training.
imp = SimpleImputer(strategy='mean')
imp.fit(X_train)
X_train = imp.transform(X_train)
X_test = imp.transform(X_test)

# How do we decide which hyperparameters to use?
With the random forest from earlier, we just played around with different values. One method to find the right values is to perform an exhaustive search over all combinations of values. This is a grid search.

In [6]:
# import grid search
# GridSearchCV and cross_val_score live in sklearn.model_selection; the older
# sklearn.cross_validation module has been removed from scikit-learn.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, cross_val_score

In [3]:

# define a dictionary of value ranges. the dictionary keys are some random forest parameters


# initialize a grid search object with an estimator, parameter grid, scoring option, and number of cv-folds


# fit the grid search object to training data. it will search the grid for the best parameter values given
# ...the training data



In [4]:
# After a grid search object has been fit, it can return the estimator with the best parameters


# It can return just the best parameters


# It can return the best cross-validated score


# It can return scores for every parameter combination. here we just display the first 10 combinations.




# Your turn. Perform a grid search for the best logistic regression parameters.

In [34]:
# import LogisticRegression


# initialize estimator object


# define dictionary of parameter ranges


# initialize grid search object


# fit the grid to perform a grid search


# print the best parameters


# print best score


# Pipelining then grid searching
You can even grid search on a pipeline object!

Let's bring back our imperfect titanic dataset. We want to perform missing value imputation, feature scaling, and prediction using an svm.

In [36]:
# Start again from the un-imputed frame: the pipeline will handle missing values.
X = titanic.drop('survived', axis='columns')
y = titanic.survived

# Stratified 70/30 split; random_state is fixed so reruns produce the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [38]:
# import Pipeline, SVC, and transformer classes
# SimpleImputer (sklearn.impute) replaces the Imputer class that was removed
# from sklearn.preprocessing.
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# initialize objects we want to put in the pipeline


# define the steps we want our pipeline to take. here we add transformers and an optional estimator at the end.
# these must always take the form of a list of tuples.
# the name you give in the first position of each tuple is used as the prefix when defining the dictionary of parameters.


# initialize pipeline with the list of steps




### When defining the dictionary of parameters, we must now specify in each dictionary key which transformer/object the parameter belongs to. This is done by joining the step name and the parameter name with *two underscores*.

In [5]:


# remember that we're performing a grid search over parameters for multiple objects in a pipeline




### If you use the grid search object to predict, it will use the best estimator.