## Tuning our model

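In this notebook, we tune the hyperparameters of the best-performing model from the previous notebook (a gradient boosting regressor) using a cross-validated grid search.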

In [9]:
import pandas as pd
import numpy as np

from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

In [10]:
## Load the cleaned data set.
home_data = pd.read_csv('cleaned_data.csv')

## The list of features we narrowed down in the previous notebook.
features = [
    'LotFrontage', 'LotArea', 'OverallQual', 'YearBuilt', 'ExterQual', 'BsmtFinSF1', 'TotalBsmtSF',
    '2ndFlrSF', 'GrLivArea', 'FullBath', 'HalfBath', 'KitchenQual', 'Fireplaces', 'GarageArea',
    'GarageQual', 'GarageCond', 'Bsmt_magnitude', 'Remod_diff', 'MSSubClass_encoded', 'Neighborhood_encoded',
    'MSZoning_encoded', 'HouseStyle_encoded', 'LotConfig_encoded', 'Condition1_encoded', 'BldgType_encoded',
    'SaleType_encoded', 'SaleCondition_encoded', 'Attchd', 'Detchd', 'other_garage', 'RFn', 'Unf', 'Fin',
]

## Target is the log of the selling price. Select it by name rather than by
## position: the last column of the frame is a one-hot dummy ('other_roofs' in
## the preview below), so `home_data.columns[-1]` does not pick the price.
target = 'log_price'

## Fail fast if the CSV lacks any column the model needs.
missing_columns = [col for col in features + [target] if col not in home_data.columns]
assert not missing_columns, f"cleaned_data.csv is missing columns: {missing_columns}"

home_data.sample(5)

Unnamed: 0,Id,LotFrontage,LotArea,LotShape,LandContour,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,...,other_garage,RFn,Unf,Fin,Reg,IR1,other_lots,Gable,Hip,other_roofs
884,1179,54.0,7681,IR1,Lvl,5,6,1921,1950,Gable,...,0,0,1,0,0,1,0,1,0,0
886,1183,160.0,15623,IR1,Lvl,10,5,1996,1996,Hip,...,0,0,0,1,0,1,0,0,1,0
124,175,47.0,12416,IR1,Lvl,6,5,1986,1986,Gable,...,0,0,0,1,0,1,0,1,0,0
56,71,95.0,13651,IR1,Lvl,7,6,1973,1973,Gable,...,0,0,0,1,0,1,0,1,0,0
684,900,65.0,6993,Reg,Lvl,5,7,1961,1994,Gable,...,0,0,1,0,1,0,0,1,0,0


In [11]:
## Base estimator handed to the grid search. A fixed random_state makes the
## fitted trees, and therefore the selected hyperparameters, repeatable when
## the notebook is re-run from a fresh kernel.
gbc = GradientBoostingRegressor(random_state=42)

In [12]:
## Hyperparameter grid to search: 7 x 3 x 4 = 84 candidate settings.
## n_estimators is built from plain Python ints (50, 150, ..., 650) so the
## values reported in best_params_ are ordinary ints rather than NumPy scalars.
param_grid = {
    "n_estimators": list(range(50, 750, 100)),
    "learning_rate": [0.01, 0.1, 1],
    "max_depth": [1, 3, 5, 10],
}

## Exhaustive 5-fold cross-validated search (84 candidates x 5 folds = 420 fits).
## n_jobs=-1 runs the fits on all available CPU cores; parallelism does not
## change which candidates are evaluated or how they are scored.
search = GridSearchCV(gbc, param_grid, cv=5, n_jobs=-1).fit(
    home_data[features], home_data['log_price']
)

In [13]:
## Hyperparameter combination from param_grid that achieved the best mean
## cross-validated score during the grid search.
search.best_params_

{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 250}