# Setup

Make sure this notebook works in both Python 2 and Python 3, import a few common modules, and ensure Matplotlib renders figures inline.

In [462]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # for retina screens
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

Configure the notebook to display the result of every expression in a cell, not just the last one.

In [463]:
from IPython.core.interactiveshell import InteractiveShell
# The option is set on the class itself rather than on an instance. "all" asks
# IPython to display the value of every expression statement in a cell instead
# of only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Revert to the last line of output only
# InteractiveShell.ast_node_interactivity = "last_expr"

# Load Data

In [464]:
import pandas as pd

# Load the training and test CSVs (paths are relative to the notebook's folder)
houses_train, houses_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../Data/encoded.houses.train.csv',
                     '../Data/encoded.houses.test.csv')
)

In [465]:
# Report the (rows, columns) shape of each frame as a quick sanity check
print("houses_train dimensions: {}".format(houses_train.shape))
print("houses_test dimensions: {}".format(houses_test.shape))

houses_train dimensions: (1460, 465)
houses_test dimensions: (1459, 465)


In [466]:
# Let pandas display up to 328 columns before it elides the middle of a frame.
# NOTE(review): the frames have 465 columns (see the printed dimensions), so the
# preview below is still truncated; raise the limit (or pass None) to see them all.
pd.set_option("display.max_columns", 328)
# Preview the first three rows of the training frame
houses_train.head(3)

Unnamed: 0,MSSubClass_lev_x.120,MSSubClass_lev_x.160,MSSubClass_lev_x.180,MSSubClass_lev_x.190,MSSubClass_lev_x.20,MSSubClass_lev_x.30,MSSubClass_lev_x.40,MSSubClass_lev_x.45,MSSubClass_lev_x.50,MSSubClass_lev_x.60,MSSubClass_lev_x.70,MSSubClass_lev_x.75,MSSubClass_lev_x.80,MSSubClass_lev_x.85,MSSubClass_lev_x.90,MSZoning_lev_x.C..all.,MSZoning_lev_x.FV,MSZoning_lev_x.RH,MSZoning_lev_x.RL,MSZoning_lev_x.RM,LotFrontage_clean,LotArea_clean,Street_lev_x.Grvl,Street_lev_x.Pave,Alley_lev_x.Grvl,Alley_lev_x.NA,Alley_lev_x.Pave,LotShape_lev_x.IR1,LotShape_lev_x.IR2,LotShape_lev_x.IR3,LotShape_lev_x.Reg,LandContour_lev_x.Bnk,LandContour_lev_x.HLS,LandContour_lev_x.Low,LandContour_lev_x.Lvl,Utilities_lev_x.AllPub,Utilities_lev_x.NoSeWa,LotConfig_lev_x.Corner,LotConfig_lev_x.CulDSac,LotConfig_lev_x.FR2,LotConfig_lev_x.FR3,LotConfig_lev_x.Inside,LandSlope_lev_x.Gtl,LandSlope_lev_x.Mod,LandSlope_lev_x.Sev,Neighborhood_lev_x.Blmngtn,Neighborhood_lev_x.Blueste,Neighborhood_lev_x.BrDale,Neighborhood_lev_x.BrkSide,Neighborhood_lev_x.ClearCr,Neighborhood_lev_x.CollgCr,Neighborhood_lev_x.Crawfor,Neighborhood_lev_x.Edwards,Neighborhood_lev_x.Gilbert,Neighborhood_lev_x.IDOTRR,Neighborhood_lev_x.MeadowV,Neighborhood_lev_x.Mitchel,Neighborhood_lev_x.NAmes,Neighborhood_lev_x.NoRidge,Neighborhood_lev_x.NPkVill,Neighborhood_lev_x.NridgHt,Neighborhood_lev_x.NWAmes,Neighborhood_lev_x.OldTown,Neighborhood_lev_x.Sawyer,Neighborhood_lev_x.SawyerW,Neighborhood_lev_x.Somerst,Neighborhood_lev_x.StoneBr,Neighborhood_lev_x.SWISU,Neighborhood_lev_x.Timber,Neighborhood_lev_x.Veenker,Condition1_lev_x.Artery,Condition1_lev_x.Feedr,Condition1_lev_x.Norm,Condition1_lev_x.PosA,Condition1_lev_x.PosN,Condition1_lev_x.RRAe,Condition1_lev_x.RRAn,Condition1_lev_x.RRNe,Condition1_lev_x.RRNn,Condition2_lev_x.Artery,Condition2_lev_x.Feedr,Condition2_lev_x.Norm,Condition2_lev_x.PosA,Condition2_lev_x.PosN,Condition2_lev_x.RRAe,Condition2_lev_x.RRAn,Condition2_lev_x.RRNn,BldgType_lev_x.1Fam,BldgType_lev_x.2fmCon,BldgT
ype_lev_x.Duplex,BldgType_lev_x.Twnhs,BldgType_lev_x.TwnhsE,HouseStyle_lev_x.1.5Fin,HouseStyle_lev_x.1.5Unf,HouseStyle_lev_x.1Story,HouseStyle_lev_x.2.5Fin,HouseStyle_lev_x.2.5Unf,HouseStyle_lev_x.2Story,HouseStyle_lev_x.SFoyer,HouseStyle_lev_x.SLvl,OverallQual_lev_x.1,OverallQual_lev_x.10,OverallQual_lev_x.2,OverallQual_lev_x.3,OverallQual_lev_x.4,OverallQual_lev_x.5,OverallQual_lev_x.6,OverallQual_lev_x.7,OverallQual_lev_x.8,OverallQual_lev_x.9,OverallCond_lev_x.1,OverallCond_lev_x.2,OverallCond_lev_x.3,OverallCond_lev_x.4,OverallCond_lev_x.5,OverallCond_lev_x.6,OverallCond_lev_x.7,OverallCond_lev_x.8,OverallCond_lev_x.9,YearBuilt_clean,YearRemodAdd_clean,RoofStyle_lev_x.Flat,RoofStyle_lev_x.Gable,RoofStyle_lev_x.Gambrel,RoofStyle_lev_x.Hip,RoofStyle_lev_x.Mansard,RoofStyle_lev_x.Shed,RoofMatl_lev_x.ClyTile,RoofMatl_lev_x.CompShg,RoofMatl_lev_x.Membran,RoofMatl_lev_x.Metal,RoofMatl_lev_x.Roll,RoofMatl_lev_x.Tar.Grv,RoofMatl_lev_x.WdShake,RoofMatl_lev_x.WdShngl,Exterior1st_lev_x.AsbShng,Exterior1st_lev_x.AsphShn,Exterior1st_lev_x.BrkComm,Exterior1st_lev_x.BrkFace,Exterior1st_lev_x.CBlock,Exterior1st_lev_x.CemntBd,Exterior1st_lev_x.HdBoard,Exterior1st_lev_x.ImStucc,Exterior1st_lev_x.MetalSd,Exterior1st_lev_x.Plywood,Exterior1st_lev_x.Stone,Exterior1st_lev_x.Stucco,Exterior1st_lev_x.VinylSd,Exterior1st_lev_x.Wd.Sdng,Exterior1st_lev_x.WdShing,Exterior2nd_lev_x.AsbShng,Exterior2nd_lev_x.AsphShn,Exterior2nd_lev_x.Brk.Cmn,Exterior2nd_lev_x.BrkFace,Exterior2nd_lev_x.CBlock,Exterior2nd_lev_x.CmentBd,Exterior2nd_lev_x.HdBoard,Exterior2nd_lev_x.ImStucc,Exterior2nd_lev_x.MetalSd,Exterior2nd_lev_x.Other,Exterior2nd_lev_x.Plywood,Exterior2nd_lev_x.Stone,Exterior2nd_lev_x.Stucco,Exterior2nd_lev_x.VinylSd,...,PoolQC_lev_x.Ex,PoolQC_lev_x.Fa,PoolQC_lev_x.Gd,PoolQC_lev_x.NA,Fence_lev_x.GdPrv,Fence_lev_x.GdWo,Fence_lev_x.MnPrv,Fence_lev_x.MnWw,Fence_lev_x.NA,MiscFeature_lev_x.Gar2,MiscFeature_lev_x.NA,MiscFeature_lev_x.Othr,MiscFeature_lev_x.Shed,MiscFeature_lev_x.TenC,MiscVal_clean
,MoSold_clean,YrSold_clean,SaleType_lev_x.COD,SaleType_lev_x.Con,SaleType_lev_x.ConLD,SaleType_lev_x.ConLI,SaleType_lev_x.ConLw,SaleType_lev_x.CWD,SaleType_lev_x.New,SaleType_lev_x.Oth,SaleType_lev_x.WD,SaleCondition_lev_x.Abnorml,SaleCondition_lev_x.AdjLand,SaleCondition_lev_x.Alloca,SaleCondition_lev_x.Family,SaleCondition_lev_x.Normal,SaleCondition_lev_x.Partial,PoolMasVnrArea.interaction_lev_x.0.Ex,PoolMasVnrArea.interaction_lev_x.0.Fa,PoolMasVnrArea.interaction_lev_x.0.NA,PoolMasVnrArea.interaction_lev_x.120.NA,PoolMasVnrArea.interaction_lev_x.1600.NA,PoolMasVnrArea.interaction_lev_x.163.5.Gd,PoolMasVnrArea.interaction_lev_x.163.5.NA,PoolMasVnrArea.interaction_lev_x.200.Gd,PoolMasVnrArea.interaction_lev_x.200.NA,PoolMasVnrArea.interaction_lev_x.256.Gd,PoolMasVnrArea.interaction_lev_x.256.NA,PoolMasVnrArea.interaction_lev_x.322.2.NA,PoolMasVnrArea.interaction_lev_x.466.NA,PoolMasVnrArea.interaction_lev_x.72.NA,Condition2.ExterCond.interaction_lev_x.Artery.Ex,Condition2.ExterCond.interaction_lev_x.Artery.Fa,Condition2.ExterCond.interaction_lev_x.Artery.Gd,Condition2.ExterCond.interaction_lev_x.Artery.TA,Condition2.ExterCond.interaction_lev_x.Feedr.Fa,Condition2.ExterCond.interaction_lev_x.Feedr.Gd,Condition2.ExterCond.interaction_lev_x.Feedr.TA,Condition2.ExterCond.interaction_lev_x.Norm.Ex,Condition2.ExterCond.interaction_lev_x.Norm.Fa,Condition2.ExterCond.interaction_lev_x.Norm.Gd,Condition2.ExterCond.interaction_lev_x.Norm.Po,Condition2.ExterCond.interaction_lev_x.Norm.TA,Condition2.ExterCond.interaction_lev_x.PosA.Gd,Condition2.ExterCond.interaction_lev_x.PosA.TA,Condition2.ExterCond.interaction_lev_x.PosN.Gd,Condition2.ExterCond.interaction_lev_x.PosN.TA,Condition2.ExterCond.interaction_lev_x.RRAe.Gd,Condition2.ExterCond.interaction_lev_x.RRAe.TA,Condition2.ExterCond.interaction_lev_x.RRAn.Gd,Condition2.ExterCond.interaction_lev_x.RRAn.TA,Condition2.ExterCond.interaction_lev_x.RRNe.TA,Condition2.ExterCond.interaction_lev_x.RRNn.TA,LotArea.LandContour.interac
tion_lev_x.10151.6.Bnk,LotArea.LandContour.interaction_lev_x.10151.6.HLS,LotArea.LandContour.interaction_lev_x.10151.6.Low,LotArea.LandContour.interaction_lev_x.10151.6.Lvl,LotArea.LandContour.interaction_lev_x.10550.5.Bnk,LotArea.LandContour.interaction_lev_x.10550.5.HLS,LotArea.LandContour.interaction_lev_x.10550.5.Low,LotArea.LandContour.interaction_lev_x.10550.5.Lvl,LotArea.LandContour.interaction_lev_x.11001.2.Bnk,LotArea.LandContour.interaction_lev_x.11001.2.HLS,LotArea.LandContour.interaction_lev_x.11001.2.Low,LotArea.LandContour.interaction_lev_x.11001.2.Lvl,LotArea.LandContour.interaction_lev_x.11570.Bnk,LotArea.LandContour.interaction_lev_x.11570.HLS,LotArea.LandContour.interaction_lev_x.11570.Low,LotArea.LandContour.interaction_lev_x.11570.Lvl,LotArea.LandContour.interaction_lev_x.12203.8.Bnk,LotArea.LandContour.interaction_lev_x.12203.8.HLS,LotArea.LandContour.interaction_lev_x.12203.8.Low,LotArea.LandContour.interaction_lev_x.12203.8.Lvl,LotArea.LandContour.interaction_lev_x.1300.Bnk,LotArea.LandContour.interaction_lev_x.1300.HLS,LotArea.LandContour.interaction_lev_x.1300.Low,LotArea.LandContour.interaction_lev_x.1300.Lvl,LotArea.LandContour.interaction_lev_x.13072.Bnk,LotArea.LandContour.interaction_lev_x.13072.Low,LotArea.LandContour.interaction_lev_x.13072.Lvl,LotArea.LandContour.interaction_lev_x.14300.6.Bnk,LotArea.LandContour.interaction_lev_x.14300.6.HLS,LotArea.LandContour.interaction_lev_x.14300.6.Low,LotArea.LandContour.interaction_lev_x.14300.6.Lvl,LotArea.LandContour.interaction_lev_x.17142.9.Bnk,LotArea.LandContour.interaction_lev_x.17142.9.HLS,LotArea.LandContour.interaction_lev_x.17142.9.Low,LotArea.LandContour.interaction_lev_x.17142.9.Lvl,LotArea.LandContour.interaction_lev_x.215245.Bnk,LotArea.LandContour.interaction_lev_x.215245.HLS,LotArea.LandContour.interaction_lev_x.215245.Low,LotArea.LandContour.interaction_lev_x.215245.Lvl,LotArea.LandContour.interaction_lev_x.3182.Bnk,LotArea.LandContour.interaction_lev_x.3182.HLS,LotArea.LandC
ontour.interaction_lev_x.3182.Low,LotArea.LandContour.interaction_lev_x.3182.Lvl,LotArea.LandContour.interaction_lev_x.4922.4.Bnk,LotArea.LandContour.interaction_lev_x.4922.4.HLS,LotArea.LandContour.interaction_lev_x.4922.4.Low,LotArea.LandContour.interaction_lev_x.4922.4.Lvl,LotArea.LandContour.interaction_lev_x.6120.Bnk,LotArea.LandContour.interaction_lev_x.6120.HLS,LotArea.LandContour.interaction_lev_x.6120.Low,LotArea.LandContour.interaction_lev_x.6120.Lvl,LotArea.LandContour.interaction_lev_x.7007.6.Low,LotArea.LandContour.interaction_lev_x.7007.6.Lvl,LotArea.LandContour.interaction_lev_x.7478.Bnk,LotArea.LandContour.interaction_lev_x.7478.HLS,LotArea.LandContour.interaction_lev_x.7478.Low,LotArea.LandContour.interaction_lev_x.7478.Lvl,LotArea.LandContour.interaction_lev_x.7960.4.Bnk,LotArea.LandContour.interaction_lev_x.7960.4.HLS,LotArea.LandContour.interaction_lev_x.7960.4.Low,LotArea.LandContour.interaction_lev_x.7960.4.Lvl,LotArea.LandContour.interaction_lev_x.8390.9.Bnk,LotArea.LandContour.interaction_lev_x.8390.9.HLS,LotArea.LandContour.interaction_lev_x.8390.9.Lvl,LotArea.LandContour.interaction_lev_x.8741.Bnk,LotArea.LandContour.interaction_lev_x.8741.HLS,LotArea.LandContour.interaction_lev_x.8741.Low,LotArea.LandContour.interaction_lev_x.8741.Lvl,LotArea.LandContour.interaction_lev_x.9045.Bnk,LotArea.LandContour.interaction_lev_x.9045.HLS,LotArea.LandContour.interaction_lev_x.9045.Low,LotArea.LandContour.interaction_lev_x.9045.Lvl,LotArea.LandContour.interaction_lev_x.9453.Bnk,LotArea.LandContour.interaction_lev_x.9453.HLS,LotArea.LandContour.interaction_lev_x.9453.Low,LotArea.LandContour.interaction_lev_x.9453.Lvl,LotArea.LandContour.interaction_lev_x.9750.Bnk,LotArea.LandContour.interaction_lev_x.9750.Low,LotArea.LandContour.interaction_lev_x.9750.Lvl,Garage.interaction_lev_x.0.NA,Garage.interaction_lev_x.1.Ex,Garage.interaction_lev_x.1.Fa,Garage.interaction_lev_x.1.Gd,Garage.interaction_lev_x.1.Po,Garage.interaction_lev_x.1.TA,Garage.interaction_le
v_x.2.Fa,Garage.interaction_lev_x.2.Gd,Garage.interaction_lev_x.2.TA,Garage.interaction_lev_x.3.Ex,Garage.interaction_lev_x.3.Fa,Garage.interaction_lev_x.3.TA,Garage.interaction_lev_x.4.TA,Room.size_clean,TotalBath_clean,AvgHouseLivArea.ratio_clean,SalePrice
0,-0.251638,-0.212287,-0.083017,-0.144792,-0.761373,-0.222645,-0.052396,-0.091003,-0.330677,1.969844,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.24015,-0.207071,-0.064216,0.064216,-0.188246,0.257733,-0.169923,-0.703962,-0.169923,-0.083017,0.760251,-0.212287,-0.188246,-0.158945,0.33701,0.026171,-0.026171,-0.468578,-0.262234,-0.182318,-0.052396,0.622549,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.230986,-0.205144,-0.250096,-0.131946,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.037024,-0.111688,-0.045361,-0.117811,-0.293684,-0.610914,-0.586641,1.890596,-0.360475,-0.174141,-0.026171,-0.058601,-0.131946,-0.201493,0.881922,-0.456581,-0.404023,-0.227679,-0.123647,1.050634,0.878367,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.117811,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,-0.414285,-0.026171,-0.328124,-0.058601,-0.134606,1.376781,...,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.087658,-1.598563,0.13873,-0.174141,-0.037024,-0.07873,-0.058601,-0.058601,-0.052396,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589,-0.037024,-0.037024,0.783979,-0.222645,-0.222645,-0.026171,-0.220946,-0.
026171,-0.220946,-0.026171,-0.220946,-0.222645,-0.222645,-0.222645,-0.026171,-0.037024,-0.087099,-0.154359,-0.058601,-0.087099,-0.215785,-0.037024,-0.120762,-0.293684,-0.026171,0.550784,-0.026171,-0.069385,-0.045361,-0.105227,-0.037024,-0.07873,-0.037024,-0.129235,-0.037024,-0.058601,-0.064216,-0.037024,-0.026171,-0.206949,-0.052396,-0.037024,-0.026171,-0.21052,-0.045361,-0.026171,-0.026171,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.058601,-0.064216,-0.026171,4.914861,-0.058601,-0.026171,-0.212287,-0.058601,-0.026171,-0.052396,-0.205144,-0.045361,-0.058601,-0.052396,-0.201493,-0.052396,-0.045361,-0.026171,-0.208741,-0.052396,-0.026171,-0.026171,-0.214042,-0.045361,-0.064216,-0.026171,-0.206949,-0.026171,-0.026171,-0.026171,-0.219237,-0.026171,-0.222645,-0.037024,-0.058601,-0.026171,-0.21052,-0.026171,-0.037024,-0.064216,-0.208741,-0.045361,-0.037024,-0.215785,-0.045361,-0.037024,-0.026171,-0.214042,-0.037024,-0.037024,-0.026171,-0.215785,-0.058601,-0.058601,-0.037024,-0.203325,-0.037024,-0.045361,-0.214042,-0.242277,-0.037024,-0.152018,-0.037024,-0.045361,-0.53916,-0.094752,-0.091003,0.90924,-0.026171,-0.037024,-0.372492,-0.058601,-0.383442,1.647115,-0.332602,208500.0
1,-0.251638,-0.212287,-0.083017,-0.144792,1.312517,-0.222645,-0.052396,-0.091003,-0.330677,-0.507307,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,0.340726,-0.091855,-0.064216,0.064216,-0.188246,0.257733,-0.169923,-0.703962,-0.169923,-0.083017,0.760251,-0.212287,-0.188246,-0.158945,0.33701,0.026171,-0.026171,-0.468578,-0.262234,5.481171,-0.052396,-1.605199,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,-0.338268,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.230986,-0.205144,-0.250096,-0.131946,-0.131946,-0.163415,11.473319,-0.184312,4.124686,-2.50912,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,1.00515,-0.074202,-0.087099,-0.661909,-0.161194,-0.215785,-0.037024,-0.111688,-0.045361,-0.117811,-0.293684,-0.610914,1.703454,-0.528571,-0.360475,-0.174141,-0.026171,-0.058601,-0.131946,-0.201493,-1.133111,-0.456581,-0.404023,4.389143,-0.123647,0.15668,-0.42943,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,2.37329,-0.282537,-0.037024,-0.131946,-0.73797,-0.405169,-0.134606,-0.117811,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,2.412145,-0.026171,-0.328124,-0.058601,-0.134606,-0.725834,...,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.087658,-0.488943,-0.614228,-0.174141,-0.037024,-0.07873,-0.058601,-0.058601,-0.052396,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589,-0.037024,-0.037024,0.783979,-0.222645,-0.222645,-0.026171,-0.220946,-0.0
26171,-0.220946,-0.026171,-0.220946,-0.222645,-0.222645,-0.222645,-0.026171,-0.037024,-0.087099,-0.154359,-0.058601,-0.087099,4.631073,-0.037024,-0.120762,-0.293684,-0.026171,-1.814349,-0.026171,-0.069385,-0.045361,-0.105227,-0.037024,-0.07873,-0.037024,-0.129235,-0.037024,-0.058601,-0.064216,-0.037024,-0.026171,-0.206949,-0.052396,-0.037024,-0.026171,-0.21052,-0.045361,-0.026171,-0.026171,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.058601,-0.064216,-0.026171,-0.203325,-0.058601,-0.026171,-0.212287,-0.058601,-0.026171,-0.052396,-0.205144,-0.045361,-0.058601,-0.052396,-0.201493,-0.052396,-0.045361,-0.026171,-0.208741,-0.052396,-0.026171,-0.026171,4.668786,-0.045361,-0.064216,-0.026171,-0.206949,-0.026171,-0.026171,-0.026171,-0.219237,-0.026171,-0.222645,-0.037024,-0.058601,-0.026171,-0.21052,-0.026171,-0.037024,-0.064216,-0.208741,-0.045361,-0.037024,-0.215785,-0.045361,-0.037024,-0.026171,-0.214042,-0.037024,-0.037024,-0.026171,-0.215785,-0.058601,-0.058601,-0.037024,-0.203325,-0.037024,-0.045361,-0.214042,-0.242277,-0.037024,-0.152018,-0.037024,-0.045361,-0.53916,-0.094752,-0.091003,0.90924,-0.026171,-0.037024,-0.372492,-0.058601,-0.459809,0.382768,-1.208109,181500.0
2,-0.251638,-0.212287,-0.083017,-0.144792,-0.761373,-0.222645,-0.052396,-0.091003,-0.330677,1.969844,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.123975,0.073455,-0.064216,0.064216,-0.188246,0.257733,-0.169923,1.419559,-0.169923,-0.083017,-1.314453,-0.212287,-0.188246,-0.158945,0.33701,0.026171,-0.026171,-0.468578,-0.262234,-0.182318,-0.052396,0.622549,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.230986,-0.205144,-0.250096,-0.131946,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.037024,-0.111688,-0.045361,-0.117811,-0.293684,-0.610914,-0.586641,1.890596,-0.360475,-0.174141,-0.026171,-0.058601,-0.131946,-0.201493,0.881922,-0.456581,-0.404023,-0.227679,-0.123647,0.984415,0.82993,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.117811,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,-0.414285,-0.026171,-0.328124,-0.058601,-0.134606,1.376781,...,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.087658,0.990552,0.13873,-0.174141,-0.037024,-0.07873,-0.058601,-0.058601,-0.052396,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589,-0.037024,-0.037024,0.783979,-0.222645,-0.222645,-0.026171,-0.220946,-0.02
6171,-0.220946,-0.026171,-0.220946,-0.222645,-0.222645,-0.222645,-0.026171,-0.037024,-0.087099,-0.154359,-0.058601,-0.087099,-0.215785,-0.037024,-0.120762,-0.293684,-0.026171,0.550784,-0.026171,-0.069385,-0.045361,-0.105227,-0.037024,-0.07873,-0.037024,-0.129235,-0.037024,-0.058601,-0.064216,-0.037024,-0.026171,-0.206949,-0.052396,-0.037024,-0.026171,-0.21052,-0.045361,-0.026171,-0.026171,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.026171,-0.037024,-0.037024,-0.214042,-0.058601,-0.064216,-0.026171,-0.203325,-0.058601,-0.026171,-0.212287,-0.058601,-0.026171,-0.052396,-0.205144,-0.045361,-0.058601,-0.052396,-0.201493,-0.052396,-0.045361,-0.026171,-0.208741,-0.052396,-0.026171,-0.026171,-0.214042,-0.045361,-0.064216,-0.026171,4.828804,-0.026171,-0.026171,-0.026171,-0.219237,-0.026171,-0.222645,-0.037024,-0.058601,-0.026171,-0.21052,-0.026171,-0.037024,-0.064216,-0.208741,-0.045361,-0.037024,-0.215785,-0.045361,-0.037024,-0.026171,-0.214042,-0.037024,-0.037024,-0.026171,-0.215785,-0.058601,-0.058601,-0.037024,-0.203325,-0.037024,-0.045361,-0.214042,-0.242277,-0.037024,-0.152018,-0.037024,-0.045361,-0.53916,-0.094752,-0.091003,0.90924,-0.026171,-0.037024,-0.372492,-0.058601,1.492193,1.647115,-0.141029,223500.0


In [467]:
# Structural summary of each frame: index range, column count, dtypes, memory use
houses_train.info()
houses_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Columns: 465 entries, MSSubClass_lev_x.120 to SalePrice
dtypes: float64(465)
memory usage: 5.2 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Columns: 465 entries, MSSubClass_lev_x.120 to SalePrice
dtypes: float64(429), int64(36)
memory usage: 5.2 MB


Delete first column

In [468]:
# Disabled. NOTE(review): the info() output for both frames lists
# MSSubClass_lev_x.120 as the first column, so there appears to be no
# 'Unnamed: 0' column to drop; with errors='raise' these calls would presumably
# fail. Confirm against the CSVs before re-enabling.
# houses_train.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')
# houses_test.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')

# Run xgboost

### Create private training & test set

In [469]:
from sklearn.model_selection import train_test_split

# Settings for the private hold-out split
seed = 10
test_ratio = 0.2

# Feature matrix: every column except the target, as a NumPy array
X = houses_train.drop("SalePrice", axis=1).values

# Target: log(SalePrice + 1) as a 1-D array, the shape expected by model.fit
y = np.log(houses_train["SalePrice"].values + 1)


In [470]:
# Hold out `test_ratio` of the rows as a private test set; `seed` fixes the shuffle
X_pr_train, X_pr_test, y_pr_train, y_pr_test = train_test_split(
    X,
    y,
    test_size=test_ratio,
    random_state=seed,
)

In [471]:
# Row counts of the private training and test splits
print(len(X_pr_train), "train +", len(X_pr_test), "test")

1168 train + 292 test


### Fit Model

In [None]:
from xgboost import XGBRegressor

# Baseline gradient-boosted regressor. Every keyword is spelled out so that it
# is easy to see (and later tune) what the model is running with.
xgb_clf = XGBRegressor(
    # --- boosting setup ---
    booster='gbtree',       # booster to use: gbtree, gblinear or dart
                            # (dart: http://xgboost.readthedocs.io/en/latest/tutorials/dart.html)
    objective='reg:linear',
    n_estimators=1000,      # number of boosted trees to fit
    learning_rate=0.1,
    base_score=0.5,         # initial prediction score of all instances (global bias)

    # --- tree construction ---
    max_depth=3,
    gamma=0,                # minimum loss reduction required to split a leaf further
    min_child_weight=1,     # minimum sum of instance weight (hessian) needed in a child
    max_delta_step=0,       # maximum delta step allowed for each tree's weight estimation

    # --- row / column sampling ---
    subsample=1,            # fraction of training rows sampled
    colsample_bytree=1,     # fraction of columns sampled per tree
    colsample_bylevel=1,    # fraction of columns sampled per split, in each level

    # --- regularisation / weighting ---
    reg_alpha=0,            # L1 term on weights
    reg_lambda=1,           # L2 term on weights
    scale_pos_weight=1,     # balancing of positive and negative weights

    # --- runtime ---
    n_jobs=-1,              # parallel threads used by xgboost (replaces nthread)
    silent=False,           # print messages while running
    random_state=743,
    missing=None,           # value treated as missing; None falls back to np.nan
)



In [None]:
# Train the baseline model on the private training split
xgb_clf.fit(X_pr_train, y_pr_train)

In [None]:
# Predict on the private test split. The model was fit on log(SalePrice + 1),
# so these predictions are on that same log scale.

y_pr_pred = xgb_clf.predict(X_pr_test)

Evaluate predictions

In [None]:
from sklearn.metrics import mean_squared_error

# Both arrays hold log(SalePrice + 1) values, so this is the RMSE of the
# log-transformed prices rather than an error in currency units.
mse = mean_squared_error(y_pr_test, y_pr_pred)
rmse = np.sqrt(mse)
rmse


In [None]:
# Eyeball a few true vs. predicted values (log scale). The slice [1:5] picks
# positions 1 through 4, so position 0 is not shown. Both lines are displayed
# because ast_node_interactivity was set to "all" earlier in the notebook.
y_pr_test[1:5]
y_pr_pred[1:5]

### Model Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Starting values for the estimator handed to the search. Every key here also
# appears in `param_grid`, so the grid values replace them for each candidate.
xgb_params = {'max_depth': 3,
              'learning_rate': 0.1,
              'n_estimators': 100,
              'objective': 'reg:linear'}

# Candidate values per hyper-parameter; a single-element list pins that value.
param_grid = {'max_depth': [3, 4],
              'learning_rate': [0.01, 0.03, 0.05, 0.07],
              'n_estimators': [2000, 2500, 3000], # Number of boosted trees to fit
              'objective': ['reg:linear'],
              # Specify which booster to use: gbtree, gblinear or dart.
              # Each name must be its own list element: without the comma, Python
              # joins adjacent string literals into the invalid 'gblineardart'.
              'booster': ['gbtree', 'gblinear', 'dart'],
              'n_jobs': [-1], # Number of parallel threads used to run xgboost. (replaces nthread)
              'gamma': [0],  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
              'min_child_weight': [1], # Minimum sum of instance weight(hessian) needed in a child
              'max_delta_step': [0], # Maximum delta step we allow each tree's weight estimation to be
              'subsample': [1], # Subsample ratio of the training instance
              'colsample_bytree': [0.5, 0.6, 0.7], # Subsample ratio of columns when constructing each tree
              'colsample_bylevel': [0.2, 0.3, 0.4], # Subsample ratio of columns for each split, in each level
              'reg_alpha': [0], # L1 regularization term on weights
              'reg_lambda': [1], # L2 regularization term on weights
              'scale_pos_weight': [1], # Balancing of positive and negative weights
              'base_score': [0.5], # The initial prediction score of all instances, global bias
              'silent': [True],
              'random_state': [10]}

# Exhaustive search over `param_grid`, scored by negative MSE under 2-fold CV.
# NOTE(review): `iid` was deprecated in scikit-learn 0.22 and removed in 0.24;
# drop that argument if the installed version rejects it.
optimized_xgb_clf = GridSearchCV(XGBRegressor(**xgb_params), # scikit-learn estimator interface
                                 param_grid=param_grid, # Dictionary with parameter names (string) as keys
                                 scoring="neg_mean_squared_error", # metric applied to the estimators evaluated
                                 n_jobs=-1, # number of jobs to run in parallel (-1: use all processors)
                                 iid=True, # If True, the data is assumed to be identically distributed across the folds
                                 refit=True, # Refit an estimator using the best found parameters (best_estimator_)
                                 cv=2, # integer, to specify the number of folds in a (Stratified)KFold
                                 verbose=10, # the higher, the more messages
                                 pre_dispatch="2*n_jobs", # number of jobs that get dispatched during parallel execution
                                 error_score="raise", # raise if a candidate fails to fit instead of scoring it
                                 return_train_score=False) # If False, the cv_results_ attribute will not include training scores

Inspect the grid

In [None]:
# Display the configured (not yet fitted) search object
optimized_xgb_clf

Run grid tuning

In [None]:
# Run the grid search on the private training split (cv=2 and refit=True as
# configured when the search object was built)
optimized_xgb_clf.fit(X_pr_train, y_pr_train)

The best combination of parameters is:

In [None]:
# Parameter combination that achieved the best cross-validated score
optimized_xgb_clf.best_params_

In [None]:
## The best score is
# the mean cross-validated score of the best parameter combination. With
# scoring="neg_mean_squared_error" it is a negative MSE measured on the held-out
# folds (closer to 0 is better); it is not a training-set score.
optimized_xgb_clf.best_score_

In [None]:
# optimized_xgb_clf.cv_results_

In [None]:
# Predict on the private test split with the tuned model. The search object was
# created with refit=True, so predict() uses the estimator refit with the best
# parameters. Predictions are on the log(SalePrice + 1) scale.

y_pr_pred = optimized_xgb_clf.predict(X_pr_test)

Evaluate predictions

In [None]:
from sklearn.metrics import mean_squared_error

# RMSE of the tuned model on the private test split; both arrays are on the
# log(SalePrice + 1) scale, so this is an error in log units.
mse = mean_squared_error(y_pr_test, y_pr_pred)
rmse = np.sqrt(mse)
rmse


In [None]:
# Eyeball a few true vs. predicted values (log scale) for the tuned model.
# The slice [1:5] picks positions 1 through 4, so position 0 is not shown.
y_pr_test[1:5]
y_pr_pred[1:5]