# Setup

Make sure this notebook works well in both Python 2 and 3, import a few common modules, and ensure Matplotlib plots figures inline.

In [410]:
# To support both python 2 and python 3
from __future__ import division, print_function, unicode_literals

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
%config InlineBackend.figure_format = 'retina' # for retina screens
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12

Configure the notebook to display the result of every expression in a cell, not only the last one.

In [411]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell instead of only the last one
InteractiveShell.ast_node_interactivity = "all"

# Revert to the last line of output only
# InteractiveShell.ast_node_interactivity = "last_expr"

# Load Data

In [412]:
import pandas as pd

# Read the training and test CSV files into DataFrames (train first, then test)
houses_train, houses_test = (
    pd.read_csv(csv_path)
    for csv_path in ('../Data/encoded_houses_train.csv',
                     '../Data/encoded_houses_test.csv')
)

In [413]:
# Report the (rows, columns) shape of each frame, one line per frame
print("houses_train dimensions: {}".format(houses_train.shape),
      "houses_test dimensions: {}".format(houses_test.shape),
      sep="\n")

houses_train dimensions: (1460, 537)
houses_test dimensions: (1459, 537)


In [414]:
# Let pandas render every column of the frame. The limit is derived from the
# frame itself instead of hard-coding its current width (537), so the preview
# is not silently truncated if the encoded data gains columns.
pd.set_option("display.max_columns", houses_train.shape[1])
houses_train.head(3)

Unnamed: 0,MSSubClass30,MSSubClass40,MSSubClass45,MSSubClass50,MSSubClass60,MSSubClass70,MSSubClass75,MSSubClass80,MSSubClass85,MSSubClass90,MSSubClass120,MSSubClass150,MSSubClass160,MSSubClass180,MSSubClass190,MSZoningC (all),MSZoningFV,MSZoningI,MSZoningRH,MSZoningRL,MSZoningRP,MSZoningRM,LotFrontage,LotArea,StreetPave,AlleyPave,AlleyNA,LotShapeIR1,LotShapeIR2,LotShapeIR3,LandContourBnk,LandContourHLS,LandContourLow,UtilitiesNoSewr,UtilitiesNoSeWa,UtilitiesELO,LotConfigCorner,LotConfigCulDSac,LotConfigFR2,LotConfigFR3,LandSlopeMod,LandSlopeSev,NeighborhoodBlueste,NeighborhoodBrDale,NeighborhoodBrkSide,NeighborhoodClearCr,NeighborhoodCollgCr,NeighborhoodCrawfor,NeighborhoodEdwards,NeighborhoodGilbert,NeighborhoodIDOTRR,NeighborhoodMeadowV,NeighborhoodMitchel,NeighborhoodNAmes,NeighborhoodNoRidge,NeighborhoodNPkVill,NeighborhoodNridgHt,NeighborhoodNWAmes,NeighborhoodOldTown,NeighborhoodSWISU,NeighborhoodSawyer,NeighborhoodSawyerW,NeighborhoodSomerst,NeighborhoodStoneBr,NeighborhoodTimber,NeighborhoodVeenker,Condition1Feedr,Condition1Norm,Condition1RRNn,Condition1RRAn,Condition1PosN,Condition1PosA,Condition1RRNe,Condition1RRAe,Condition2Feedr,Condition2Norm,Condition2RRNn,Condition2RRAn,Condition2PosN,Condition2PosA,Condition2RRNe,Condition2RRAe,BldgType2fmCon,BldgTypeDuplex,BldgTypeTwnhsE,BldgTypeTwnhs,HouseStyle1.5Fin,HouseStyle1.5Unf,HouseStyle2Story,HouseStyle2.5Fin,HouseStyle2.5Unf,HouseStyleSFoyer,HouseStyleSLvl,OverallQual9,OverallQual8,OverallQual7,OverallQual6,OverallQual5,OverallQual4,OverallQual3,OverallQual2,OverallQual1,OverallCond9,OverallCond8,OverallCond7,OverallCond6,OverallCond5,OverallCond4,OverallCond3,OverallCond2,OverallCond1,YearBuilt,YearRemodAdd,RoofStyleGable,RoofStyleGambrel,RoofStyleHip,RoofStyleMansard,RoofStyleShed,RoofMatlCompShg,RoofMatlMembran,RoofMatlMetal,RoofMatlRoll,RoofMatlTar&Grv,RoofMatlWdShake,RoofMatlWdShngl,Exterior1stAsphShn,Exterior1stBrkComm,Exterior1stBrkFace,Exterior1stCBlock,Exterior1stCemntBd,Exterior1stHdBoard,Exteri
or1stImStucc,Exterior1stMetalSd,Exterior1stOther,Exterior1stPlywood,Exterior1stPreCast,Exterior1stStone,Exterior1stStucco,Exterior1stVinylSd,Exterior1stWd Sdng,Exterior1stWdShing,Exterior2ndAsphShn,Exterior2ndBrk Cmn,Exterior2ndBrkFace,Exterior2ndCBlock,Exterior2ndCmentBd,Exterior2ndHdBoard,Exterior2ndImStucc,Exterior2ndMetalSd,Exterior2ndOther,Exterior2ndPlywood,Exterior2ndPreCast,Exterior2ndStone,Exterior2ndStucco,Exterior2ndVinylSd,Exterior2ndWd Sdng,Exterior2ndWd Shng,MasVnrTypeBrkFace,MasVnrTypeCBlock,MasVnrTypeNone,MasVnrTypeStone,MasVnrArea,ExterQualGd,ExterQualTA,ExterQualFa,ExterQualPo,ExterCondGd,ExterCondTA,ExterCondFa,ExterCondPo,FoundationCBlock,FoundationPConc,FoundationSlab,FoundationStone,FoundationWood,BsmtQualGd,BsmtQualTA,BsmtQualFa,BsmtQualPo,BsmtQualNA,BsmtCondGd,BsmtCondTA,BsmtCondFa,BsmtCondPo,BsmtCondNA,BsmtExposureAv,BsmtExposureMn,BsmtExposureNo,BsmtExposureNA,BsmtFinType1ALQ,BsmtFinType1BLQ,BsmtFinType1Rec,BsmtFinType1LwQ,BsmtFinType1Unf,BsmtFinType1NA,BsmtFinSF1,BsmtFinType2ALQ,BsmtFinType2BLQ,BsmtFinType2Rec,BsmtFinType2LwQ,BsmtFinType2Unf,BsmtFinType2NA,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,HeatingGasA,HeatingGasW,HeatingGrav,HeatingOthW,HeatingWall,HeatingQCGd,HeatingQCTA,HeatingQCFa,HeatingQCPo,CentralAirY,ElectricalFuseA,ElectricalFuseF,ElectricalFuseP,ElectricalMix,X1stFlrSF,X2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQualGd,KitchenQualTA,KitchenQualFa,KitchenQualPo,TotRmsAbvGrd,FunctionalMin1,FunctionalMin2,FunctionalMod,FunctionalMaj1,FunctionalMaj2,FunctionalSev,FunctionalSal,Fireplaces,FireplaceQuGd,FireplaceQuTA,FireplaceQuFa,FireplaceQuPo,FireplaceQuNA,GarageTypeAttchd,GarageTypeBasment,GarageTypeBuiltIn,GarageTypeCarPort,GarageTypeDetchd,GarageTypeNA,GarageYrBlt,GarageFinishRFn,GarageFinishUnf,GarageFinishNA,GarageCars,GarageArea,GarageQualGd,GarageQualTA,GarageQualFa,GarageQualPo,GarageQualNA,GarageCondGd,GarageCondTA,GarageCondFa,GarageCondPo,GarageCondNA,PavedDri
veP,PavedDriveN,WoodDeckSF,OpenPorchSF,EnclosedPorch,X3SsnPorch,ScreenPorch,PoolArea,PoolQCGd,PoolQCTA,PoolQCFa,PoolQCNA,FenceMnPrv,FenceGdWo,FenceMnWw,FenceNA,MiscFeatureGar2,MiscFeatureOthr,MiscFeatureShed,MiscFeatureTenC,MiscFeatureNA,MiscVal,SaleTypeCWD,SaleTypeVWD,SaleTypeNew,SaleTypeCOD,SaleTypeCon,SaleTypeConLw,SaleTypeConLI,SaleTypeConLD,SaleTypeOth,SaleConditionAbnorml,SaleConditionAdjLand,SaleConditionAlloca,SaleConditionFamily,SaleConditionPartial,PoolMasVnrArea.interaction72.Ex,PoolMasVnrArea.interaction120.Ex,PoolMasVnrArea.interaction163.5.Ex,PoolMasVnrArea.interaction200.Ex,PoolMasVnrArea.interaction256.Ex,PoolMasVnrArea.interaction322.2.Ex,PoolMasVnrArea.interaction466.Ex,PoolMasVnrArea.interaction1600.Ex,PoolMasVnrArea.interaction0.Gd,PoolMasVnrArea.interaction72.Gd,PoolMasVnrArea.interaction120.Gd,PoolMasVnrArea.interaction163.5.Gd,PoolMasVnrArea.interaction200.Gd,PoolMasVnrArea.interaction256.Gd,PoolMasVnrArea.interaction322.2.Gd,PoolMasVnrArea.interaction466.Gd,PoolMasVnrArea.interaction1600.Gd,PoolMasVnrArea.interaction0.TA,PoolMasVnrArea.interaction72.TA,PoolMasVnrArea.interaction120.TA,PoolMasVnrArea.interaction163.5.TA,PoolMasVnrArea.interaction200.TA,PoolMasVnrArea.interaction256.TA,PoolMasVnrArea.interaction322.2.TA,PoolMasVnrArea.interaction466.TA,PoolMasVnrArea.interaction1600.TA,PoolMasVnrArea.interaction0.Fa,PoolMasVnrArea.interaction72.Fa,PoolMasVnrArea.interaction120.Fa,PoolMasVnrArea.interaction163.5.Fa,PoolMasVnrArea.interaction200.Fa,PoolMasVnrArea.interaction256.Fa,PoolMasVnrArea.interaction322.2.Fa,PoolMasVnrArea.interaction466.Fa,PoolMasVnrArea.interaction1600.Fa,PoolMasVnrArea.interaction0.NA,PoolMasVnrArea.interaction72.NA,PoolMasVnrArea.interaction120.NA,PoolMasVnrArea.interaction163.5.NA,PoolMasVnrArea.interaction200.NA,PoolMasVnrArea.interaction256.NA,PoolMasVnrArea.interaction322.2.NA,PoolMasVnrArea.interaction466.NA,PoolMasVnrArea.interaction1600.NA,Condition2.ExterCond.interactionFeedr.Ex,Condition2.ExterCond.interaction
Norm.Ex,Condition2.ExterCond.interactionRRNn.Ex,Condition2.ExterCond.interactionRRAn.Ex,Condition2.ExterCond.interactionPosN.Ex,Condition2.ExterCond.interactionPosA.Ex,Condition2.ExterCond.interactionRRNe.Ex,Condition2.ExterCond.interactionRRAe.Ex,Condition2.ExterCond.interactionArtery.Gd,Condition2.ExterCond.interactionFeedr.Gd,Condition2.ExterCond.interactionNorm.Gd,Condition2.ExterCond.interactionRRNn.Gd,Condition2.ExterCond.interactionRRAn.Gd,Condition2.ExterCond.interactionPosN.Gd,Condition2.ExterCond.interactionPosA.Gd,Condition2.ExterCond.interactionRRNe.Gd,Condition2.ExterCond.interactionRRAe.Gd,Condition2.ExterCond.interactionArtery.TA,Condition2.ExterCond.interactionFeedr.TA,Condition2.ExterCond.interactionNorm.TA,Condition2.ExterCond.interactionRRNn.TA,Condition2.ExterCond.interactionRRAn.TA,Condition2.ExterCond.interactionPosN.TA,Condition2.ExterCond.interactionPosA.TA,Condition2.ExterCond.interactionRRNe.TA,Condition2.ExterCond.interactionRRAe.TA,Condition2.ExterCond.interactionArtery.Fa,Condition2.ExterCond.interactionFeedr.Fa,Condition2.ExterCond.interactionNorm.Fa,Condition2.ExterCond.interactionRRNn.Fa,Condition2.ExterCond.interactionRRAn.Fa,Condition2.ExterCond.interactionPosN.Fa,Condition2.ExterCond.interactionPosA.Fa,Condition2.ExterCond.interactionRRNe.Fa,Condition2.ExterCond.interactionRRAe.Fa,Condition2.ExterCond.interactionArtery.Po,Condition2.ExterCond.interactionFeedr.Po,Condition2.ExterCond.interactionNorm.Po,Condition2.ExterCond.interactionRRNn.Po,Condition2.ExterCond.interactionRRAn.Po,Condition2.ExterCond.interactionPosN.Po,Condition2.ExterCond.interactionPosA.Po,Condition2.ExterCond.interactionRRNe.Po,Condition2.ExterCond.interactionRRAe.Po,LotArea.LandContour.interaction3182.Lvl,LotArea.LandContour.interaction4922.4.Lvl,LotArea.LandContour.interaction6120.Lvl,LotArea.LandContour.interaction7007.6.Lvl,LotArea.LandContour.interaction7478.Lvl,LotArea.LandContour.interaction7960.4.Lvl,LotArea.LandContour.interaction8390.9.Lvl,LotArea.Land
Contour.interaction8741.Lvl,LotArea.LandContour.interaction9045.Lvl,LotArea.LandContour.interaction9453.Lvl,LotArea.LandContour.interaction9750.Lvl,LotArea.LandContour.interaction10151.6.Lvl,LotArea.LandContour.interaction10550.5.Lvl,LotArea.LandContour.interaction11001.2.Lvl,LotArea.LandContour.interaction11570.Lvl,LotArea.LandContour.interaction12203.8.Lvl,LotArea.LandContour.interaction13072.Lvl,LotArea.LandContour.interaction14300.6.Lvl,LotArea.LandContour.interaction17142.9.Lvl,LotArea.LandContour.interaction215245.Lvl,LotArea.LandContour.interaction1300.Bnk,LotArea.LandContour.interaction3182.Bnk,LotArea.LandContour.interaction4922.4.Bnk,LotArea.LandContour.interaction6120.Bnk,LotArea.LandContour.interaction7007.6.Bnk,LotArea.LandContour.interaction7478.Bnk,LotArea.LandContour.interaction7960.4.Bnk,LotArea.LandContour.interaction8390.9.Bnk,LotArea.LandContour.interaction8741.Bnk,LotArea.LandContour.interaction9045.Bnk,LotArea.LandContour.interaction9453.Bnk,LotArea.LandContour.interaction9750.Bnk,LotArea.LandContour.interaction10151.6.Bnk,LotArea.LandContour.interaction10550.5.Bnk,LotArea.LandContour.interaction11001.2.Bnk,LotArea.LandContour.interaction11570.Bnk,LotArea.LandContour.interaction12203.8.Bnk,LotArea.LandContour.interaction13072.Bnk,LotArea.LandContour.interaction14300.6.Bnk,LotArea.LandContour.interaction17142.9.Bnk,LotArea.LandContour.interaction215245.Bnk,LotArea.LandContour.interaction1300.HLS,LotArea.LandContour.interaction3182.HLS,LotArea.LandContour.interaction4922.4.HLS,LotArea.LandContour.interaction6120.HLS,LotArea.LandContour.interaction7007.6.HLS,LotArea.LandContour.interaction7478.HLS,LotArea.LandContour.interaction7960.4.HLS,LotArea.LandContour.interaction8390.9.HLS,LotArea.LandContour.interaction8741.HLS,LotArea.LandContour.interaction9045.HLS,LotArea.LandContour.interaction9453.HLS,LotArea.LandContour.interaction9750.HLS,LotArea.LandContour.interaction10151.6.HLS,LotArea.LandContour.interaction10550.5.HLS,LotArea.LandContour.intera
ction11001.2.HLS,LotArea.LandContour.interaction11570.HLS,LotArea.LandContour.interaction12203.8.HLS,LotArea.LandContour.interaction13072.HLS,LotArea.LandContour.interaction14300.6.HLS,LotArea.LandContour.interaction17142.9.HLS,LotArea.LandContour.interaction215245.HLS,LotArea.LandContour.interaction1300.Low,LotArea.LandContour.interaction3182.Low,LotArea.LandContour.interaction4922.4.Low,LotArea.LandContour.interaction6120.Low,LotArea.LandContour.interaction7007.6.Low,LotArea.LandContour.interaction7478.Low,LotArea.LandContour.interaction7960.4.Low,LotArea.LandContour.interaction8390.9.Low,LotArea.LandContour.interaction8741.Low,LotArea.LandContour.interaction9045.Low,LotArea.LandContour.interaction9453.Low,LotArea.LandContour.interaction9750.Low,LotArea.LandContour.interaction10151.6.Low,LotArea.LandContour.interaction10550.5.Low,LotArea.LandContour.interaction11001.2.Low,LotArea.LandContour.interaction11570.Low,LotArea.LandContour.interaction12203.8.Low,LotArea.LandContour.interaction13072.Low,LotArea.LandContour.interaction14300.6.Low,LotArea.LandContour.interaction17142.9.Low,LotArea.LandContour.interaction215245.Low,Garage.interaction1.Ex,Garage.interaction2.Ex,Garage.interaction3.Ex,Garage.interaction4.Ex,Garage.interaction5.Ex,Garage.interaction0.Gd,Garage.interaction1.Gd,Garage.interaction2.Gd,Garage.interaction3.Gd,Garage.interaction4.Gd,Garage.interaction5.Gd,Garage.interaction0.TA,Garage.interaction1.TA,Garage.interaction2.TA,Garage.interaction3.TA,Garage.interaction4.TA,Garage.interaction5.TA,Garage.interaction0.Fa,Garage.interaction1.Fa,Garage.interaction2.Fa,Garage.interaction3.Fa,Garage.interaction4.Fa,Garage.interaction5.Fa,Garage.interaction0.Po,Garage.interaction1.Po,Garage.interaction2.Po,Garage.interaction3.Po,Garage.interaction4.Po,Garage.interaction5.Po,Garage.interaction0.NA,Garage.interaction1.NA,Garage.interaction2.NA,Garage.interaction3.NA,Garage.interaction4.NA,Garage.interaction5.NA,Kitchen.interaction1.Ex,Kitchen.interaction2.Ex,Kitchen
.interaction3.Ex,Kitchen.interaction0.Gd,Kitchen.interaction1.Gd,Kitchen.interaction2.Gd,Kitchen.interaction3.Gd,Kitchen.interaction0.TA,Kitchen.interaction1.TA,Kitchen.interaction2.TA,Kitchen.interaction3.TA,Kitchen.interaction0.Fa,Kitchen.interaction1.Fa,Kitchen.interaction2.Fa,Kitchen.interaction3.Fa,Kitchen.interaction0.Po,Kitchen.interaction1.Po,Kitchen.interaction2.Po,Kitchen.interaction3.Po,new.old,Room.size,full.YrSold,QuarterSold,TotalBath,AvgHouseLivArea.ratio,SalePrice
0,-0.222645,-0.052396,-0.091003,-0.330677,1.969844,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.251638,,-0.212287,-0.083017,-0.144792,-0.083017,-0.215785,,-0.105227,0.517956,,-0.418812,-0.24015,-0.207071,0.064216,-0.169923,0.257733,-0.703962,-0.169923,-0.083017,-0.212287,-0.188246,-0.158945,,-0.026171,,-0.468578,-0.262234,-0.182318,-0.052396,-0.215785,-0.094752,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.242277,0.398273,-0.058601,-0.134606,-0.114788,-0.074202,-0.037024,-0.087099,-0.064216,0.10185,-0.037024,-0.026171,-0.037024,-0.026171,,-0.026171,-0.147237,-0.192111,-0.290925,-0.174141,-0.343273,-0.098363,1.509747,-0.074202,-0.087099,-0.161194,-0.215785,-0.174141,-0.360475,1.890596,-0.586641,-0.610914,-0.293684,-0.117811,-0.045361,-0.037024,-0.123647,-0.227679,-0.404023,-0.456581,0.881922,-0.201493,-0.131946,-0.058601,-0.026171,1.050634,0.878367,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,,-0.282537,,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,-0.414285,-0.026171,-0.328124,,-0.058601,-0.134606,1.376781,-0.394805,-0.163415,1.509747,,-1.217365,-0.309888,0.513928,1.410829,-1.278381,-0.098363,,-0.333219,0.372492,-0.139784,-0.026171,-0.875802,1.120584,-0.129235,-0.064216,-0.045361,1.166845,-0.894259,-0.156667,,-0.161194,-0.215785,0.33701,-0.17827,-0.037024,-0.161194,-0.422194,-0.290925,0.729136,-0.163415,-0.421067,-0.335749,-0.316477,-0.230986,-0.645902,-0.161194,0.575228,-0.114788,-0.152018,-0.195909,-0.180304,0.402876,-0.163415,-0.288554,-0.944267,-0.459145,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,-0.444486,-0.643774,-0.186288,-0.026171,
0.263722,-0.262234,-0.137218,-0.045361,-0.026171,-0.793162,1.161454,-0.120201,0.370207,1.107431,-0.240978,0.78947,1.227165,0.163723,-0.211381,1.220838,-1.006528,-0.16561,,0.911897,-0.147237,-0.154359,-0.10185,-0.098363,-0.058601,-0.026171,,-0.950901,-0.592968,-0.522206,-0.152018,-0.117811,1.05602,0.823223,-0.114788,-0.253172,-0.07873,-0.600353,-0.242277,1.00706,1.567811,-0.840903,-0.242277,0.311618,0.35088,-0.098363,0.33701,-0.184312,-0.045361,-0.242277,-0.07873,0.317784,-0.156667,-0.069385,-0.242277,-0.144792,-0.25622,-0.751918,0.216429,-0.359202,-0.116299,-0.270116,-0.068668,-0.045361,,-0.037024,0.069385,-0.346999,-0.195909,-0.087099,0.488031,-0.037024,-0.037024,-0.186288,-0.026171,0.195909,-0.087658,-0.052396,,-0.301858,-0.174141,-0.037024,-0.058601,-0.058601,-0.07873,-0.045361,-0.272522,-0.052396,-0.091003,-0.117811,-0.30589,,,,,,,,,,,,-0.026171,-0.026171,-0.026171,,,,,,,,,,,,,-0.037024,,,,,,,,,0.783979,-0.222645,-0.222645,-0.220946,-0.220946,-0.220946,-0.222645,-0.222645,-0.222645,,-0.037024,,,,,,,-0.087099,-0.087099,-0.293684,,-0.037024,-0.045361,-0.026171,,-0.037024,-0.154359,-0.215785,0.550784,-0.058601,-0.129235,-0.105227,-0.069385,-0.037024,-0.07873,-0.037024,-0.058601,-0.120762,,,,,,,,,-0.026171,,,,,,,-0.214042,-0.206949,-0.219237,-0.222645,-0.21052,-0.208741,-0.215785,-0.214042,-0.215785,-0.203325,-0.214042,-0.206949,-0.21052,-0.214042,-0.214042,-0.214042,-0.212287,-0.205144,-0.201493,-0.208741,-0.058601,-0.052396,-0.045361,-0.026171,,-0.037024,-0.026171,-0.045361,-0.045361,-0.037024,-0.058601,-0.037024,-0.064216,-0.052396,-0.045361,-0.026171,-0.026171,-0.058601,-0.058601,-0.045361,-0.052396,-0.064216,-0.026171,-0.064216,-0.026171,,-0.058601,-0.037024,-0.037024,-0.037024,-0.037024,-0.058601,,-0.037024,-0.037024,-0.026171,-0.037024,-0.037024,,-0.026171,-0.058601,-0.045361,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.064216,,-0.026171,-0.026171,-0.037024,-0.045361,-0.026171,-0.026171,-0.026171,-0.037024,-0.037024,-0.026171,-0.052396,-0.05
2396,-0.026171,-0.037024,,-0.026171,,,,-0.037024,-0.091003,,,,,-0.53916,0.90924,-0.372492,-0.058601,,,-0.152018,-0.094752,-0.037024,,,,-0.045361,,,,,-0.242277,,,,,,-0.27107,,,,1.226074,-0.045361,,-0.026171,-0.926976,-0.201493,-0.037024,,-0.154359,-0.058601,,,,,,0.231484,-0.377916,0.106482,-1.5682,1.647115,-0.332602,208500.0
1,-0.222645,-0.052396,-0.091003,-0.330677,-0.507307,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.251638,,-0.212287,-0.083017,-0.144792,-0.083017,-0.215785,,-0.105227,0.517956,,-0.418812,0.340726,-0.091855,0.064216,-0.169923,0.257733,-0.703962,-0.169923,-0.083017,-0.212287,-0.188246,-0.158945,,-0.026171,,-0.468578,-0.262234,5.481171,-0.052396,-0.215785,-0.094752,-0.037024,-0.105227,-0.203325,-0.139784,-0.338268,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,11.473319,4.124686,-2.50912,-0.058601,-0.134606,-0.114788,-0.074202,-0.037024,-0.087099,-0.064216,0.10185,-0.037024,-0.026171,-0.037024,-0.026171,,-0.026171,-0.147237,-0.192111,-0.290925,-0.174141,-0.343273,-0.098363,-0.661909,-0.074202,-0.087099,-0.161194,-0.215785,-0.174141,-0.360475,-0.528571,1.703454,-0.610914,-0.293684,-0.117811,-0.045361,-0.037024,-0.123647,4.389143,-0.404023,-0.456581,-1.133111,-0.201493,-0.131946,-0.058601,-0.026171,0.15668,-0.42943,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,2.37329,,-0.282537,,-0.037024,-0.131946,-0.73797,-0.405169,-0.134606,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,2.412145,-0.026171,-0.328124,,-0.058601,-0.134606,-0.725834,-0.394805,-0.163415,-0.661909,,0.820884,-0.309888,-0.570555,-0.708318,0.781703,-0.098363,,-0.333219,0.372492,-0.139784,-0.026171,1.141029,-0.891781,-0.129235,-0.064216,-0.045361,1.166845,-0.894259,-0.156667,,-0.161194,-0.215785,0.33701,-0.17827,-0.037024,-0.161194,-0.422194,-0.290925,-1.370546,-0.163415,2.37329,-0.335749,-0.316477,-0.230986,-0.645902,-0.161194,1.171591,-0.114788,-0.152018,-0.195909,-0.180304,0.402876,-0.163415,-0.288554,-0.641008,0.466305,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,-0.444486,-0.643774,-0.186288,-0.026171,0.
263722,-0.262234,-0.137218,-0.045361,-0.026171,0.257052,-0.794891,-0.120201,-0.482347,-0.819684,3.947457,0.78947,-0.76136,0.163723,-0.211381,-0.818548,0.992834,-0.16561,,-0.318574,-0.147237,-0.154359,-0.10185,-0.098363,-0.058601,-0.026171,,0.600289,-0.592968,1.913642,-0.152018,-0.117811,-0.946303,0.823223,-0.114788,-0.253172,-0.07873,-0.600353,-0.242277,-0.019293,1.567811,-0.840903,-0.242277,0.311618,-0.06071,-0.098363,0.33701,-0.184312,-0.045361,-0.242277,-0.07873,0.317784,-0.156667,-0.069385,-0.242277,-0.144792,-0.25622,1.625638,-0.704242,-0.359202,-0.116299,-0.270116,-0.068668,-0.045361,,-0.037024,0.069385,-0.346999,-0.195909,-0.087099,0.488031,-0.037024,-0.037024,-0.186288,-0.026171,0.195909,-0.087658,-0.052396,,-0.301858,-0.174141,-0.037024,-0.058601,-0.058601,-0.07873,-0.045361,-0.272522,-0.052396,-0.091003,-0.117811,-0.30589,,,,,,,,,,,,-0.026171,-0.026171,-0.026171,,,,,,,,,,,,,-0.037024,,,,,,,,,0.783979,-0.222645,-0.222645,-0.220946,-0.220946,-0.220946,-0.222645,-0.222645,-0.222645,,-0.037024,,,,,,,-0.087099,-0.087099,-0.293684,,-0.037024,-0.045361,-0.026171,,-0.037024,-0.154359,4.631073,-1.814349,-0.058601,-0.129235,-0.105227,-0.069385,-0.037024,-0.07873,-0.037024,-0.058601,-0.120762,,,,,,,,,-0.026171,,,,,,,4.668786,-0.206949,-0.219237,-0.222645,-0.21052,-0.208741,-0.215785,-0.214042,-0.215785,-0.203325,-0.214042,-0.206949,-0.21052,-0.214042,-0.214042,-0.214042,-0.212287,-0.205144,-0.201493,-0.208741,-0.058601,-0.052396,-0.045361,-0.026171,,-0.037024,-0.026171,-0.045361,-0.045361,-0.037024,-0.058601,-0.037024,-0.064216,-0.052396,-0.045361,-0.026171,-0.026171,-0.058601,-0.058601,-0.045361,-0.052396,-0.064216,-0.026171,-0.064216,-0.026171,,-0.058601,-0.037024,-0.037024,-0.037024,-0.037024,-0.058601,,-0.037024,-0.037024,-0.026171,-0.037024,-0.037024,,-0.026171,-0.058601,-0.045361,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.064216,,-0.026171,-0.026171,-0.037024,-0.045361,-0.026171,-0.026171,-0.026171,-0.037024,-0.037024,-0.026171,-0.052396,-0.
052396,-0.026171,-0.037024,,-0.026171,,,,-0.037024,-0.091003,,,,,-0.53916,0.90924,-0.372492,-0.058601,,,-0.152018,-0.094752,-0.037024,,,,-0.045361,,,,,-0.242277,,,,,,-0.27107,,,,-0.815053,-0.045361,,-0.026171,1.078038,-0.201493,-0.037024,,-0.154359,-0.058601,,,,,,-0.241097,-0.467335,-0.625911,-0.486683,0.382768,-1.208109,181500.0
2,-0.222645,-0.052396,-0.091003,-0.330677,1.969844,-0.206949,-0.105227,-0.203325,-0.117811,-0.192111,-0.251638,,-0.212287,-0.083017,-0.144792,-0.083017,-0.215785,,-0.105227,0.517956,,-0.418812,-0.123975,0.073455,0.064216,-0.169923,0.257733,1.419559,-0.169923,-0.083017,-0.212287,-0.188246,-0.158945,,-0.026171,,-0.468578,-0.262234,-0.182318,-0.052396,-0.215785,-0.094752,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.169923,-0.07873,-0.235877,-0.229337,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.242277,0.398273,-0.058601,-0.134606,-0.114788,-0.074202,-0.037024,-0.087099,-0.064216,0.10185,-0.037024,-0.026171,-0.037024,-0.026171,,-0.026171,-0.147237,-0.192111,-0.290925,-0.174141,-0.343273,-0.098363,1.509747,-0.074202,-0.087099,-0.161194,-0.215785,-0.174141,-0.360475,1.890596,-0.586641,-0.610914,-0.293684,-0.117811,-0.045361,-0.037024,-0.123647,-0.227679,-0.404023,-0.456581,0.881922,-0.201493,-0.131946,-0.058601,-0.026171,0.984415,0.82993,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,,-0.282537,,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.045361,-0.069385,-0.131946,-0.026171,-0.206949,-0.406313,-0.083017,-0.414285,-0.026171,-0.328124,,-0.058601,-0.134606,1.376781,-0.394805,-0.163415,1.509747,,-1.217365,-0.309888,0.325803,1.410829,-1.278381,-0.098363,,-0.333219,0.372492,-0.139784,-0.026171,-0.875802,1.120584,-0.129235,-0.064216,-0.045361,1.166845,-0.894259,-0.156667,,-0.161194,-0.215785,0.33701,-0.17827,-0.037024,-0.161194,-0.422194,3.434957,-1.370546,-0.163415,-0.421067,-0.335749,-0.316477,-0.230986,-0.645902,-0.161194,0.092875,-0.114788,-0.152018,-0.195909,-0.180304,0.402876,-0.163415,-0.288554,-0.30154,-0.313261,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,-0.444486,-0.643774,-0.186288,-0.026171,0.2
63722,-0.262234,-0.137218,-0.045361,-0.026171,-0.627611,1.188943,-0.120201,0.514836,1.107431,-0.240978,0.78947,1.227165,0.163723,-0.211381,1.220838,-1.006528,-0.16561,,-0.318574,-0.147237,-0.154359,-0.10185,-0.098363,-0.058601,-0.026171,,0.600289,-0.592968,1.913642,-0.152018,-0.117811,-0.946303,0.823223,-0.114788,-0.253172,-0.07873,-0.600353,-0.242277,0.931034,1.567811,-0.840903,-0.242277,0.311618,0.63151,-0.098363,0.33701,-0.184312,-0.045361,-0.242277,-0.07873,0.317784,-0.156667,-0.069385,-0.242277,-0.144792,-0.25622,-0.751918,-0.070337,-0.359202,-0.116299,-0.270116,-0.068668,-0.045361,,-0.037024,0.069385,-0.346999,-0.195909,-0.087099,0.488031,-0.037024,-0.037024,-0.186288,-0.026171,0.195909,-0.087658,-0.052396,,-0.301858,-0.174141,-0.037024,-0.058601,-0.058601,-0.07873,-0.045361,-0.272522,-0.052396,-0.091003,-0.117811,-0.30589,,,,,,,,,,,,-0.026171,-0.026171,-0.026171,,,,,,,,,,,,,-0.037024,,,,,,,,,0.783979,-0.222645,-0.222645,-0.220946,-0.220946,-0.220946,-0.222645,-0.222645,-0.222645,,-0.037024,,,,,,,-0.087099,-0.087099,-0.293684,,-0.037024,-0.045361,-0.026171,,-0.037024,-0.154359,-0.215785,0.550784,-0.058601,-0.129235,-0.105227,-0.069385,-0.037024,-0.07873,-0.037024,-0.058601,-0.120762,,,,,,,,,-0.026171,,,,,,,-0.214042,4.828804,-0.219237,-0.222645,-0.21052,-0.208741,-0.215785,-0.214042,-0.215785,-0.203325,-0.214042,-0.206949,-0.21052,-0.214042,-0.214042,-0.214042,-0.212287,-0.205144,-0.201493,-0.208741,-0.058601,-0.052396,-0.045361,-0.026171,,-0.037024,-0.026171,-0.045361,-0.045361,-0.037024,-0.058601,-0.037024,-0.064216,-0.052396,-0.045361,-0.026171,-0.026171,-0.058601,-0.058601,-0.045361,-0.052396,-0.064216,-0.026171,-0.064216,-0.026171,,-0.058601,-0.037024,-0.037024,-0.037024,-0.037024,-0.058601,,-0.037024,-0.037024,-0.026171,-0.037024,-0.037024,,-0.026171,-0.058601,-0.045361,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.026171,-0.064216,,-0.026171,-0.026171,-0.037024,-0.045361,-0.026171,-0.026171,-0.026171,-0.037024,-0.037024,-0.026171,-0.052396,-0.052
396,-0.026171,-0.037024,,-0.026171,,,,-0.037024,-0.091003,,,,,-0.53916,0.90924,-0.372492,-0.058601,,,-0.152018,-0.094752,-0.037024,,,,-0.045361,,,,,-0.242277,,,,,,-0.27107,,,,1.226074,-0.045361,,-0.026171,-0.926976,-0.201493,-0.037024,,-0.154359,-0.058601,,,,,,0.03236,1.499874,0.159335,0.594835,1.647115,-0.141029,223500.0


In [415]:
# Print a summary of each frame: index range, number of columns, dtypes and memory usage
houses_train.info()
houses_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Columns: 537 entries, MSSubClass30 to SalePrice
dtypes: float64(537)
memory usage: 6.0 MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1459 entries, 0 to 1458
Columns: 537 entries, MSSubClass30 to SalePrice
dtypes: float64(536), int64(1)
memory usage: 6.0 MB


Delete first column

In [416]:
# Left disabled: the info() summaries above list the columns as running from
# MSSubClass30 to SalePrice, so neither frame appears to contain an 'Unnamed: 0'
# column (with errors='raise' these calls would then fail) -- TODO confirm
# before re-enabling.
# houses_train.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')
# houses_test.drop('Unnamed: 0', axis=1, inplace=True, errors='raise')

# Run xgboost

### Create private training & test set

In [417]:
from sklearn.model_selection import train_test_split

# Settings for the private hold-out split
seed = 10
test_ratio = 0.2

# Features: every column except the target, as a NumPy array
X = houses_train.drop("SalePrice", axis=1).values

# Target: log(SalePrice + 1), flattened to a 1-D array (n_samples,) for model fitting
y = np.log(houses_train[["SalePrice"]].values + 1).ravel()


In [418]:
# Hold out `test_ratio` of the rows as a private test set; `seed` fixes the split
X_pr_train, X_pr_test, y_pr_train, y_pr_test = train_test_split(
    X,
    y,
    test_size=test_ratio,
    random_state=seed,
)

In [419]:
# Row counts of the two private splits
print(X_pr_train.shape[0], "train +", X_pr_test.shape[0], "test")

1168 train + 292 test


### Fit Model

In [420]:
from xgboost import XGBRegressor

# Baseline gradient-boosted regressor; keyword arguments are grouped by theme.
xgb_clf = XGBRegressor(
    # --- booster and objective ---
    booster='gbtree',        # which booster to use: gbtree, gblinear or dart
                             # (dart: http://xgboost.readthedocs.io/en/latest/tutorials/dart.html)
    objective='reg:linear',
    base_score=0.5,          # initial prediction score of all instances (global bias)
    # --- ensemble size and step ---
    n_estimators=1000,       # number of boosted trees to fit
    learning_rate=0.1,
    # --- tree growth ---
    max_depth=3,
    gamma=0,                 # minimum loss reduction required to split a leaf further
    min_child_weight=1,      # minimum sum of instance weight (hessian) needed in a child
    max_delta_step=0,        # maximum delta step allowed for each tree's weight estimation
    # --- row / column sampling ---
    subsample=1,             # ratio of training instances sampled
    colsample_bytree=1,      # ratio of columns sampled per tree
    colsample_bylevel=1,     # ratio of columns sampled per split, in each level
    # --- regularisation and weighting ---
    reg_alpha=0,             # L1 term on weights
    reg_lambda=1,            # L2 term on weights
    scale_pos_weight=1,      # balancing of positive and negative weights
    # --- runtime ---
    n_jobs=-1,               # number of parallel threads (replaces nthread)
    silent=False,            # print messages while running
    random_state=743,
    missing=None,            # value treated as missing; None defaults to np.nan
)



In [421]:
# Train on the private training split; fit() returns the estimator, whose repr is echoed below
xgb_clf.fit(X_pr_train, y_pr_train)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=-1, nthread=None, objective='reg:linear', random_state=743,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=False, subsample=1)

In [422]:
# Predict on the private hold-out split. The target was log-transformed before
# splitting, so these predictions are on the log(SalePrice + 1) scale.

y_pr_pred = xgb_clf.predict(X_pr_test)

Evaluate predictions

In [423]:
from sklearn.metrics import mean_squared_error

# Root-mean-squared error between hold-out targets and predictions. Both are on
# the log(SalePrice + 1) scale, so the value is an error in log-price units.
mse = mean_squared_error(y_pr_test, y_pr_pred)
rmse = np.sqrt(mse)
rmse


0.12132884492961547

In [424]:
# Eyeball a few targets next to their predictions (elements 1 to 4 of each array).
# Both lines are echoed only because ast_node_interactivity is set to "all".
y_pr_test[1:5]
y_pr_pred[1:5]

array([ 12.1428719 ,  11.8277435 ,  12.01067193,  12.64109979])

array([ 12.2483902 ,  11.85323715,  11.84269428,  12.66669178], dtype=float32)

Save model to file

In [425]:
# joblib is more memory efficient than pickle for large numpy arrays.
# Prefer the standalone package: the copy vendored as sklearn.externals.joblib
# was deprecated in scikit-learn 0.21 and removed in 0.23, so it is kept only
# as a fallback for old environments where standalone joblib is not installed.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# dump() returns the list of file names written, which the notebook echoes
joblib.dump(xgb_clf, './Models/xgboost_model.pkl')

['./Models/xgboost_model.pkl']

To load a model:

In [426]:
# Restores the estimator saved above. joblib files are pickle-based, so only load files you trust.
# xgb_clf_loaded = joblib.load('./Models/xgboost_model.pkl') 

### Model Tuning

In [427]:
from sklearn.model_selection import GridSearchCV

# Settings for the base estimator handed to GridSearchCV. Every key below also appears in
# param_grid, so during the search these values are overridden by the grid candidates.
xgb_params = {'max_depth': 3,
              'learning_rate': 0.1, 
              'n_estimators': 100, 
              'objective': 'reg:linear'}

# Search space: only max_depth (3 values), n_estimators (5 values) and gamma (2 values) vary,
# giving 3 * 5 * 2 = 30 candidates; every other entry is a single-value list that pins a setting.
# NOTE(review): the estimator-level n_jobs=-1 below is combined with GridSearchCV's own
# n_jobs=-1, so both layers request all cores — possible CPU oversubscription; confirm intended.
param_grid = {'max_depth': [3, 4, 5], 
              'learning_rate': [0.07], 
              'n_estimators': [1500, 1800, 2000, 2200, 2400], # Number of boosted trees to fit
              'objective': ['reg:linear'], 
              'booster': ['gbtree'], # Specify which booster to use: gbtree, gblinear or dart
              'n_jobs': [-1], # Number of parallel threads used to run xgboost. (replaces nthread)
              'gamma': [0, 0.5],  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
              'min_child_weight': [1], # Minimum sum of instance weight(hessian) needed in a child
              'max_delta_step': [0], # Maximum delta step we allow each tree's weight estimation to be
              'subsample': [1], # Subsample ratio of the training instance
              'colsample_bytree': [0.6], # Subsample ratio of columns when constructing each tree
              'colsample_bylevel': [0.3], # Subsample ratio of columns for each split, in each level
              'reg_alpha': [0], # L1 regularization term on weights
              'reg_lambda': [1], # L2 regularization term on weights
              'scale_pos_weight': [1], # Balancing of positive and negative weights
              'base_score': [0.5], # The initial prediction score of all instances, global bias
              'silent': [True],
              'random_state': [10]}

# Exhaustive search over param_grid with 3-fold cross-validation (30 candidates x 3 folds = 90 fits).
# The search object is only constructed here; nothing is trained until .fit() is called.
optimized_xgb_clf = GridSearchCV(XGBRegressor(**xgb_params), # scikit-learn estimator interface 
                                 param_grid = param_grid, # Dictionary with parameters names (string) as keys
                                 scoring="neg_mean_squared_error", # Metric used to evaluate each candidate (negated MSE, so greater is better)
                                 n_jobs=-1, # Number of jobs to run in parallel (-1 uses all processors)
                                 iid=True, # If True, the data is assumed to be identically distributed across the folds
                                 refit=True, # Refit an estimator using the best found parameters (best_estimator_)
                                 cv=3, # Integer number of folds; plain KFold applies here since the estimator is a regressor
                                 verbose=10, # the higher, the more messages
                                 pre_dispatch="2*n_jobs", # number of jobs that get dispatched during parallel execution
                                 error_score="raise", # Re-raise any error hit while fitting a candidate instead of assigning a fallback score
                                 return_train_score=False) #If False, the cv_results_ attribute will not include training scores

In [428]:
# param_grid = {'max_depth': [3], 
#               'learning_rate': [0.05], 
#               'n_estimators': [2000], # Number of boosted trees to fit
#               'objective': ['reg:linear'], 
#               'booster': ['gbtree'], # Specify which booster to use: gbtree, gblinear or dart
#               'n_jobs': [-1], # Number of parallel threads used to run xgboost. (replaces nthread)
#               'gamma': [0],  # Minimum loss reduction required to make a further partition on a leaf node of the tree.
#               'min_child_weight': [1], # Minimum sum of instance weight(hessian) needed in a child
#               'max_delta_step': [0], # Maximum delta step we allow each tree’s weight estimation to be
#               'subsample': [1], # Subsample ratio of the training instance
#               'colsample_bytree': [0.6], # Subsample ratio of columns when constructing each tree
#               'colsample_bylevel': [0.3], # Subsample ratio of columns for each split, in each level
#               'reg_alpha': [0], # L1 regularization term on weights
#               'reg_lambda': [1], # L2 regularization term on weights
#               'scale_pos_weight': [1], # Balancing of positive and negative weights
#               'base_score': [0.5], # The initial prediction score of all instances, global bias
#               'silent': [True],
#               'random_state': [10]}

Inspect the grid

In [None]:
# Echo the (not yet fitted) search object to double-check the base estimator and the parameter grid
optimized_xgb_clf

GridSearchCV(cv=3, error_score='raise',
       estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'max_depth': [3, 4, 5], 'learning_rate': [0.07], 'n_estimators': [1500, 1800, 2000, 2200, 2400], 'objective': ['reg:linear'], 'booster': ['gbtree'], 'n_jobs': [-1], 'gamma': [0, 0.5], 'min_child_weight': [1], 'max_delta_step': [0], 'subsample': [1], 'colsample_bytree': [0.6], 'colsample_bylevel': [0.3], 'reg_alpha': [0], 'reg_lambda': [1], 'scale_pos_weight': [1], 'base_score': [0.5], 'silent': [True], 'random_state': [10]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=Fals

Run grid tuning

In [None]:
# Run the cross-validated grid search on the training data (progress is logged because verbose=10)
optimized_xgb_clf.fit(X=X_pr_train, y=y_pr_train)

Fitting 3 folds for each of 30 candidates, totalling 90 fits
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1800, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, booster=gbtree, colsample_bylevel=0.3, colsample_bytree=0.6, gamma=0, learning_rate=0.07, max_delta_step=0, max_depth=3, min_child_weight=1, n_estimators=1500, n_jobs=-1, objective=reg:linear, random_state=10, reg_alpha=0, reg_lambda=1, scale_pos_weight=1, silent=True, subsample=1 
[CV] base_score=0.5, booste

The best combination of parameters is:

In [None]:
# Parameter combination that achieved the best mean cross-validated score
optimized_xgb_clf.best_params_

In [None]:
## The best score is
# Mean cross-validated score of the best candidate, i.e. measured on the held-out CV folds,
# not on the training data. With scoring="neg_mean_squared_error" it is a negative MSE
# (closer to 0 is better); np.sqrt(-best_score_) puts it on the RMSE scale.
optimized_xgb_clf.best_score_

In [None]:
# optimized_xgb_clf.cv_results_

In [None]:
# Make predictions for the test data with the tuned model. Because the search was created with
# refit=True, predict() delegates to the best estimator refitted on all data passed to fit().
# NOTE: this reuses the name y_pr_pred, overwriting the baseline model's predictions.

y_pr_pred = optimized_xgb_clf.predict(X_pr_test)

Evaluate predictions

In [None]:
from sklearn.metrics import mean_squared_error

# Score the tuned model's predictions: mean squared error first, then its square root (RMSE),
# which is on the same scale as the target and is what the cell displays.
mse = mean_squared_error(y_true=y_pr_test, y_pred=y_pr_pred)
rmse = np.sqrt(mse)
rmse


In [None]:
# Eyeball a few actual vs. tuned-model predicted targets (positions 1-4; the 1:5 slice skips element 0).
# Both expressions are rendered because ast_node_interactivity is set to "all" in the setup cells.
y_pr_test[1:5]
y_pr_pred[1:5]