# Ames Housing Data

#### Imports

In [1]:
import os
import math
import warnings

In [2]:
# Default Libraries
import numpy             as np
import pandas            as pd
import matplotlib.pyplot as plt
import seaborn           as sns

# Model Selection
import lightgbm as lgb

In [3]:
# Preprocessing
from sklearn.preprocessing   import RobustScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics         import mean_squared_error
from scipy.special           import boxcox1p

# Model Selection
from sklearn.model_selection import GridSearchCV, KFold
from sklearn.pipeline        import make_pipeline
from xgboost                 import XGBRegressor
from sklearn.ensemble        import GradientBoostingRegressor
from sklearn.linear_model    import Lasso, ElasticNet
from sklearn.kernel_ridge    import KernelRidge
from mlxtend.regressor       import StackingCVRegressor

In [4]:
%matplotlib inline

In [5]:
# Notebook-wide display configuration.
pd.options.display.max_columns = None              # show every column when a frame is displayed
default_figure_size            = ( 10, 5 )
sns.set( rc = { 'figure.figsize' : default_figure_size } )
warnings.filterwarnings( 'ignore' )                # suppress all warnings from here on

#### Global Functions

In [6]:
def get_numerical_features( df ):
    """Return the labels of the columns in `df` whose dtype is int64 or float64."""
    numeric_only = df.select_dtypes( include = [ 'int64', 'float64' ] )
    return numeric_only.columns

def get_categorical_features( df ):
    """Return the labels of the columns in `df` whose dtype is object."""
    object_only = df.select_dtypes( include = [ 'object' ] )
    return object_only.columns

def return_features_with_null( df ):
    """Return the columns of `df` that still contain missing values.

    The result is a one-column DataFrame (column label 0) indexed by feature
    name, holding the number of missing entries per feature. Features with
    no missing values are left out.

    Every column is inspected, not only the object-typed ones, so that
    numeric features with remaining NaN are reported as well.
    """
    missing_counts = pd.DataFrame( df.isnull().sum() )
    return missing_counts[ missing_counts[ 0 ] > 0 ]

def return_rows_with_null( df ):
    """Print up to ten rows of `df` that contain a missing value.

    Only the columns that have at least one missing value are shown.
    Nothing is returned; the table is written with print().
    """
    null_mask     = df.isnull()
    null_columns  = df.columns[ null_mask.any() ]
    rows_with_nan = df[ null_mask.any( axis = 1 ) ]
    preview       = rows_with_nan[ null_columns ].head( 10 )
    print( pd.DataFrame( preview ) )

def na_heatmap( df ):
    """Draw a heatmap of the missing-value mask of `df`.

    Columns are ordered alphabetically, row tick labels are hidden and no
    colour bar is drawn. The plot goes on a new 25x5 figure.
    """
    ordered_columns = sorted( df.columns )
    null_mask       = df[ ordered_columns ].isnull()
    fig, ax         = plt.subplots( figsize = ( 25, 5 ) )
    sns.heatmap( null_mask, yticklabels = False, cbar = False, ax = ax )

#### Load Data

In [7]:
# Build the data directory with os.path.join so the notebook also runs on
# non-Windows systems (the hard-coded '\\' separators only resolve on Windows).
# The trailing '' keeps a trailing separator on data_path, as before.
data_path = os.path.join( os.getcwd(), '..', '..', '..', 'data', 'ames_housing', '' )
train_raw = pd.read_csv( os.path.join( data_path, 'train.csv' ) )
test_raw  = pd.read_csv( os.path.join( data_path, 'test.csv' ) )

#### Explore the Data

###### Data Overview

In [8]:
# Report the (rows, columns) shape of the raw train and test frames.
raw_shapes = "Train: {} \nTest: {}".format( train_raw.shape, test_raw.shape )
print( raw_shapes )

Train: (1460, 81) 
Test: (1459, 80)


In [9]:
# train_raw.head(3)

In [10]:
# train_raw.describe()

In [11]:
# facet_grid = pd.melt( train_raw, value_vars = sorted( get_numerical_features( train_raw ) ) )
# grid_plot  = sns.FacetGrid( facet_grid, col = 'variable', col_wrap = 10, sharex = False, sharey = False)
# grid_plot.map( sns.distplot, 'value' )

In [12]:
# facet_grid = pd.melt( train_raw, value_vars = sorted( get_categorical_features( train_raw ) ) )
# grid_plot  = sns.FacetGrid( facet_grid, col = 'variable', col_wrap = 10, sharex = False, sharey = False )
# grid_plot  = grid_plot.map( sns.countplot, 'value' )

# plt.xticks( rotation = 'vertical' )
# [ plt.setp( ax.get_xticklabels(), rotation = 60 ) for ax in grid_plot.axes.flat ]
# grid_plot.fig.tight_layout()

In [13]:
# plt.subplots( figsize = ( 15, 15 ) )
# sns.heatmap( train_raw.corr(), vmax = 1, square = True, cmap = 'magma', linecolor = 'white', linewidth = 0.1 )

###### Correct Outliers

In [14]:
# sns.scatterplot( x = 'GrLivArea', y = 'SalePrice', data = train_raw )

In [15]:
# Remove training rows with GrLivArea above 4000 but SalePrice below 250000.
is_outlier = ( train_raw[ 'GrLivArea' ] > 4000 ) & ( train_raw[ 'SalePrice' ] < 250000 )
train_raw  = train_raw.drop( train_raw[ is_outlier ].index )

In [16]:
# Separate the response from the predictors; 'Id' is dropped from both feature frames.
train_y = train_raw[ 'SalePrice' ]
train_X = train_raw.drop( columns = [ 'SalePrice', 'Id' ] )
test_X  = test_raw.drop( columns = [ 'Id' ] )

###### Transform Response

In [17]:
# sns.distplot( train_y, bins = 75 )

In [18]:
# Skewness and kurtosis of the response, rounded to four decimals.
response_skew     = round( train_y.skew(), 4 )
response_kurtosis = round( train_y.kurtosis(), 4 )
print( 'Skew: {} \nKurtosis: {}'.format( response_skew, response_kurtosis ) )

Skew: 1.8813 
Kurtosis: 6.5231


In [19]:
# Log-transform the response with log1p. It is derived from train_raw rather
# than from train_y itself, so re-running this cell cannot apply the transform twice.
train_y = np.log1p( train_raw[ 'SalePrice' ] )
# sns.distplot( train_y, bins = 75 )

In [20]:
# Skewness and kurtosis of the response, rounded to four decimals.
response_skew     = round( train_y.skew(), 4 )
response_kurtosis = round( train_y.kurtosis(), 4 )
print( 'Skew: {} \nKurtosis: {}'.format( response_skew, response_kurtosis ) )

Skew: 0.1216 
Kurtosis: 0.8048


In [21]:
# Stack train and test predictors so preprocessing is applied to both at once.
# train_end / test_end are the row positions used later to split them apart again.
# The original row labels are kept, so labels repeat between the train and test parts.
full_X    = pd.concat( [ train_X, test_X ] )
train_end = train_X.shape[ 0 ]
test_end  = full_X.shape[ 0 ]

print( full_X.shape )

(2917, 79)


In [22]:
# na_heatmap( full_X )

#### Data Preprocessing

###### Replace Missing Values

In [23]:
# na_heatmap( full_X )

In [24]:
# Columns whose missing values are filled with the string 'None'.
fill_with_none = [ 'Alley', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'BsmtQual', 'Fence', 'FireplaceQu', 
                   'GarageCond', 'GarageFinish', 'GarageQual', 'GarageType', 'MasVnrType', 'MiscFeature', 'MSSubClass', 'PoolQC' ]

# Columns whose missing values are filled with 0.
fill_with_zero = [ 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtFullBath', 'BsmtHalfBath', 'BsmtUnfSF', 'GarageArea', 'GarageCars', 'TotalBsmtSF', 'GarageYrBlt', 'MasVnrArea' ]

for fill_columns, fill_value in ( ( fill_with_none, 'None' ), ( fill_with_zero, 0 ) ):
    full_X[ fill_columns ] = full_X[ fill_columns ].fillna( fill_value )

###### Drop Useless Feature

In [25]:
# Fraction of rows whose Utilities value is 'AllPub'.
allpub_rows = int( ( full_X[ 'Utilities' ] == 'AllPub' ).sum() )
allpub_rows / len( full_X )

0.9989715461090161

In [26]:
# 'Utilities' is almost always 'AllPub', so the column is removed.
full_X = full_X.drop( columns = [ 'Utilities' ] )

###### Impute Remaining NaN

In [27]:
# Per the Kaggle notes, a missing 'Functional' value is treated as 'Typ'.
functional_filled = full_X[ 'Functional' ].fillna( 'Typ' )
full_X            = full_X.assign( Functional = functional_filled )

In [28]:
# Fill the remaining gaps in these columns with each column's most frequent value
# (first row of DataFrame.mode()).
missing_with_mode           = [ 'Electrical', 'KitchenQual', 'Exterior1st', 'Exterior2nd', 'SaleType', 'MSZoning' ]
column_modes                = full_X[ missing_with_mode ].mode().iloc[ 0 ]
full_X[ missing_with_mode ] = full_X[ missing_with_mode ].fillna( column_modes )

In [29]:
# Fill missing LotFrontage with the median LotFrontage of the row's Neighborhood.
frontage_by_neighborhood = full_X.groupby( 'Neighborhood' )[ 'LotFrontage' ]
full_X[ 'LotFrontage' ]  = frontage_by_neighborhood.transform( lambda values: values.fillna( values.median() ) )

In [30]:
# return_features_with_null( full_X )

In [31]:
# return_rows_with_null( full_X )

In [32]:
# Convert these numeric columns to strings so they are no longer picked up as
# numerical features.
full_X = full_X.assign(
    MSSubClass  = full_X[ 'MSSubClass' ].apply( str ),
    OverallCond = full_X[ 'OverallCond' ].astype( str ),
    YrSold      = full_X[ 'YrSold' ].astype( str ),
    MoSold      = full_X[ 'MoSold' ].astype( str ),
)

###### Create Ranked Features

In [33]:
# Ordinal encodings: each listed column has its string levels replaced by an
# integer rank, with 'None' mapped to 0.
#
# The result is assigned back to the column explicitly. A chained
# `full_X[ col ].replace( ..., inplace = True )` modifies a temporary object and
# leaves full_X untouched when pandas Copy-on-Write is active.
# infer_objects() converts a fully replaced column to a numeric dtype on pandas
# versions where replace() no longer downcasts; a column that still holds an
# unmapped string stays object-typed, as before.
quality_scale = { 'None' : 0, 'Po' : 1, 'Fa' : 2, 'TA' : 3, 'Gd' : 4, 'Ex' : 5 }
finish_scale  = { 'None' : 0, 'Unf' : 1, 'LwQ' : 2, 'Rec' : 3, 'BLQ' : 4, 'ALQ' : 5, 'GLQ' : 6 }

ranked_features = {
    'Alley'        : { 'None' : 0, 'Grvl' : 1, 'Pave' : 2 },
    'BsmtCond'     : quality_scale,   # shared scale also covers 'Ex', which the old mapping lacked
    'BsmtExposure' : { 'None' : 0, 'No' : 1, 'Mn' : 2, 'Av' : 3, 'Gd' : 4 },
    'BsmtFinType1' : finish_scale,
    'BsmtFinType2' : finish_scale,
    'BsmtQual'     : quality_scale,
    'ExterCond'    : quality_scale,
    'ExterQual'    : quality_scale,
    'Fence'        : { 'None' : 0, 'MnWw' : 1, 'GdWo' : 2, 'MnPrv' : 3, 'GdPrv' : 4 },
    'FireplaceQu'  : quality_scale,
    'Functional'   : { 'None' : 0, 'Sal' : 1, 'Sev' : 2, 'Maj2' : 3, 'Maj1' : 4, 'Mod' : 5, 'Min2' : 6, 'Min1' : 7, 'Typ' : 8 },
    'GarageCond'   : quality_scale,
    'GarageFinish' : { 'None' : 0, 'Unf' : 1, 'RFn' : 2, 'Fin' : 3 },
    'GarageQual'   : quality_scale,
    'HeatingQC'    : quality_scale,
    'KitchenQual'  : quality_scale,
    'LandSlope'    : { 'None' : 0, 'Sev' : 1, 'Mod' : 2, 'Gtl' : 3 },
    'LandContour'  : { 'None' : 0, 'Low' : 1, 'HLS' : 2, 'Bnk' : 3, 'Lvl' : 4 },
    'LotShape'     : { 'None' : 0, 'Reg' : 1, 'IR1' : 2, 'IR2' : 3, 'IR3' : 4 },
    'PoolQC'       : quality_scale,   # shared scale also covers 'Po', which the old mapping lacked
    'PavedDrive'   : { 'None' : 0, 'N' : 1, 'P' : 2, 'Y' : 3 },
}

for ranked_feature, rank_of in ranked_features.items():
    full_X[ ranked_feature ] = full_X[ ranked_feature ].replace( rank_of ).infer_objects()

###### Add Feature

In [34]:
# New feature: first floor + second floor + total basement area.
both_floors_sf             = full_X[ '1stFlrSF' ] + full_X[ '2ndFlrSF' ]
full_X[ 'TotalLivAreaSF' ] = both_floors_sf + full_X[ 'TotalBsmtSF' ]

###### BoxCox Transformation

In [35]:
categorical_features = list( get_categorical_features( full_X ) )
numerical_features   = list( get_numerical_features( full_X ) )

# Skewness of every numerical feature, tabulated as (Features, Skew).
skew_by_feature = { name : full_X[ name ].skew() for name in numerical_features }
skew_features   = pd.DataFrame( { 'Features' : list( skew_by_feature.keys() ),
                                  'Skew'     : list( skew_by_feature.values() ) } )

# Features whose absolute skew exceeds 0.75 get the Box-Cox transform.
is_skewed       = skew_features[ 'Skew' ].abs() > 0.75
features_to_box = list( skew_features[ is_skewed ][ 'Features' ] )

In [36]:
# Apply boxcox1p with lambda = 0.15 to each selected feature, one column at a time.
for feature in features_to_box:
    transformed       = boxcox1p( full_X[ feature ], 0.15 )
    full_X[ feature ] = transformed

###### Create One-Hot-Encoded Features

In [37]:
# One-hot encode the categorical columns, using each column's own name as the
# prefix; drop_first = True omits the first level of every encoded column.
full_X = pd.get_dummies( full_X,
                         columns    = categorical_features,
                         prefix     = categorical_features,
                         drop_first = True )

###### Split Back to Train/Test

In [38]:
# Split the processed frame back by row position: the first train_end rows are
# the training data, the rest (up to test_end) the test data.
train_X = pd.DataFrame( full_X.iloc[ :train_end ] )
test_X  = pd.DataFrame( full_X.iloc[ train_end:test_end ] )

processed_shapes = "Train: {} \nTest: {}".format( train_X.shape, test_X.shape )
print( processed_shapes )

Train: (1458, 232) 
Test: (1459, 232)


#### Find Optimal Hyperparameters / Score Models

In [39]:
# Join predictors and response, then hold out 30% of the rows (fixed seed).
full_train       = pd.concat( [ train_X, train_y ], axis = 1 )
train_predictors = full_train.drop( columns = [ 'SalePrice' ] )
train_response   = full_train[ 'SalePrice' ]

X_train, X_test, y_train, y_test = train_test_split( train_predictors,
                                                     train_response,
                                                     test_size    = 0.30,
                                                     random_state = 42 )

# Scaled copy of X_train. The scaler is fit on the whole of X_train.
scaler         = RobustScaler()
X_train_scaled = scaler.fit_transform( X_train )

In [40]:
# One RMSE slot per candidate model, initialised to 0 and filled in below.
rmse_score = dict.fromkeys( [ 'XGBRegressor',
                              'GradientBoostingRegressor',
                              'Lasso',
                              'KernelRidge',
                              'ElasticNet',
                              'LightGBM' ], 0 )

###### XGBRegressor

In [42]:
# Best Hyperparameters after initial gridsearch
xgbm_grid = { 
    'colsample_bytree' : [ 0.45 ], 
    'gamma'            : [ 0.05 ], 
    'min_child_weight' : [ 1.75 ], 
    'n_estimators'     : [ 2000 ],
    'reg_alpha'        : [ 0.45 ], 
    'reg_lambda'       : [ 0.9 ],
    'subsample'        : [ 0.5 ]
}

xgbm = GridSearchCV( XGBRegressor(), cv = 5, param_grid = xgbm_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
xgbm.fit( X_train, y_train )

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:   16.4s remaining:   24.6s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:   16.5s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
       n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=1),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'colsample_bytree': [0.45], 'gamma': [0.05], 'min_child_weight': [1.75], 'n_estimators': [2000], 'reg_alpha': [0.45], 'reg_lambda': [0.9], 'subsample': [0.5]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [None]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
xgbm_cv_mse                  = -xgbm.best_score_
rmse_score[ 'XGBRegressor' ] = round( math.sqrt( xgbm_cv_mse ), 4 )

###### GradientBoostingRegressor

In [43]:
# Best Hyperparameters after initial gridsearch
gbr_grid = {
    'n_estimators'      : [ 3000 ],
    'learning_rate'     : [ 0.05 ],
    'max_depth'         : [ 4 ],
    'max_features'      : [ 'sqrt' ],
    'min_samples_leaf'  : [ 15 ], 
    'min_samples_split' : [ 10 ], 
    'loss'              : [ 'huber' ]
}

gbr = GridSearchCV( GradientBoostingRegressor(), cv = 5, param_grid = gbr_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
gbr.fit( X_train, y_train )

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    8.5s remaining:   12.9s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    9.9s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_sampl...=None, subsample=1.0, tol=0.0001,
             validation_fraction=0.1, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'n_estimators': [3000], 'learning_rate': [0.05], 'max_depth': [4], 'max_features': ['sqrt'], 'min_samples_leaf': [15], 'min_samples_split': [10], 'loss': ['huber']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [None]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
gbr_cv_mse                                = -gbr.best_score_
rmse_score[ 'GradientBoostingRegressor' ] = round( math.sqrt( gbr_cv_mse ), 4 )

###### Lasso

In [44]:
# Best Hyperparameters after initial gridsearch
lasso_grid = { 
    'alpha'    : [ 0.0005 ], 
    'max_iter' : [ 300 ]
}

lasso = GridSearchCV( Lasso(), cv = 5, param_grid = lasso_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
lasso.fit( X_train_scaled, y_train )

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.1s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'alpha': [0.0005], 'max_iter': [300]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [45]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
lasso_cv_mse          = -lasso.best_score_
rmse_score[ 'Lasso' ] = round( math.sqrt( lasso_cv_mse ), 4 )

###### Kernel Ridge

In [46]:
# Best Hyperparameters after initial gridsearch
kernel_grid = {
    'alpha'  : [ 0.6 ], 
    'kernel' : [ 'polynomial' ], 
    'degree' : [ 2 ], 
    'coef0'  : [ 2.5 ]
}

kridge = GridSearchCV( KernelRidge(), cv = 5, param_grid = kernel_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
kridge.fit( X_train_scaled, y_train )

Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   2 out of   5 | elapsed:    0.0s remaining:    0.1s
[Parallel(n_jobs=-1)]: Done   5 out of   5 | elapsed:    0.0s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KernelRidge(alpha=1, coef0=1, degree=3, gamma=None, kernel='linear',
      kernel_params=None),
       fit_params=None, iid='warn', n_jobs=-1,
       param_grid={'alpha': [0.6], 'kernel': ['polynomial'], 'degree': [2], 'coef0': [2.5]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='neg_mean_squared_error', verbose=1)

In [None]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
kridge_cv_mse               = -kridge.best_score_
rmse_score[ 'KernelRidge' ] = round( math.sqrt( kridge_cv_mse ), 4 )

###### Elastic Net

In [None]:
# Best Hyperparameters after initial gridsearch
elastic_grid = {
    'alpha'        : [ 0.0005 ],
    'l1_ratio'     : [ .9 ]
}

elastic = GridSearchCV( ElasticNet(), cv = 5, param_grid = elastic_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
elastic.fit( X_train_scaled, y_train )

In [None]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
elastic_cv_mse             = -elastic.best_score_
rmse_score[ 'ElasticNet' ] = round( math.sqrt( elastic_cv_mse ), 4 )

###### LightGBM

In [None]:
# Single-candidate hyperparameter grid for LightGBM.
# (The comma after 'feature_fraction' was missing, which made this cell a SyntaxError.)
lgbm_grid = {
    'objective'               : [ 'regression' ],
    'num_leaves'              : [ 5 ],
    'learning_rate'           : [ 0.05 ], 
    'n_estimators'            : [ 750 ],
    'max_bin'                 : [ 50 ],
    'bagging_fraction'        : [ 0.75 ],
    'bagging_freq'            : [ 5 ], 
    'feature_fraction'        : [ 0.25 ],
    'min_data_in_leaf'        : [ 5 ], 
    'min_sum_hessian_in_leaf' : [ 10 ]
}

# 5-fold cross-validation scored by negated MSE, on the unscaled features.
lgbm = GridSearchCV( lgb.LGBMRegressor(), cv = 5, param_grid = lgbm_grid, n_jobs = -1, scoring = 'neg_mean_squared_error', verbose = 1 )
lgbm.fit( X_train, y_train )

In [None]:
# best_score_ is the negated MSE; flip the sign and take the root to get RMSE.
lgbm_cv_mse              = -lgbm.best_score_
rmse_score[ 'LightGBM' ] = round( math.sqrt( lgbm_cv_mse ), 4 )

###### Review RMSE Scores of Models

In [None]:
# Tabulate the cross-validated RMSE per model, best (lowest) first.
rmse_table = pd.DataFrame( { 'RMSE' : rmse_score }, index = rmse_score.keys() )
rmse_table.sort_values( by = [ 'RMSE' ], ascending = True )

#### Run Selected Model on Data

In [None]:
# XGBRegressor
# ==========
# Fresh, unfitted estimators configured with the hyperparameters selected by
# each grid search. They are built from `best_params_`, which holds exactly the
# keys of the corresponding grid, so a change to a grid carries over here
# without a hand-maintained list of attributes.
# The three models that were searched on scaled features are wrapped in a
# pipeline with their own RobustScaler.

# XGBRegressor
# ==================================================
xgbm_model = XGBRegressor( **xgbm.best_params_ )

# GradientBoostingRegressor
# ==================================================
gbr_model = GradientBoostingRegressor( **gbr.best_params_ )

# Lasso
# ==================================================
lasso_model = make_pipeline( RobustScaler(), Lasso( **lasso.best_params_ ) )

# KernelRidge
# ==================================================
kridge_model = make_pipeline( RobustScaler(), KernelRidge( **kridge.best_params_ ) )

# ElasticNet
# ==================================================
elastic_model = make_pipeline( RobustScaler(), ElasticNet( **elastic.best_params_ ) )

# LightGBM
# ==================================================
lgbm_model = lgb.LGBMRegressor( **lgbm.best_params_ )

#### Generate Models

In [None]:
# Stack the five base regressors under the ElasticNet pipeline as meta-regressor.
base_regressors = ( lasso_model, gbr_model, kridge_model, lgbm_model, xgbm_model )
stacked_model   = StackingCVRegressor( regressors     = base_regressors,
                                       meta_regressor = elastic_model,
                                       cv             = 5 )

# Fit on the full training set. The response was log1p-transformed, so the
# predictions are mapped back with expm1.
stacked_model.fit( train_X.values, train_y.values )
log_scale_predict = stacked_model.predict( test_X.values )
stacked_predict   = np.expm1( log_scale_predict )

#### Create Submission

In [None]:
# Pair each test Id with its predicted SalePrice.
submission = pd.DataFrame(
    { 'Id'        : test_raw[ 'Id' ],
      'SalePrice' : stacked_predict } 
)

# Write to the current working directory. A plain relative filename is used
# because the former '.\\' prefix is only a path separator on Windows; elsewhere
# it became part of the file name.
submission.to_csv( 'ames_housing_submission.csv', index = False )
submission.head(10)