Average the results from lasso, ridge and XGBoost

In [156]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics   #Additional scklearn functions
from xgboost.sklearn import XGBRegressor
from scipy.stats import norm, skew #for some statistics
from bayes_opt import BayesianOptimization
from sklearn.model_selection import  cross_val_score
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor as RFR
from mlxtend.regressor import StackingRegressor
from sklearn.svm import SVR

In [157]:
train = pd.read_csv("./data/train_clean.csv")
test = pd.read_csv("./data/test_clean.csv")

In [158]:
#Save the 'Id' column
train_ID = train['Id']
test_ID = test['Id']

#Now drop the 'Id' colum since it's unnecessary for the prediction process.
train.drop("Id", axis = 1, inplace = True)
test.drop("Id", axis = 1, inplace = True)

ytrain = train["SalePrice"]

In [159]:
#combine data
train.drop("Unnamed: 0", axis = 1, inplace = True)
test.drop("Unnamed: 0", axis = 1, inplace = True)
ntrain = train.shape[0]
ntest = test.shape[0]
train.drop(['SalePrice'], axis=1, inplace=True)
all_data = pd.concat((train, test)).reset_index(drop=True)

In [160]:
# MSSubClass should be string
all_data["MSSubClass"] = all_data["MSSubClass"].apply(str)

In [161]:
# get all continuous variables
all_non_object = all_data.dtypes[all_data.dtypes != "object"].index.tolist()
# do not consider Year,Month and Qual as continuous
year_month = ["YearBuilt", "YearRemodAdd","GarageYrBlt","MoSold","YrSold",
              "OverallQual","OverallCond"]
# numeric_features
numeric_features = list(set(all_non_object)-set(year_month))

In [162]:
# Check the skew of all numerical features
skewed_feats = all_data[numeric_features].apply(lambda x: skew(x.dropna())).sort_values(ascending=False)

skewness = pd.DataFrame({'Skew' :skewed_feats})

# check skewness of numerical variables
skewness = skewness[abs(skewness.Skew)>0.75]
print("There are {} skewed numerical features to Box Cox transform".format(skewness.shape[0]))

from scipy.special import boxcox1p
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    #all_data[feat] += 1
    all_data[feat] = boxcox1p(all_data[feat], lam)

There are 19 skewed numerical features to Box Cox transform


In [163]:
qual_dict = {"None": 0, "Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}
all_data["ExterQual"] = all_data["ExterQual"].map(qual_dict).astype(int)
all_data["ExterCond"] = all_data["ExterCond"].map(qual_dict).astype(int)
all_data["BsmtQual"] = all_data["BsmtQual"].map(qual_dict).astype(int)
all_data["BsmtCond"] = all_data["BsmtCond"].map(qual_dict).astype(int)
all_data["HeatingQC"] = all_data["HeatingQC"].map(qual_dict).astype(int)
all_data["KitchenQual"] = all_data["KitchenQual"].map(qual_dict).astype(int)
all_data["FireplaceQu"] = all_data["FireplaceQu"].map(qual_dict).astype(int)
all_data["GarageQual"] = all_data["GarageQual"].map(qual_dict).astype(int)
all_data["GarageCond"] = all_data["GarageCond"].map(qual_dict).astype(int)

all_data["BsmtExposure"] = all_data["BsmtExposure"].map(
        {"None": 0, "No": 1, "Mn": 2, "Av": 3, "Gd": 4}).astype(int)

bsmt_fin_dict = {"None": 0, "Unf": 1, "LwQ": 2, "Rec": 3, "BLQ": 4, "ALQ": 5, "GLQ": 6}
all_data["BsmtFinType1"] = all_data["BsmtFinType1"].map(bsmt_fin_dict).astype(int)
all_data["BsmtFinType2"] = all_data["BsmtFinType2"].map(bsmt_fin_dict).astype(int)

all_data["Functional"] = all_data["Functional"].map(
        {"None": 0, "Sal": 1, "Sev": 2, "Maj2": 3, "Maj1": 4, 
         "Mod": 5, "Min2": 6, "Min1": 7, "Typ": 8}).astype(int)

all_data["GarageFinish"] = all_data["GarageFinish"].map(
        {"None": 0, "Unf": 1, "RFn": 2, "Fin": 3}).astype(int)

all_data["Fence"] = all_data["Fence"].map(
        {"None": 0, "MnWw": 1, "GdWo": 2, "MnPrv": 3, "GdPrv": 4}).astype(int)

all_data["PoolQC"] = all_data["PoolQC"].map(qual_dict).astype(int)

# Most land slopes are gentle; treat the others as "not gentle".
all_data["LandSlope"] = (all_data["LandSlope"] == "Gtl") * 1
# IR2 and IR3 don't appear that often, so just make a distinction
# between regular and irregular.
all_data["LotShape"] = (all_data["LotShape"] == "Reg") * 1
# Most properties use standard circuit breakers.
all_data["Electrical"] = (all_data["Electrical"] == "SBrkr") * 1
# Most have a paved drive. Treat dirt/gravel and partial pavement
# as "not paved".
all_data["PavedDrive"] = (all_data["PavedDrive"] == "Y") * 1

In [164]:
# # label encoding
# for c in all_data.columns:
#     if all_data[c].dtype == 'object' or c in year_month:
#         lbl = preprocessing.LabelEncoder()
#         lbl.fit(list(all_data[c].values)) 
#         all_data[c] = lbl.transform(list(all_data[c].values))

In [165]:
all_data = pd.get_dummies(all_data)
print(all_data.shape)

(2917, 233)


In [166]:
# split data
train = all_data[:ntrain]
test = all_data[ntrain:] #prediction data 

In [167]:
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(train, 
                                                    ytrain,
                                                    train_size=1-test_size, 
                                                    test_size=test_size, 
                                                random_state=0)

In [168]:
#len(X_train)+len(X_test)


## Feature Selection

### PCA- gave a worse result specially with xgboost model

In [169]:
#Feature Scaling before applying PCA
from sklearn.preprocessing import StandardScaler
sc=StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
test = sc.transform(test)

from sklearn.decomposition import PCA
pca = PCA(0.99)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)
test = pca.transform(test)

cum_explaind_varaince = pca.explained_variance_ratio_

In [170]:
min(cum_explaind_varaince)

0.00070330956262161341

## Base models

### XGBoost
#### XGBoost parameter optimization

In [34]:
def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha,lambd):

    params['min_child_weight'] = int(min_child_weight)
    params['cosample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)
    params['lambda'] = max(lambd, 0)


    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
             seed=random_state,
             callbacks=[xgb.callback.early_stop(50)])

    return -cv_result['test-rmse-mean'].values[-1]

In [139]:
#loading data into d-matrices
xgtrain = xgb.DMatrix(X_train, label=y_train)
xgtest = xgb.DMatrix(X_test, label=y_test)
xgpred = xgb.DMatrix(test)

In [36]:
num_rounds = 3000
random_state = 42
num_iter = 25
init_points = 5
params = {
        'eta': 0.1,
        'silent': 1,
        'eval_metric': 'rmse',
        'verbose_eval': True,
        'seed': random_state
    }

xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 10),
                                                'colsample_bytree': (0.1, 1),
                                                'max_depth': (2, 50), #changed max depth from 12 to 50
                                                'subsample': (0.5, 1),
                                                'gamma': (0, 2),
                                                'alpha': (0, 2),
                                                'lambd':(0, 2)
                                                })

xgbBO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |     lambd |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[107]	train-rmse:0.163366+0.00381923	test-rmse:0.175219+0.0166345

    1 | 00m13s | [35m  -0.17522[0m | [32m   0.3964[0m | [32m            0.5150[0m | [32m   1.1534[0m | [32m   0.0005[0m | [32m    32.7550[0m | [32m            4.1069[0m | [32m     0.6348[0m | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[169]	train-rmse:0.0927082+0.00136231	test-rmse:0.149661+0.0179419

    2 | 00m2

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[176]	train-rmse:0.0033538+0.000456219	test-rmse:0.160227+0.0138716

   27 | 01m13s |   -0.16023 |    0.0000 |             1.0000 |    0.0000 |    1.6113 |     24.0992 |             1.0000 |      0.5000 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[465]	train-rmse:0.0752818+0.000840445	test-rmse:0.144449+0.0189678

   28 | 01m18s |   -0.14445 |    1.8663 |             0.7033 |    0.0122 |    1.6091 |     20.5341 |             9.9071 |      0.5794 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[306]	train-rmse:0.0003458+1.82472e-05	test-rmse:0.154658+0.0149335

   29 | 02m0

In [37]:
bayesian_params = xgbBO.res["max"]["max_params"]

In [171]:
bayesian_params = {'alpha': 0.12250868020871097,
 'colsample_bytree': 0.96,
 'gamma': 0.0055747928592750906,
 'lambd': 1.8707339442110735,
 'max_depth': 2.2076064679490948,
 'min_child_weight': 2.4462408869837908,
 'subsample': 0.7}#0.62262942029485835   'colsample_bytree': 0.95811915002042114,

In [127]:
# xgb best params with train/test
# {'min_child_weight': 3.7316987233785577,
#  'colsample_bytree': 0.18788694151572233,
#  'max_depth': 2.58392470353414,
#  'subsample': 0.6833400971272943,
#  'gamma': 0.027285193517196715,
#  'alpha': 0.04647160701094655,
#  'lambd': 1.8796361500915535}

In [173]:
params = {
    # Parameters that we are going to tune.
    'no_estimators': 4000,
    'max_depth':int(round(bayesian_params["max_depth"])),
    'min_child_weight': bayesian_params["min_child_weight"],
    'eta':.1,
    'subsample': bayesian_params['subsample'],
    'colsample_bytree': bayesian_params['colsample_bytree'],
    'gamma':bayesian_params['gamma'],
    'alpha':bayesian_params['alpha'],
    'lambda':bayesian_params["lambd"],
    # Other parameters
    'objective':'reg:linear',
}

In [174]:
num_boost_round = 3200

In [175]:
# #dont run
# cv_results = xgb.cv(
#     params,
#     xgtrain,
#     num_boost_round=num_boost_round,
#     seed=42,
#     nfold=5,
#     metrics={'rmse'},
#     early_stopping_rounds=10
# )

In [176]:
# cv_results['test-rmse-mean'].min()

Parameter ETA

In [177]:
# %time
# # This can take some time…
# min_rmse = float("Inf")
# best_params = None

# for eta in [.3, .2, .1, .05, .01, .005]:
#     print("CV with eta={}".format(eta))

#     # We update our parameters
#     params['eta'] = eta

#     # Run and time CV
#     cv_results = xgb.cv(
#         params,
#         xgtrain,
#         num_boost_round=num_boost_round,
#         seed=42,
#         nfold=5,
#         metrics=['rmse'],
#         early_stopping_rounds=10
#     )

#     # Update best score
#     mean_rmse = cv_results['test-rmse-mean'].min()
#     boost_rounds = cv_results['test-rmse-mean'].argmin()
#     print("\tRMSE {} for {} rounds\n".format(mean_rmse, boost_rounds))
#     if mean_rmse < min_rmse:
#         min_rmse = mean_rmse
#         best_params = eta

# print("Best params: {}, RMSE: {}".format(best_params, min_rmse))

In [178]:
params['eta'] = 0.01

In [179]:
params

{'alpha': 0.12250868020871097,
 'colsample_bytree': 0.96,
 'eta': 0.01,
 'gamma': 0.005574792859275091,
 'lambda': 1.8707339442110735,
 'max_depth': 2,
 'min_child_weight': 2.446240886983791,
 'no_estimators': 4000,
 'objective': 'reg:linear',
 'subsample': 0.7}

### XGBoost model

In [182]:
model_xgb = xgb.XGBRegressor(colsample_bytree=params['colsample_bytree'], 
                             gamma=params['gamma'],
                             learning_rate=params['eta'], max_depth=int(round(params['max_depth'])), 
                             min_child_weight=params['min_child_weight'], n_estimators=4000,
                             reg_alpha=params['alpha'], reg_lambda=params['lambda'],
                             subsample=params['subsample'], silent=1,
                             random_state =42, nthread = -1)
#2200
model_xgb.fit(X_train, y_train, early_stopping_rounds=10, 
             eval_set=[(X_test, y_test)], verbose=False)
  
y_train_xgb = model_xgb.predict(X_train)
y_test_xgb = model_xgb.predict(X_test)
xgb_prediction = model_xgb.predict(test)


In [183]:
pred_df = pd.DataFrame(np.exp(xgb_prediction), index=test_ID, columns=["SalePrice"])
pred_df.to_csv('./data/xgb_regressor_test_pca.csv', 
               header=True, index_label='Id')

## LASSO

#### LASSO model parameter optimization

In [142]:
lassocv = linear_model.LassoCV(cv=10, random_state=5, alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 
                          0.3, 0.6, 1, 3, 6, 10, 30, 60, 100])
lassocv.fit(X_train, y_train)

y_train_las = lassocv.predict(X_train)
y_test_las = lassocv.predict(X_test)
las_prediction = lassocv.predict(test)

# lassocv_score = lassocv.score(train, ytrain)
lassocv_alpha = lassocv.alpha_



In [30]:
lassocv_alpha

0.0006

In [144]:
model_lasso = linear_model.Lasso(alpha=lassocv_alpha)

## Ridge
### Ridge model parameter optimization

In [145]:
ridgecv = linear_model.RidgeCV(cv=5, alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 
                          0.3, 0.6, 1, 3, 6, 10, 30, 60, 100])
ridgecv.fit(X_train, y_train)

y_train_rdg = ridgecv.predict(X_train)
y_test_rdg = ridgecv.predict(X_test)
rdg_prediction = ridgecv.predict(test)

#ridgecv_score = ridgecv.score(train, ytrain)
ridgecv_alpha = ridgecv.alpha_

In [34]:
ridgecv_alpha

10

In [146]:
model_ridge = linear_model.Ridge(alpha=ridgecv_alpha)

In [147]:
print("Lasso RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_las)))
print("Ridge RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_rdg)))
print("XGB RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_xgb)))

Lasso RMSE on Test set : 0.105220989126
Ridge RMSE on Test set : 0.10753575676
XGB RMSE on Test set : 0.116164914077


In [47]:
averaged_test = (y_test_las+y_test_rdg+y_test_xgb)/3
print("Average RMSE on Test set :", np.sqrt(mean_squared_error(y_test,averaged_test)))

Average RMSE on Test set : 0.1050434414834762


In [149]:
y_pred = (las_prediction)#+rdg_prediction+xgb_prediction)/3
pred_df = pd.DataFrame(np.exp(y_pred), index=test_ID, columns=["SalePrice"])
pred_df.to_csv('./data/lasso_models.csv', 
               header=True, index_label='Id')

From here it is not working yet
## Random forest regressors
### Random forest parameters optimization

In [98]:
def rfrcv(n_estimators, min_samples_split, max_features, max_depth):
    val = cross_val_score(
        RFR(n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999),
            max_depth=int(max_depth),
            random_state=2,
            criterion='mae',
        ),
        train.values, y=ytrain, scoring='neg_mean_squared_error', cv=5
    ).mean()
    return val

In [100]:
gp_params = {"alpha": 1e-5}
rfrBO = BayesianOptimization(
        rfrcv,
        {'n_estimators': (10, 100),
        'min_samples_split': (2, 15),
        'max_features': (0.1, 0.999),
        'max_depth':(2,12)}#min_samples_leaf
)
rfrBO.maximize(n_iter=10, **gp_params)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   max_depth |   max_features |   min_samples_split |   n_estimators | 
    1 | 00m20s | [35m  -0.02239[0m | [32m     6.6050[0m | [32m        0.1979[0m | [32m             5.1960[0m | [32m       51.3081[0m | 
    2 | 00m37s | [35m  -0.02206[0m | [32m     6.5539[0m | [32m        0.2196[0m | [32m             5.8926[0m | [32m       88.3707[0m | 
    3 | 00m47s | [35m  -0.01877[0m | [32m     9.0808[0m | [32m        0.5337[0m | [32m             8.5704[0m | [32m       43.4988[0m | 
    4 | 00m36s |   -0.02446 |      5.3131 |         0.3435 |              8.6749 |        60.5250 | 
    5 | 00m39s |   -0.01884 |     10.4136 |         0.4562 |              2.8188 |        40.6731 | 
[31mBayesian Optimization[0m
[94m------------------------------------------------------------------------------------------

  " state: %s" % convergence_dict)


In [102]:
rfr_params = rfrBO.res["max"]["max_params"]
rfr_params

{'n_estimators': 43.49883128432349,
 'min_samples_split': 8.570423823608582,
 'max_features': 0.5337067867029887,
 'max_depth': 9.080765381947682}

In [106]:
model_rfr = RFR(n_estimators = int(round(rfr_params["n_estimators"])),
                criterion="mae",
               min_samples_split = int(round(rfr_params["min_samples_split"])),
               max_features = rfr_params["max_features"],
               max_depth = int(round(rfr_params["max_depth"])))


In [53]:
#Validation function
n_folds = 5

def rmsle_cv(model):
    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(train.values)
    rmse= np.sqrt(-cross_val_score(model, train.values, ytrain, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

In [54]:
score = rmsle_cv(model_xgb)
print("Xgboost score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Xgboost score: 0.1135 (0.0057)



In [59]:
score = rmsle_cv(model_lasso)
print("Lasso score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Lasso score: 0.1100 (0.0048)



In [77]:
score = rmsle_cv(model_ridge)
print("Ridge score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Ridge score: 0.1118 (0.0043)



In [107]:
score = rmsle_cv(model_rfr)
print("Random forest regressor score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Random forest regressor score: 0.1380 (0.0055)



## Stacked regression

In [110]:
# Parameter optimization for svr_rbf
svr_rbf = SVR(kernel='rbf')
regressors = [model_lasso, model_ridge, model_xgb,svr_rbf]
stregr = StackingRegressor(regressors=regressors, 
                           meta_regressor=)

params = {'meta-svr__C': [0.1, 1.0, 10.0, 100.0],
          'meta-svr__gamma': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr, 
                    param_grid=params, 
                    cv=5,
                    refit=True)
grid.fit(train, ytrain)

for params, mean_score, scores in grid.grid_scores_:
        print("%0.3f +/- %0.2f %r"
              % (mean_score, scores.std() / 2.0, params))

0.915 +/- 0.00 {'meta-svr__C': 0.1, 'meta-svr__gamma': 0.1}
0.912 +/- 0.00 {'meta-svr__C': 0.1, 'meta-svr__gamma': 1.0}
0.880 +/- 0.01 {'meta-svr__C': 0.1, 'meta-svr__gamma': 10.0}
0.923 +/- 0.00 {'meta-svr__C': 1.0, 'meta-svr__gamma': 0.1}
0.916 +/- 0.00 {'meta-svr__C': 1.0, 'meta-svr__gamma': 1.0}
0.903 +/- 0.01 {'meta-svr__C': 1.0, 'meta-svr__gamma': 10.0}
0.915 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 0.1}
0.912 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 1.0}
0.906 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 10.0}
0.912 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 0.1}
0.910 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 1.0}
0.900 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 10.0}




In [113]:
grid.best_params_

{'meta-svr__C': 1.0, 'meta-svr__gamma': 0.1}

In [116]:
svr_rbf_opt = SVR(kernel='rbf', gamma=0.1, C=1.)
stregr_opt = StackingRegressor(regressors=regressors, 
                           meta_regressor=svr_rbf_opt)

In [117]:
score = rmsle_cv(stregr_opt)
print("Stacked regressors: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Stacked regressors: 0.1109 (0.0047)



In [119]:
stregr_opt.fit(train,ytrain)

StackingRegressor(meta_regressor=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
         refit=True,
         regressors=[Lasso(alpha=0.00040842386526745213, copy_X=True, fit_intercept=True,
   max_iter=1000, normalize=False, positive=False, precompute=False,
   random_state=None, selection='cyclic', tol=0.0001, warm_start=False), Ridge(alpha=12.915496650148853, copy_X=True, fit_intercept=True,
   max_iter=...a=1.8796361500915535, scale_pos_weight=1, seed=None,
       silent=1, subsample=0.6833400971272943)],
         store_train_meta_features=False, use_features_in_secondary=False,
         verbose=0)

In [120]:
y_pred = stregr_opt.predict(test)
pred_df = pd.DataFrame(np.exp(y_pred), index=test_ID, columns=["SalePrice"])
pred_df.to_csv('/Users/holy/dsi/module1/stacked_regressors_test.csv', 
               header=True, index_label='Id')

In [184]:
from mlxtend.regressor import StackingCVRegressor
from sklearn.datasets import load_boston
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score
import numpy as np

RANDOM_SEED = 42

X, y = load_boston(return_X_y=True)

svr = SVR(kernel='linear')
lasso = Lasso()
rf = RandomForestRegressor(n_estimators=5, 
                           random_state=RANDOM_SEED)

# The StackingCVRegressor uses scikit-learn's check_cv
# internally, which doesn't support a random seed. Thus
# NumPy's random seed need to be specified explicitely for
# deterministic behavior
np.random.seed(RANDOM_SEED)
stack = StackingCVRegressor(regressors=(svr, lasso, rf),
                            meta_regressor=lasso)

In [186]:
stack.get_params()

{'cv': 5,
 'lasso': Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
    normalize=False, positive=False, precompute=False, random_state=None,
    selection='cyclic', tol=0.0001, warm_start=False),
 'lasso__alpha': 1.0,
 'lasso__copy_X': True,
 'lasso__fit_intercept': True,
 'lasso__max_iter': 1000,
 'lasso__normalize': False,
 'lasso__positive': False,
 'lasso__precompute': False,
 'lasso__random_state': None,
 'lasso__selection': 'cyclic',
 'lasso__tol': 0.0001,
 'lasso__warm_start': False,
 'meta-lasso': Lasso(alpha=1.0, copy_X=True, fit_intercept=True, max_iter=1000,
    normalize=False, positive=False, precompute=False, random_state=None,
    selection='cyclic', tol=0.0001, warm_start=False),
 'meta-lasso__alpha': 1.0,
 'meta-lasso__copy_X': True,
 'meta-lasso__fit_intercept': True,
 'meta-lasso__max_iter': 1000,
 'meta-lasso__normalize': False,
 'meta-lasso__positive': False,
 'meta-lasso__precompute': False,
 'meta-lasso__random_state': None,
 'meta-lasso__select