Average the results from lasso, ridge and XGBoost

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
import xgboost as xgb
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.feature_selection import SelectFromModel
from sklearn import metrics   #Additional scklearn functions
from xgboost.sklearn import XGBRegressor
from scipy.stats import norm, skew #for some statistics
from bayes_opt import BayesianOptimization
from sklearn.model_selection import  cross_val_score
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor as RFR
from mlxtend.regressor import StackingRegressor
from sklearn.svm import SVR

In [2]:
train = pd.read_csv("../data/train_clean.csv")
test = pd.read_csv("../data/test_clean.csv")

In [3]:
#Save the 'Id' column
train_ID = train['Id']
test_ID = test['Id']

#Now drop the 'Id' colum since it's unnecessary for the prediction process.
train.drop("Id", axis = 1, inplace = True)
test.drop("Id", axis = 1, inplace = True)

ytrain = train["SalePrice"]

In [4]:
#combine data
train.drop("Unnamed: 0", axis = 1, inplace = True)
test.drop("Unnamed: 0", axis = 1, inplace = True)
ntrain = train.shape[0]
ntest = test.shape[0]
train.drop(['SalePrice'], axis=1, inplace=True)
all_data = pd.concat((train, test)).reset_index(drop=True)

In [5]:
# MSSubClass should be string
all_data["MSSubClass"] = all_data["MSSubClass"].apply(str)

In [6]:
# get all continuous variables
all_non_object = all_data.dtypes[all_data.dtypes != "object"].index.tolist()
# do not consider Year,Month and Qual as continuous
year_month = ["YearBuilt", "YearRemodAdd","GarageYrBlt","MoSold","YrSold",
              "OverallQual","OverallCond"]
# numeric_features
numeric_features = list(set(all_non_object)-set(year_month))

In [7]:
# Check the skew of all numerical features
skewed_feats = all_data[numeric_features].apply(lambda x: skew(x.dropna())).sort_values(ascending=False)

skewness = pd.DataFrame({'Skew' :skewed_feats})

# check skewness of numerical variables
skewness = skewness[abs(skewness.Skew)>0.75]
print("There are {} skewed numerical features to Box Cox transform".format(skewness.shape[0]))

from scipy.special import boxcox1p
skewed_features = skewness.index
lam = 0.15
for feat in skewed_features:
    #all_data[feat] += 1
    all_data[feat] = boxcox1p(all_data[feat], lam)

There are 19 skewed numerical features to Box Cox transform


In [8]:
qual_dict = {"None": 0, "Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}
all_data["ExterQual"] = all_data["ExterQual"].map(qual_dict).astype(int)
all_data["ExterCond"] = all_data["ExterCond"].map(qual_dict).astype(int)
all_data["BsmtQual"] = all_data["BsmtQual"].map(qual_dict).astype(int)
all_data["BsmtCond"] = all_data["BsmtCond"].map(qual_dict).astype(int)
all_data["HeatingQC"] = all_data["HeatingQC"].map(qual_dict).astype(int)
all_data["KitchenQual"] = all_data["KitchenQual"].map(qual_dict).astype(int)
all_data["FireplaceQu"] = all_data["FireplaceQu"].map(qual_dict).astype(int)
all_data["GarageQual"] = all_data["GarageQual"].map(qual_dict).astype(int)
all_data["GarageCond"] = all_data["GarageCond"].map(qual_dict).astype(int)

all_data["BsmtExposure"] = all_data["BsmtExposure"].map(
        {"None": 0, "No": 1, "Mn": 2, "Av": 3, "Gd": 4}).astype(int)

bsmt_fin_dict = {"None": 0, "Unf": 1, "LwQ": 2, "Rec": 3, "BLQ": 4, "ALQ": 5, "GLQ": 6}
all_data["BsmtFinType1"] = all_data["BsmtFinType1"].map(bsmt_fin_dict).astype(int)
all_data["BsmtFinType2"] = all_data["BsmtFinType2"].map(bsmt_fin_dict).astype(int)

all_data["Functional"] = all_data["Functional"].map(
        {"None": 0, "Sal": 1, "Sev": 2, "Maj2": 3, "Maj1": 4, 
         "Mod": 5, "Min2": 6, "Min1": 7, "Typ": 8}).astype(int)

all_data["GarageFinish"] = all_data["GarageFinish"].map(
        {"None": 0, "Unf": 1, "RFn": 2, "Fin": 3}).astype(int)

all_data["Fence"] = all_data["Fence"].map(
        {"None": 0, "MnWw": 1, "GdWo": 2, "MnPrv": 3, "GdPrv": 4}).astype(int)

all_data["PoolQC"] = all_data["PoolQC"].map(qual_dict).astype(int)

# Most land slopes are gentle; treat the others as "not gentle".
all_data["LandSlope"] = (all_data["LandSlope"] == "Gtl") * 1
# IR2 and IR3 don't appear that often, so just make a distinction
# between regular and irregular.
all_data["LotShape"] = (all_data["LotShape"] == "Reg") * 1
# Most properties use standard circuit breakers.
all_data["Electrical"] = (all_data["Electrical"] == "SBrkr") * 1
# Most have a paved drive. Treat dirt/gravel and partial pavement
# as "not paved".
all_data["PavedDrive"] = (all_data["PavedDrive"] == "Y") * 1

In [28]:
# # label encoding
# for c in all_data.columns:
#     if all_data[c].dtype == 'object' or c in year_month:
#         lbl = preprocessing.LabelEncoder()
#         lbl.fit(list(all_data[c].values)) 
#         all_data[c] = lbl.transform(list(all_data[c].values))

In [9]:
all_data = pd.get_dummies(all_data)
print(all_data.shape)

(2917, 233)


In [10]:
# split data
train = all_data[:ntrain]
test = all_data[ntrain:] #prediction data 

In [11]:
test_size = 0.3
X_train, X_test, y_train, y_test = train_test_split(train, 
                                                    ytrain,
                                                    train_size=1-test_size, 
                                                    test_size=test_size, 
                                                random_state=0)

## Base models

### XGBoost
#### XGBoost parameter optimization

In [12]:
def xgb_evaluate(min_child_weight,
                 colsample_bytree,
                 max_depth,
                 subsample,
                 gamma,
                 alpha,lambd):

    params['min_child_weight'] = int(min_child_weight)
    params['cosample_bytree'] = max(min(colsample_bytree, 1), 0)
    params['max_depth'] = int(max_depth)
    params['subsample'] = max(min(subsample, 1), 0)
    params['gamma'] = max(gamma, 0)
    params['alpha'] = max(alpha, 0)
    params['lambda'] = max(lambd, 0)


    cv_result = xgb.cv(params, xgtrain, num_boost_round=num_rounds, nfold=5,
             seed=random_state,
             callbacks=[xgb.callback.early_stop(50)])

    return -cv_result['test-rmse-mean'].values[-1]

In [24]:
#loading data into d-matrices
xgtrain = xgb.DMatrix(X_train, label=y_train)
xgtest = xgb.DMatrix(X_test, label=y_test)
xgpred = xgb.DMatrix(test)

In [14]:
num_rounds = 3000
random_state = 42
num_iter = 25
init_points = 5
params = {
        'eta': 0.1,
        'silent': 1,
        'eval_metric': 'rmse',
        'verbose_eval': True,
        'seed': random_state
    }

xgbBO = BayesianOptimization(xgb_evaluate, {'min_child_weight': (1, 10),
                                                'colsample_bytree': (0.1, 1),
                                                'max_depth': (2, 12),
                                                'subsample': (0.5, 1),
                                                'gamma': (0, 2),
                                                'alpha': (0, 2),
                                                'lambd':(0, 2)
                                                })

xgbBO.maximize(init_points=init_points, n_iter=num_iter)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |     alpha |   colsample_bytree |     gamma |     lambd |   max_depth |   min_child_weight |   subsample | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[184]	train-rmse:0.144988+0.00294021	test-rmse:0.160464+0.0236221

    1 | 00m08s | [35m  -0.16046[0m | [32m   1.8862[0m | [32m            0.9723[0m | [32m   0.8843[0m | [32m   1.0377[0m | [32m     8.8073[0m | [32m            8.5348[0m | [32m     0.9537[0m | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[290]	train-rmse:0.135499+0.00241771	test-rmse:0.15077+0.0204325

    2 | 00m07s

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[311]	train-rmse:0.0767028+0.00351455	test-rmse:0.118518+0.0160332

   13 | 00m34s |   -0.11852 |    0.2397 |             0.1000 |    0.0000 |    2.0000 |      2.0000 |             5.1742 |      0.5000 | 




Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[311]	train-rmse:0.0862326+0.00264444	test-rmse:0.123621+0.0164946

   14 | 00m32s |   -0.12362 |    0.1076 |             0.3275 |    0.0577 |    1.9842 |      2.0418 |             1.5912 |      0.8701 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[389]	train-rmse:0.0426506+0.000403274	test-rmse:0.12483+0.0136338

   15 | 00m47s |   -0.12483 |    0.1268 |             0.4652 |    0.0145 |    1.7713 |     11.9014 |             1.3155 |      0.5356 | 


  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[371]	train-rmse:0.0703016+0.00342546	test-rmse:0.11905+0.0125093

   16 | 00m34s |   -0.11905 |    0.0956 |             0.7475 |    0.0115 |    1.7356 |      2.3746 |             6.8153 |      0.5157 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[300]	train-rmse:0.074621+0.0028524	test-rmse:0.118992+0.0148225

   17 | 00m31s |   -0.11899 |    0.0718 |             0.8846 |    0.0265 |    1.8061 |      2.1945 |             4.8041 |      0.5272 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[304]	train-rmse:0.0700302+0.00302086	test-rmse:0.11772+0.0146846

   18 | 00m32s | [

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[1175]	train-rmse:0.0899236+0.00178118	test-rmse:0.137567+0.0171291

   19 | 01m00s |   -0.13757 |    1.9360 |             0.5390 |    0.0288 |    1.8564 |      8.8035 |             1.9628 |      0.5110 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[210]	train-rmse:0.058804+0.00062095	test-rmse:0.120038+0.0154993

   20 | 00m38s |   -0.12004 |    0.2190 |             0.9470 |    0.0243 |    0.1052 |     10.4680 |             9.7027 |      0.5303 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[504]	train-rmse:0.0682994+0.00116291	test-rmse:0.127588+0.0149521

   21 | 00m42s 

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[556]	train-rmse:0.106516+0.00223467	test-rmse:0.13499+0.0189078

   25 | 00m50s |   -0.13499 |    2.0000 |             1.0000 |    0.0485 |    2.0000 |     12.0000 |            10.0000 |      0.5000 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[152]	train-rmse:0.0444772+0.00165467	test-rmse:0.12271+0.0155399

   26 | 00m39s |   -0.12271 |    0.0064 |             0.8739 |    0.0032 |    1.5279 |      7.1312 |             9.6030 |      0.5160 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[133]	train-rmse:0.0612004+0.00293504	test-rmse:0.126049+0.013692

   27 | 00m37s |   

  " state: %s" % convergence_dict)


Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[152]	train-rmse:0.0383384+0.00128678	test-rmse:0.123665+0.0125537

   29 | 00m38s |   -0.12366 |    0.0000 |             1.0000 |    0.0000 |    0.0000 |      4.9337 |             1.0000 |      0.5000 | 
Multiple eval metrics have been passed: 'test-rmse' will be used for early stopping.

Will train until test-rmse hasn't improved in 50 rounds.
Stopping. Best iteration:
[132]	train-rmse:0.0347894+0.00155219	test-rmse:0.122797+0.0156056

   30 | 00m37s |   -0.12280 |    0.0000 |             1.0000 |    0.0000 |    1.9580 |      9.9668 |             5.4795 |      0.5000 | 


In [15]:
bayesian_params = xgbBO.res["max"]["max_params"]

In [16]:
bayesian_params

{'min_child_weight': 2.6088424342829697,
 'colsample_bytree': 0.16110241153357344,
 'max_depth': 2.6088722740012296,
 'subsample': 0.5242784376438836,
 'gamma': 0.017567433812438527,
 'alpha': 0.007361616117448788,
 'lambd': 1.8548925645780876}

In [15]:
# xgb best params with train/test
# {'min_child_weight': 3.7316987233785577,
#  'colsample_bytree': 0.18788694151572233,
#  'max_depth': 2.58392470353414,
#  'subsample': 0.6833400971272943,
#  'gamma': 0.027285193517196715,
#  'alpha': 0.04647160701094655,
#  'lambd': 1.8796361500915535}

{'min_child_weight': 3.7316987233785577,
 'colsample_bytree': 0.18788694151572233,
 'max_depth': 2.58392470353414,
 'subsample': 0.6833400971272943,
 'gamma': 0.027285193517196715,
 'alpha': 0.04647160701094655,
 'lambd': 1.8796361500915535}

In [17]:
params = {
    # Parameters that we are going to tune.
    'max_depth':int(round(bayesian_params["max_depth"])),
    'min_child_weight': bayesian_params["min_child_weight"],
    'eta':.1,
    'subsample': bayesian_params['subsample'],
    'colsample_bytree': bayesian_params['colsample_bytree'],
    'gamma':bayesian_params['gamma'],
    'alpha':bayesian_params['alpha'],
    'lambda':bayesian_params["lambd"],
    # Other parameters
    'objective':'reg:linear',
}

In [19]:
num_boost_round = 3200

In [18]:
# #dont run
# cv_results = xgb.cv(
#     params,
#     xgtrain,
#     num_boost_round=num_boost_round,
#     seed=42,
#     nfold=5,
#     metrics={'rmse'},
#     early_stopping_rounds=10
# )

In [20]:
# cv_results['test-rmse-mean'].min()

0.1173928

Parameter ETA

In [21]:
%time
# This can take some time…
min_rmse = float("Inf")
best_params = None

for eta in [.3, .2, .1, .05, .01, .005]:
    print("CV with eta={}".format(eta))

    # We update our parameters
    params['eta'] = eta

    # Run and time CV
    cv_results = xgb.cv(
        params,
        xgtrain,
        num_boost_round=num_boost_round,
        seed=42,
        nfold=5,
        metrics=['rmse'],
        early_stopping_rounds=10
    )

    # Update best score
    mean_rmse = cv_results['test-rmse-mean'].min()
    boost_rounds = cv_results['test-rmse-mean'].argmin()
    print("\tRMSE {} for {} rounds\n".format(mean_rmse, boost_rounds))
    if mean_rmse < min_rmse:
        min_rmse = mean_rmse
        best_params = eta

print("Best params: {}, RMSE: {}".format(best_params, min_rmse))

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.11 µs
CV with eta=0.3


will be corrected to return the positional minimum in the future.
Use 'series.values.argmin' to get the position of the minimum now.


	RMSE 0.13783099999999998 for 121 rounds

CV with eta=0.2
	RMSE 0.12996259999999998 for 113 rounds

CV with eta=0.1
	RMSE 0.1215394 for 207 rounds

CV with eta=0.05
	RMSE 0.11942060000000002 for 420 rounds

CV with eta=0.01
	RMSE 0.11675060000000001 for 1715 rounds

CV with eta=0.005
	RMSE 0.11678439999999998 for 3074 rounds

Best params: 0.01, RMSE: 0.11675060000000001


In [22]:
params['eta'] = 0.01

In [23]:
params

{'max_depth': 3,
 'min_child_weight': 2.6088424342829697,
 'eta': 0.01,
 'subsample': 0.5242784376438836,
 'colsample_bytree': 0.16110241153357344,
 'gamma': 0.017567433812438527,
 'alpha': 0.007361616117448788,
 'lambda': 1.8548925645780876,
 'objective': 'reg:linear'}

### XGBoost model

In [25]:
model_xgb = xgb.XGBRegressor(colsample_bytree=params['colsample_bytree'], 
                             gamma=params['gamma'], 
                             learning_rate=params['eta'], max_depth=int(round(params['max_depth'])), 
                             min_child_weight=params['min_child_weight'], n_estimators=2200,
                             reg_alpha=params['alpha'], reg_lambda=params['lambda'],
                             subsample=params['subsample'], silent=1,
                             random_state =42, nthread = -1)

model_xgb.fit(X_train, y_train, early_stopping_rounds=10, 
             eval_set=[(X_test, y_test)], verbose=False)
  
y_train_xgb = model_xgb.predict(X_train)
y_test_xgb = model_xgb.predict(X_test)
xgb_prediction = model_xgb.predict(test)


## LASSO

#### LASSO model parameter optimization

In [29]:
lassocv = linear_model.LassoCV(cv=10, random_state=5, alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 
                          0.3, 0.6, 1, 3, 6, 10, 30, 60, 100])
lassocv.fit(X_train, y_train)

y_train_las = lassocv.predict(X_train)
y_test_las = lassocv.predict(X_test)
las_prediction = lassocv.predict(test)

# lassocv_score = lassocv.score(train, ytrain)
lassocv_alpha = lassocv.alpha_



In [30]:
lassocv_alpha

0.0006

In [31]:
model_lasso = linear_model.Lasso(alpha=lassocv_alpha)

## Ridge
### Ridge model parameter optimization

In [33]:
ridgecv = linear_model.RidgeCV(cv=5, alphas=[0.0001, 0.0003, 0.0006, 0.001, 0.003, 0.006, 0.01, 0.03, 0.06, 0.1, 
                          0.3, 0.6, 1, 3, 6, 10, 30, 60, 100])
ridgecv.fit(X_train, y_train)

y_train_rdg = ridgecv.predict(X_train)
y_test_rdg = ridgecv.predict(X_test)
rdg_prediction = ridgecv.predict(test)

#ridgecv_score = ridgecv.score(train, ytrain)
ridgecv_alpha = ridgecv.alpha_

In [34]:
ridgecv_alpha

10

In [35]:
model_ridge = linear_model.Ridge(alpha=ridgecv_alpha)

In [42]:
print("Lasso RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_las)))
print("Ridge RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_rdg)))
print("XGB RMSE on Test set :", np.sqrt(mean_squared_error(y_test,y_test_xgb)))

Lasso RMSE on Test set : 0.10522098912607274
Ridge RMSE on Test set : 0.10753575676011559
XGB RMSE on Test set : 0.11379053007977571


In [47]:
averaged_test = (y_test_las+y_test_rdg+y_test_xgb)/3
print("Average RMSE on Test set :", np.sqrt(mean_squared_error(y_test,averaged_test)))

Average RMSE on Test set : 0.1050434414834762


In [48]:
y_pred = (las_prediction+rdg_prediction+xgb_prediction)/3
pred_df = pd.DataFrame(np.exp(y_pred), index=test_ID, columns=["SalePrice"])
pred_df.to_csv('/Users/holy/dsi/module1/averaged_models.csv', 
               header=True, index_label='Id')

From here it is not working yet
## Random forest regressors
### Random forest parameters optimization

In [98]:
def rfrcv(n_estimators, min_samples_split, max_features, max_depth):
    val = cross_val_score(
        RFR(n_estimators=int(n_estimators),
            min_samples_split=int(min_samples_split),
            max_features=min(max_features, 0.999),
            max_depth=int(max_depth),
            random_state=2,
            criterion='mae',
        ),
        train.values, y=ytrain, scoring='neg_mean_squared_error', cv=5
    ).mean()
    return val

In [100]:
gp_params = {"alpha": 1e-5}
rfrBO = BayesianOptimization(
        rfrcv,
        {'n_estimators': (10, 100),
        'min_samples_split': (2, 15),
        'max_features': (0.1, 0.999),
        'max_depth':(2,12)}#min_samples_leaf
)
rfrBO.maximize(n_iter=10, **gp_params)

[31mInitialization[0m
[94m---------------------------------------------------------------------------------------------------[0m
 Step |   Time |      Value |   max_depth |   max_features |   min_samples_split |   n_estimators | 
    1 | 00m20s | [35m  -0.02239[0m | [32m     6.6050[0m | [32m        0.1979[0m | [32m             5.1960[0m | [32m       51.3081[0m | 
    2 | 00m37s | [35m  -0.02206[0m | [32m     6.5539[0m | [32m        0.2196[0m | [32m             5.8926[0m | [32m       88.3707[0m | 
    3 | 00m47s | [35m  -0.01877[0m | [32m     9.0808[0m | [32m        0.5337[0m | [32m             8.5704[0m | [32m       43.4988[0m | 
    4 | 00m36s |   -0.02446 |      5.3131 |         0.3435 |              8.6749 |        60.5250 | 
    5 | 00m39s |   -0.01884 |     10.4136 |         0.4562 |              2.8188 |        40.6731 | 
[31mBayesian Optimization[0m
[94m------------------------------------------------------------------------------------------

  " state: %s" % convergence_dict)


In [102]:
rfr_params = rfrBO.res["max"]["max_params"]
rfr_params

{'n_estimators': 43.49883128432349,
 'min_samples_split': 8.570423823608582,
 'max_features': 0.5337067867029887,
 'max_depth': 9.080765381947682}

In [106]:
model_rfr = RFR(n_estimators = int(round(rfr_params["n_estimators"])),
                criterion="mae",
               min_samples_split = int(round(rfr_params["min_samples_split"])),
               max_features = rfr_params["max_features"],
               max_depth = int(round(rfr_params["max_depth"])))


In [29]:
#Validation function
n_folds = 5

def rmsle_cv(model):
    kf = KFold(n_folds, shuffle=True, random_state=42).get_n_splits(train.values)
    rmse= np.sqrt(-cross_val_score(model, train.values, ytrain, scoring="neg_mean_squared_error", cv = kf))
    return(rmse)

In [30]:
score = rmsle_cv(model_xgb)
print("Xgboost score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Xgboost score: 0.1126 (0.0060)



In [59]:
score = rmsle_cv(model_lasso)
print("Lasso score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Lasso score: 0.1100 (0.0048)



In [77]:
score = rmsle_cv(model_ridge)
print("Ridge score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Ridge score: 0.1118 (0.0043)



In [107]:
score = rmsle_cv(model_rfr)
print("Random forest regressor score: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Random forest regressor score: 0.1380 (0.0055)



## Stacked regression

In [110]:
# Parameter optimization for svr_rbf
svr_rbf = SVR(kernel='rbf')
regressors = [model_lasso, model_ridge, model_xgb]
stregr = StackingRegressor(regressors=regressors, 
                           meta_regressor=svr_rbf)

params = {'meta-svr__C': [0.1, 1.0, 10.0, 100.0],
          'meta-svr__gamma': [0.1, 1.0, 10.0]}

grid = GridSearchCV(estimator=stregr, 
                    param_grid=params, 
                    cv=5,
                    refit=True)
grid.fit(train, ytrain)

for params, mean_score, scores in grid.grid_scores_:
        print("%0.3f +/- %0.2f %r"
              % (mean_score, scores.std() / 2.0, params))

0.915 +/- 0.00 {'meta-svr__C': 0.1, 'meta-svr__gamma': 0.1}
0.912 +/- 0.00 {'meta-svr__C': 0.1, 'meta-svr__gamma': 1.0}
0.880 +/- 0.01 {'meta-svr__C': 0.1, 'meta-svr__gamma': 10.0}
0.923 +/- 0.00 {'meta-svr__C': 1.0, 'meta-svr__gamma': 0.1}
0.916 +/- 0.00 {'meta-svr__C': 1.0, 'meta-svr__gamma': 1.0}
0.903 +/- 0.01 {'meta-svr__C': 1.0, 'meta-svr__gamma': 10.0}
0.915 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 0.1}
0.912 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 1.0}
0.906 +/- 0.00 {'meta-svr__C': 10.0, 'meta-svr__gamma': 10.0}
0.912 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 0.1}
0.910 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 1.0}
0.900 +/- 0.00 {'meta-svr__C': 100.0, 'meta-svr__gamma': 10.0}




In [113]:
grid.best_params_

{'meta-svr__C': 1.0, 'meta-svr__gamma': 0.1}

In [116]:
svr_rbf_opt = SVR(kernel='rbf', gamma=0.1, C=1.)
stregr_opt = StackingRegressor(regressors=regressors, 
                           meta_regressor=svr_rbf_opt)

In [117]:
score = rmsle_cv(stregr_opt)
print("Stacked regressors: {:.4f} ({:.4f})\n".format(score.mean(), score.std()))


Stacked regressors: 0.1109 (0.0047)



In [119]:
stregr_opt.fit(train,ytrain)

StackingRegressor(meta_regressor=SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.1,
  kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False),
         refit=True,
         regressors=[Lasso(alpha=0.00040842386526745213, copy_X=True, fit_intercept=True,
   max_iter=1000, normalize=False, positive=False, precompute=False,
   random_state=None, selection='cyclic', tol=0.0001, warm_start=False), Ridge(alpha=12.915496650148853, copy_X=True, fit_intercept=True,
   max_iter=...a=1.8796361500915535, scale_pos_weight=1, seed=None,
       silent=1, subsample=0.6833400971272943)],
         store_train_meta_features=False, use_features_in_secondary=False,
         verbose=0)

In [120]:
y_pred = stregr_opt.predict(test)
pred_df = pd.DataFrame(np.exp(y_pred), index=test_ID, columns=["SalePrice"])
pred_df.to_csv('/Users/holy/dsi/module1/stacked_regressors_test.csv', 
               header=True, index_label='Id')