In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_log_error as mse
from sklearn.metrics import make_scorer
import lightgbm as lgbm
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import *
import pickle

# Plot appearance: seaborn "darkgrid" theme scaled for notebook display.
sns.set(context="notebook", style="darkgrid")
# presumably the default figure width / height; unused in the visible cells
xsize = 12.0
ysize = 8.0

# Seed NumPy's global random state.
rand_seed = 1009
np.random.seed(rand_seed)

# Suppress every warning from here on (this also hides deprecation notices).
import warnings
warnings.filterwarnings('ignore')

In [15]:
# Feature tables for the train and test splits; index_col=False stops pandas
# from using the first CSV column as the index.
X_train, X_test = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in ("../data/X_train.csv", "../data/X_test.csv")
)

# Training targets, parsed with genfromtxt's default (float) dtype.
y_train = np.genfromtxt("../data/y_train.txt")

# Row identifiers for each split, cast to integers after parsing.
train_id, test_id = (
    np.genfromtxt(id_path).astype(int)
    for id_path in ("../data/train_id.txt", "../data/test_id.txt")
)

In [3]:
def log_rmse(y_true, y_pred):
    """Root of the mean squared logarithmic error, with negative predictions floored at 0.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values. Negative entries are scored as 0.0. The object passed
        in is never modified.

    Returns
    -------
    float
        ``np.sqrt(mse(y_true, clipped))`` where ``mse`` is the
        ``mean_squared_log_error`` imported at the top of the notebook and
        ``clipped`` is ``y_pred`` with negatives replaced by 0.0.
    """
    # np.maximum allocates a new array, so the caller's predictions are left
    # intact (the previous in-place assignment silently rewrote them), and
    # np.asarray lets plain lists be scored as well.
    clipped = np.maximum(np.asarray(y_pred), 0.0)
    return np.sqrt(mse(y_true, clipped))

# log_rmse is a loss (lower is better), so greater_is_better=False tells
# scikit-learn to sign-flip it when the scorer is used for model selection.
log_rmse_scorer = make_scorer(
    score_func=log_rmse,
    greater_is_better=False,
)

In [4]:
# Base models, keyed by name; each one is read from
# ../models/<name>_regression.pkl.
models = {}
model_names = ["elastic_net", "lightgbm", "linear", "ridge", "theil_sen"]

# NOTE(review): unpickling can execute arbitrary code -- only load model files
# that were produced by a trusted pipeline.
for model_name in model_names:
    # `with` closes the handle even when unpickling raises; the earlier manual
    # open()/close() pair leaked the file in that case.
    with open("../models/"+model_name+"_regression.pkl", "rb") as model_file:
        models[model_name] = pickle.load(model_file)

In [6]:
# Meta-feature matrix for the training rows: one row per entry of y_train and
# one column per base model, in `model_names` order.
# NOTE(review): if the pickled models were fitted on this same X_train, these
# are in-sample predictions and the stacker fitted on them may be optimistic;
# out-of-fold predictions would avoid that -- TODO confirm.
n_rows, n_models = len(y_train), len(model_names)
base_pred = np.zeros((n_rows, n_models))

for col, base_name in enumerate(model_names):
    base_model = models[base_name]
    base_pred[:, col] = base_model.predict(X_train)

In [8]:
# Meta-learner: a linear regression over the base-model predictions.
# `normalize` is intentionally not passed: the argument was deprecated in
# scikit-learn 1.0 and removed in 1.2, where supplying it raises TypeError.
# Its old default was False, so the fitted model is unchanged on older versions.
stacked_reg = LinearRegression(fit_intercept=True, copy_X=True, n_jobs=None)
stacked_reg.fit(base_pred, y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [9]:
# Score the stacked model on whatever `base_pred` currently holds against
# y_train, and print the resulting log-RMSE with five decimals.
y_pred = stacked_reg.predict(base_pred)
stacked_score = log_rmse(y_train, y_pred)
print("RMSE: %.5f" % stacked_score)

RMSE: 0.07618


In [16]:
# Rebuild the meta-feature matrix for the test rows and run the stacker on it.
# NOTE(review): this rebinds `base_pred` and `y_pred`; cells above that read
# those names will see test-set values if they are re-run after this one.
test_shape = (len(X_test), len(model_names))
base_pred = np.zeros(test_shape)

for col in range(len(model_names)):
    base_name = model_names[col]
    base_pred[:, col] = models[base_name].predict(X_test)

y_pred = stacked_reg.predict(base_pred)

In [17]:
# Two-column table (Id, SalePrice) pairing each test id with its stacked
# prediction, written out without the DataFrame index.
submission_path = "../submissions/5_base_stacked_model_24_21_2018.csv"
submission_df = pd.DataFrame({"Id": test_id, "SalePrice": y_pred})
submission_df.to_csv(submission_path, index=False)