In [17]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from scipy import stats
from scipy.stats import norm
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.ensemble import RandomForestRegressor,  GradientBoostingRegressor
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import ElasticNet, Lasso,  BayesianRidge, LassoLarsIC, LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, cross_val_score, train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import RobustScaler
from sklearn.preprocessing import StandardScaler

%matplotlib inline

In [18]:
# Directory that holds the three input CSV files. It can be overridden by
# setting the HOUSE_PRICE_DATA_DIR environment variable, so the notebook runs
# on machines other than the original author's; the default is the original
# location.
DATA_DIR = Path(os.environ.get(
    'HOUSE_PRICE_DATA_DIR',
    '/Users/pol.molinas/Downloads/Kaggle/House Price/Data Input',
))

# Training features, training target (the 'SalePrice' column) and test features.
X = pd.read_csv(DATA_DIR / 'train_final.csv')
y = pd.read_csv(DATA_DIR / 'y_train_final.csv')['SalePrice']
test = pd.read_csv(DATA_DIR / 'test_final.csv')

# 1st Round of Models

In [20]:
# First-round candidate regressors. Each one is a two-step pipeline:
# RobustScaler followed by the estimator.

# Linear models.
ENet = make_pipeline(
    RobustScaler(),
    ElasticNet(l1_ratio=.9, alpha=0.0005, random_state=3),
)
lasso = make_pipeline(
    RobustScaler(),
    Lasso(max_iter=5000, alpha=0.5, random_state=1),
)

# Boosted-tree models.
gb = make_pipeline(
    RobustScaler(),
    GradientBoostingRegressor(
        loss='huber',
        n_estimators=3000,
        learning_rate=0.05,
        max_depth=4,
        max_features='sqrt',
        min_samples_leaf=15,
        min_samples_split=10,
        random_state=5,
    ),
)
model_xgb = make_pipeline(
    RobustScaler(),
    xgb.XGBRegressor(
        n_estimators=2200,
        learning_rate=0.05,
        max_depth=3,
        min_child_weight=1.7817,
        gamma=0.0468,
        subsample=0.5213,
        colsample_bytree=0.4603,
        reg_alpha=0.4640,
        reg_lambda=0.8571,
        silent=1,
        random_state=7,
        nthread=-1,
    ),
)

# Grid Search 

In [51]:
# Grid search over learning rate and tree depth for gradient boosting:
# 3 x 3 = 9 candidates, each evaluated with 3-fold cross-validation.
# NOTE(review): the `gb` pipeline defined in the model cells also sets
# max_features='sqrt'; this search estimator does not - confirm that the
# difference is intended.
param_grid = {'learning_rate': [0.04, 0.05, 0.06], 'max_depth': [2, 4, 6]}
search_gb = GridSearchCV(
    GradientBoostingRegressor(n_estimators=3000, loss='huber',
                              min_samples_leaf=15,
                              min_samples_split=10,
                              random_state=5),
    param_grid=param_grid,
    cv=3,
    # Run the candidate/fold fits in parallel; the results are unchanged
    # because the estimator has a fixed random_state.
    n_jobs=-1,
)
gb_pipe_search = make_pipeline(RobustScaler(), search_gb)

# Fit through the pipeline so the search sees RobustScaler-transformed
# features, as the final `gb` pipeline does. Pipeline.fit fits its steps in
# place, so `search_gb` itself is fitted and exposes best_params_.
gb_pipe_search.fit(X, y)
search_gb.best_params_

KeyboardInterrupt: 

In [None]:
# Grid search over learning rate and tree depth for XGBoost:
# 3 x 3 = 9 candidates, each evaluated with 3-fold cross-validation.
# The remaining hyperparameters match the `model_xgb` pipeline.
param_grid = {'learning_rate': [0.04, 0.05, 0.06], 'max_depth': [2, 4, 6]}
search_xgb = GridSearchCV(
    xgb.XGBRegressor(colsample_bytree=0.4603, gamma=0.0468,
                     min_child_weight=1.7817, n_estimators=2200,
                     reg_alpha=0.4640, reg_lambda=0.8571,
                     subsample=0.5213, silent=1,
                     random_state=7, nthread=-1),
    param_grid=param_grid,
    cv=3,
)
xgb_pipe_search = make_pipeline(RobustScaler(), search_xgb)

# Fit through the pipeline so the search sees RobustScaler-transformed
# features, as the final `model_xgb` pipeline does. Pipeline.fit fits its
# steps in place, so `search_xgb` itself is fitted and exposes best_params_.
xgb_pipe_search.fit(X, y)
search_xgb.best_params_

In [None]:
# Grid search for the elastic net: 100 evenly spaced alpha values in
# [0.0005, 0.01] crossed with three l1_ratio values, 3-fold cross-validation.
param_grid = {'alpha': np.linspace(0.0005, 0.01, 100), 'l1_ratio': [0.3, 0.5, 0.9]}
search_enet = GridSearchCV(ElasticNet(random_state=3), param_grid=param_grid, cv=3)
enet_pipe_search = make_pipeline(RobustScaler(), search_enet)

# Fit through the pipeline rather than the bare search. The penalty strength
# depends on feature scale, so alpha must be tuned on RobustScaler-transformed
# features to be usable in the `ENet` pipeline, which scales before fitting.
# Pipeline.fit fits its steps in place, so `search_enet` exposes best_params_.
enet_pipe_search.fit(X, y)
search_enet.best_params_

# Optimized Models

In [None]:
# Models used for the final ensemble. Each is RobustScaler followed by the
# estimator. The hyperparameters here are the same values as in the
# first-round definitions; nothing is read from the grid-search results.
gb = make_pipeline(
    RobustScaler(),
    GradientBoostingRegressor(
        n_estimators=3000,
        learning_rate=0.05,
        max_depth=4,
        max_features='sqrt',
        min_samples_leaf=15,
        min_samples_split=10,
        loss='huber',
        random_state=5,
    ),
)

lasso = make_pipeline(
    RobustScaler(),
    Lasso(alpha=0.5, random_state=1, max_iter=5000),
)

model_xgb = make_pipeline(
    RobustScaler(),
    xgb.XGBRegressor(
        colsample_bytree=0.4603,
        gamma=0.0468,
        learning_rate=0.05,
        max_depth=3,
        min_child_weight=1.7817,
        n_estimators=2200,
        reg_alpha=0.4640,
        reg_lambda=0.8571,
        subsample=0.5213,
        silent=1,
        random_state=7,
        nthread=-1,
    ),
)

ENet = make_pipeline(
    RobustScaler(),
    ElasticNet(alpha=0.0005, l1_ratio=.9, random_state=3),
)

# Averaging Models Class

In [24]:
# Mixins are listed before BaseEstimator, as scikit-learn's developer guide
# requires, so that the mixins' overrides take precedence in the MRO.
class AveragingModels(RegressorMixin, TransformerMixin, BaseEstimator):
    """Ensemble regressor that predicts the unweighted mean of its base models.

    Parameters
    ----------
    models : sequence of estimators
        Base regressors exposing ``fit(X, y)`` and ``predict(X)``. The objects
        passed in are never fitted; ``fit`` trains clones of them.

    Attributes
    ----------
    models_ : list of estimators
        Fitted clones of ``models``. Set by ``fit``.
    """

    def __init__(self, models):
        self.models = models

    def fit(self, X, y):
        """Clone every base model and fit each clone on ``(X, y)``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``models`` is empty; there would be nothing to average.
        """
        fitted = [clone(model) for model in self.models]
        if not fitted:
            raise ValueError("AveragingModels requires at least one base model.")

        for estimator in fitted:
            estimator.fit(X, y)

        # Publish the clones only after all of them were fitted successfully.
        self.models_ = fitted
        return self

    def predict(self, X):
        """Return the per-sample mean of the fitted base models' predictions."""
        # One column per base model, one row per sample.
        predictions = np.column_stack([
            estimator.predict(X) for estimator in self.models_
        ])
        return np.mean(predictions, axis=1)

# Models Fit

In [25]:
# Averaging ensemble of the gradient-boosting, XGBoost and elastic-net
# pipelines (the lasso pipeline is not part of it).
avg = AveragingModels([gb, model_xgb, ENet])

In [26]:
# Averaging ensemble whose members are the scaler + grid-search pipelines.
# AveragingModels.fit clones its members, so fitting this object would run
# every grid search again on the clones.
avg_search = AveragingModels([gb_pipe_search, enet_pipe_search, xgb_pipe_search])

In [27]:
# Train the averaging ensemble on the full training set.
avg.fit(X=X, y=y)

AveragingModels(models=[Pipeline(memory=None,
     steps=[('robustscaler', RobustScaler(copy=True, quantile_range=(25.0, 75.0), with_centering=True,
       with_scaling=True)), ('gradientboostingregressor', GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.05, lo...False, precompute=False,
      random_state=3, selection='cyclic', tol=0.0001, warm_start=False))])])

In [28]:
# Mean squared error on the same data the ensemble was fitted on, i.e. a
# training-set score, not an estimate of out-of-sample error.
# Arguments follow the documented (y_true, y_pred) order.
mean_squared_error(y, avg.predict(X))

0.005314563139834841

# Write Submission

In [None]:
# Disabled submission writer: builds an Id / SalePrice frame from the
# ensemble's predictions on `test` and writes it to 'submission7.csv'.
# NOTE(review): `test_ID` is not defined anywhere in the visible notebook; it
# must be created (presumably from the raw test set's Id column) before this
# cell is re-enabled.
# NOTE(review): np.expm1 suggests the target was log1p-transformed upstream -
# confirm against the preprocessing that produced y_train_final.csv.
#sub = pd.DataFrame()
#sub['Id'] = test_ID
#sub['SalePrice'] = np.expm1(avg.predict(test))
#sub.to_csv('submission7.csv',index=False)