In [None]:
Stacking with a Neural Network

In [1]:
from __future__ import division
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
 
from sklearn.datasets import fetch_california_housing
 
#From within an ipython notebook
%matplotlib inline
 
cali_housing = fetch_california_housing()
 
X = cali_housing.data
y = cali_housing.target

bins = np.arange(6)
binned_y = np.digitize(y, bins)

In [3]:
from sklearn.model_selection import train_test_split
X_train_prin, X_test_prin, y_train_prin, y_test_prin = train_test_split(X, y,test_size=0.2,stratify=binned_y,random_state=7)

binned_y_train_prin = np.digitize(y_train_prin, bins)

X_1, X_stack, y_1, y_stack = train_test_split(X_train_prin,y_train_prin,test_size=0.33,stratify=binned_y_train_prin,random_state=7 )

In [4]:
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor
 
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
 
mlp_pipe = Pipeline(steps=[('scale', StandardScaler()), ('neural_net', MLPRegressor())])
 
 
param_grid = {'neural_net__alpha': [0.02,0.01,0.005],
               'neural_net__hidden_layer_sizes' : [(50,50,50)],
               'neural_net__activation': ['relu'],
               'neural_net__solver' : ['adam']
               }
 
neural_net_gs = GridSearchCV(mlp_pipe, param_grid = param_grid,cv=3, n_jobs=-1)
neural_net_gs.fit(X_1, y_1)

GridSearchCV(cv=3, error_score='raise',
       estimator=Pipeline(memory=None,
     steps=[('scale', StandardScaler(copy=True, with_mean=True, with_std=True)), ('neural_net', MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learnin...=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False))]),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid={'neural_net__alpha': [0.02, 0.01, 0.005], 'neural_net__activation': ['relu'], 'neural_net__solver': ['adam'], 'neural_net__hidden_layer_sizes': [(50, 50, 50)]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [5]:
neural_net_gs.best_params_

{'neural_net__activation': 'relu',
 'neural_net__alpha': 0.005,
 'neural_net__hidden_layer_sizes': (50, 50, 50),
 'neural_net__solver': 'adam'}

In [6]:
neural_net_gs.best_score_

0.77494880692463708

In [7]:
nn_best = neural_net_gs.best_estimator_
import pickle
 
f = open('nn_best.save', 'wb')
pickle.dump(nn_best, f, protocol = pickle.HIGHEST_PROTOCOL)
f.close()

In [8]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import GradientBoostingRegressor
 
 
param_grid = {'learning_rate': [0.1,0.05,0.03,0.01],
               'loss': ['huber'],
               'max_depth': [5,7,10],
               'max_features': [0.4,0.6,0.8,1.0],
               'min_samples_leaf': [2,3,5],
               'n_estimators': [100],
               'warm_start': [True], 'random_state':[7]
               }
 
boost_gs = RandomizedSearchCV(GradientBoostingRegressor(), param_distributions = param_grid,cv=3, n_jobs=-1,n_iter=25)
boost_gs.fit(X_1, y_1)

RandomizedSearchCV(cv=3, error_score='raise',
          estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='ls', max_depth=3, max_features=None,
             max_leaf_nodes=None, min_impurity_decrease=0.0,
             min_impurity_split=None, min_samples_leaf=1,
             min_samples_split=2, min_weight_fraction_leaf=0.0,
             n_estimators=100, presort='auto', random_state=None,
             subsample=1.0, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=25, n_jobs=-1,
          param_distributions={'n_estimators': [100], 'loss': ['huber'], 'warm_start': [True], 'random_state': [7], 'max_features': [0.4, 0.6, 0.8, 1.0], 'learning_rate': [0.1, 0.05, 0.03, 0.01], 'max_depth': [5, 7, 10], 'min_samples_leaf': [2, 3, 5]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [9]:
boost_gs.best_score_

0.82477941207056227

In [10]:
boost_gs.best_params_

{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'min_samples_leaf': 2,
 'n_estimators': 100,
 'random_state': 7,
 'warm_start': True}

In [11]:
gbt_inst = GradientBoostingRegressor(**{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'min_samples_leaf': 5,
 'n_estimators': 4000,
 'warm_start': True, 'random_state':7}).fit(X_1, y_1)

In [12]:
def pickle_func(filename, saved_object):
    import pickle
 
    f = open(filename, 'wb')
    pickle.dump(saved_object, f, protocol = pickle.HIGHEST_PROTOCOL)
    f.close()
     
    return None
 
pickle_func('grad_boost.save', gbt_inst)

In [13]:
from sklearn.ensemble import BaggingRegressor,GradientBoostingRegressor
from sklearn.model_selection import RandomizedSearchCV
 
 
param_dist = {
     'max_samples': [0.5,1.0],
     'max_features' : [0.5,1.0],
     'oob_score' : [True, False],
     'base_estimator__min_samples_leaf': [4,5],
     'n_estimators': [20]}
 
 

single_estimator = GradientBoostingRegressor(**{'learning_rate': 0.1,
 'loss': 'huber',
 'max_depth': 10,
 'max_features': 0.4,
 'n_estimators': 20,
 'warm_start': True, 'random_state':7})
 
ensemble_estimator = BaggingRegressor(base_estimator = single_estimator)
 
 
pre_gs_inst_bag = RandomizedSearchCV(ensemble_estimator,
  param_distributions = param_dist,
  cv=3,
  n_iter = 5,
  n_jobs=-1)
 
pre_gs_inst_bag.fit(X_1, y_1)

RandomizedSearchCV(cv=3, error_score='raise',
          estimator=BaggingRegressor(base_estimator=GradientBoostingRegressor(alpha=0.9, criterion='friedman_mse', init=None,
             learning_rate=0.1, loss='huber', max_depth=10,
             max_features=0.4, max_leaf_nodes=None,
             min_impurity_decrease=0.0, min_impurity_split=None,
       ...n_estimators=10, n_jobs=1, oob_score=False,
         random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid=True, n_iter=5, n_jobs=-1,
          param_distributions={'max_features': [0.5, 1.0], 'max_samples': [0.5, 1.0], 'oob_score': [True, False], 'n_estimators': [20], 'base_estimator__min_samples_leaf': [4, 5]},
          pre_dispatch='2*n_jobs', random_state=None, refit=True,
          return_train_score=True, scoring=None, verbose=0)

In [14]:
pre_gs_inst_bag.best_score_

0.78178711839468873

In [15]:
pickle_func('bag_gbm.save', pre_gs_inst_bag.best_estimator_)

In [19]:
def pickle_load_func(filename):
    f = open(filename, 'rb')
    to_return = pickle.load(f)
    f.close()
    
    return to_return

neural_net = pickle_load_func('nn_best.save')
gbt = pickle_load_func('grad_boost.save')
bag_gbm = pickle_load_func('bag_gbm.save')

In [20]:
def handle_X_set(X_train_set_in):
    X_train_set = X_train_set_in.copy()
    
    y_pred_nn = neural_net.predict(X_train_set)
    y_pred_gbt = gbt.predict(X_train_set)
    y_pred_bag = bag_gbm.predict(X_train_set)
    
    
    preds_df = pd.DataFrame(columns = ['nn', 'gbt','bag'])

    preds_df['nn'] = y_pred_nn
    preds_df['gbt'] = y_pred_gbt
    preds_df['bag'] = y_pred_bag
 
    return preds_df

def predict_from_X_set(X_train_set_in):
    X_train_set = X_train_set_in.copy()    
    return final_etr.predict(handle_X_set(X_train_set)) 

preds_df = handle_X_set(X_stack)
print (preds_df.corr())

           nn       gbt       bag
nn   1.000000  0.950080  0.964221
gbt  0.950080  1.000000  0.981978
bag  0.964221  0.981978  1.000000


In [21]:
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV
 
param_dist = {'max_features' : ['sqrt','log2',1.0],
  'min_samples_leaf' : [1, 2, 3, 7, 11],
  'n_estimators': [50, 100],
  'oob_score': [True, False]}
 
pre_gs_inst = RandomizedSearchCV(ExtraTreesRegressor(warm_start=True,bootstrap=True,random_state=7),
  param_distributions = param_dist,
  cv=3,
  n_iter = 15,random_state=7)
 
pre_gs_inst.fit(preds_df.values, y_stack)

pre_gs_inst.best_params_

{'max_features': 1.0,
 'min_samples_leaf': 11,
 'n_estimators': 100,
 'oob_score': False}

In [22]:
final_etr = ExtraTreesRegressor(**{'max_features': 1.0,
  'min_samples_leaf': 11,
  'n_estimators': 3000,
  'oob_score': False, 'random_state':7}).fit(preds_df.values, y_stack)

In [23]:
from sklearn.model_selection import cross_val_score

cross_val_score(final_etr, preds_df.values, y_stack, cv=3).mean()

0.82358648513122856

In [24]:
y_pred = predict_from_X_set(X_test_prin)
 
from sklearn.metrics import r2_score, mean_absolute_error
 
print "R-squared",r2_score(y_test_prin, y_pred)
print "MAE : ",mean_absolute_error(y_test_prin, y_pred)
print "MAPE : ",(np.abs(y_test_prin- y_pred)/y_test_prin).mean()

R-squared 0.840571369604
MAE :  0.301872336955
MAPE :  0.167229414954


['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [43]:
def handle_X_set_sp(X_train_set_in):
    X_train_set = X_train_set_in.copy()
    
    y_pred_nn = neural_net.predict(X_train_set)
    y_pred_gbt = gbt.predict(X_train_set)
    y_pred_bag = bag_gbm.predict(X_train_set)
    
    #only change in function: include input vectors in training dataframe
    preds_df = pd.DataFrame(X_train_set, columns = cali_housing.feature_names)
    
    preds_df['nn'] = y_pred_nn
    preds_df['gbt'] = y_pred_gbt
    preds_df['bag'] = y_pred_bag
 
    return preds_df

def predict_from_X_set_sp(X_train_set_in):
    X_train_set = X_train_set_in.copy()

    #change final estimator's name to final_etr_sp and use handle_X_set_sp within this function
    return final_etr_sp.predict(handle_X_set_sp(X_train_set))

In [44]:
preds_df_sp = handle_X_set_sp(X_stack)
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.model_selection import RandomizedSearchCV

param_dist = {'max_features' : ['sqrt','log2',1.0],
 'min_samples_leaf' : [1, 2, 3, 7, 11],
 'n_estimators': [50, 100],
 'oob_score': [True, False]}

pre_gs_inst_2 = RandomizedSearchCV(ExtraTreesRegressor(warm_start=True,bootstrap=True,random_state=7),
 param_distributions = param_dist,
 cv=3,
 n_iter = 15,random_state=7)

pre_gs_inst_2.fit(preds_df_sp.values, y_stack)
pre_gs_inst_2.best_params_

{'max_features': 'log2',
 'min_samples_leaf': 2,
 'n_estimators': 100,
 'oob_score': False}

In [45]:
final_etr_sp = ExtraTreesRegressor(**{'max_features': 'log2',
 'min_samples_leaf': 2,
 'n_estimators': 3000,
 'oob_score': False,'random_state':7}).fit(preds_df_sp.values, y_stack)

In [46]:
from sklearn.model_selection import cross_val_score

cross_val_score(final_etr_sp, preds_df_sp.values, y_stack, cv=3).mean()

0.83158654580488089

In [47]:
y_pred = predict_from_X_set_sp(X_test_prin)

from sklearn.metrics import r2_score, mean_absolute_error

print "R-squared",r2_score(y_test_prin, y_pred)
print "MAE : ",mean_absolute_error(y_test_prin, y_pred)
print "MAPE : ",(np.abs(y_test_prin- y_pred)/y_test_prin).mean()

R-squared 0.847505166521
MAE :  0.293429491508
MAPE :  0.161770823792
