# Stacking Neural Networks and Ensemble Classifiers

Parts of this kernel were inspired by this [kernel](https://www.kaggle.com/arthurtok/introduction-to-ensembling-stacking-in-python). 
The out-of-fold predictions of every base model are saved so that they can be fed into the second-level LightGBM classifier.

## 1. Data Preparation

In [None]:
# import the necessary packages
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.core import Activation
from keras.layers.core import Flatten
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras import backend as K
from keras.wrappers.scikit_learn import KerasClassifier
import pandas as pd
import pickle
import numpy as np

In [None]:
import pandas as pd
import numpy as np
import re
import sklearn
import xgboost as xgb
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline


import warnings
warnings.filterwarnings('ignore')

# Going to use these 5 base models for the stacking
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, 
                              GradientBoostingClassifier, ExtraTreesClassifier)
from sklearn.svm import SVC
from sklearn.cross_validation import KFold

In [None]:
df = pd.read_csv('../data/full_features_v2.csv')

In [None]:
df.set_index('SK_ID_CURR',inplace=True)

In [None]:
app = pd.get_dummies(df)
del df

In [None]:
# Turn the value 365243 in DAYS_EMPLOYED into NaN (presumably a sentinel for
# "not available" -- confirm against the data description).
# The result is assigned back instead of using replace(..., inplace=True) on
# the selected column: the in-place form operates on an intermediate object
# and is not guaranteed to update `app` (it does not under pandas
# copy-on-write).
app['DAYS_EMPLOYED'] = app['DAYS_EMPLOYED'].replace(365243, np.nan)

In [None]:
# Rows that carry a TARGET value form the training set; rows where TARGET is
# missing form the test set. Both are copies, so later edits leave `app` alone.
train = app.loc[app['TARGET'].notnull()].copy()
test = app.loc[app['TARGET'].isnull()].copy()

## 1.1 Dealing with NaN values, which cannot be handled by NNs or sklearn ensemble algorithms

In [None]:
train.fillna(0,inplace=True)

In [None]:
# Map both +inf and -inf to 0 in one call.
train.replace([np.inf, -np.inf], 0, inplace=True)

In [None]:
test.fillna(0,inplace=True)

In [None]:
# Map both +inf and -inf to 0 in one call.
test.replace([np.inf, -np.inf], 0, inplace=True)

## 1.2 Out of Fold Function and SklearnHelper Classes.

In [None]:
# Shared configuration for every out-of-fold run below.
SEED = 0    # for reproducibility
NFOLDS = 3  # set folds for out-of-fold prediction
ntrain, ntest = train.shape[0], test.shape[0]
# KFold is called with the number of training rows as first argument and
# `n_folds` for the fold count; the resulting object is iterated directly
# by get_oof().
kf = KFold(ntrain, n_folds=NFOLDS, random_state=SEED)

# Class to extend the Sklearn classifier
class SklearnHelper(object):
    """Wrap an estimator class behind a small train/predict interface.

    Parameters
    ----------
    clf : callable
        Estimator class (or factory) called as ``clf(**params)``.
    seed : int
        Value passed to the estimator as ``random_state``.
    params : dict or None
        Keyword arguments for the estimator. ``None`` means "no extra
        arguments". The dict is copied, so the caller's object is not
        modified.
    """

    def __init__(self, clf, seed=0, params=None):
        # Copy first: avoids a TypeError when params is None and keeps
        # 'random_state' from leaking into the caller's dict.
        params = dict(params) if params is not None else {}
        params['random_state'] = seed
        self.clf = clf(**params)

    def train(self, x_train, y_train):
        """Fit the wrapped estimator on the given data."""
        self.clf.fit(x_train, y_train)

    def predict(self, x):
        """Return the second column of ``predict_proba(x)``."""
        return self.clf.predict_proba(x)[:, 1]

    def fit(self, x, y):
        """Fit the wrapped estimator and return whatever its ``fit`` returns."""
        return self.clf.fit(x, y)

    def feature_importances(self, x, y):
        """Refit on (x, y) and print the fitted ``feature_importances_``."""
        print(self.clf.fit(x, y).feature_importances_)
    
# NOTE: no separate XGBoost helper class is defined; xgb.XGBClassifier is wrapped with SklearnHelper.

In [None]:
# Helper for classifiers that take 2-D (samples, features) input
class FNNSklearnHelper(object):
    """Wrap a classifier factory and reshape inputs to (samples, features).

    Parameters
    ----------
    clf : callable
        Classifier class/factory called as ``clf(**params)``.
    seed : int
        Accepted for signature parity with the other helpers; it is not
        forwarded to the wrapped classifier.
    params : dict or None
        Keyword arguments for ``clf``. ``None`` means no arguments.
    """

    def __init__(self, clf, seed=0, params=None):
        # `clf(**None)` would raise a TypeError, so fall back to no kwargs.
        self.clf = clf(**(params or {}))

    def _as_2d(self, x):
        # Same reshape every method applies: (samples, features).
        return x.reshape(x.shape[0], x.shape[1])

    def train(self, x_train, y_train):
        """Fit the wrapped classifier on the reshaped training data."""
        self.clf.fit(self._as_2d(x_train), y_train)

    def predict(self, x):
        """Return the second column of ``predict_proba`` for the reshaped x."""
        return self.clf.predict_proba(self._as_2d(x))[:, 1]

    def fit(self, x, y):
        """Fit on the reshaped data and return whatever ``clf.fit`` returns."""
        return self.clf.fit(self._as_2d(x), y)

    def feature_importances(self, x, y):
        """Refit and print ``feature_importances_`` of the object ``fit`` returns.

        Only works when that object exposes ``feature_importances_``.
        """
        print(self.clf.fit(self._as_2d(x), y).feature_importances_)
    
# NOTE: no separate XGBoost helper class is defined; xgb.XGBClassifier is wrapped with SklearnHelper.

In [None]:
class CNNSklearnHelper(object):
    """Wrap a classifier factory and reshape inputs to (samples, features, 1).

    The trailing axis of length 1 is added before every call into the
    wrapped classifier.

    Parameters
    ----------
    clf : callable
        Classifier class/factory called as ``clf(**params)``.
    seed : int
        Accepted for signature parity with the other helpers; it is not
        forwarded to the wrapped classifier.
    params : dict or None
        Keyword arguments for ``clf``. ``None`` means no arguments.
    """

    def __init__(self, clf, seed=0, params=None):
        # `clf(**None)` would raise a TypeError, so fall back to no kwargs.
        self.clf = clf(**(params or {}))

    def _as_3d(self, x):
        # (samples, features) -> (samples, features, 1)
        return x.reshape(x.shape[0], x.shape[1], 1)

    def train(self, x_train, y_train):
        """Fit the wrapped classifier on the reshaped training data."""
        self.clf.fit(self._as_3d(x_train), y_train)

    def predict(self, x):
        """Return the second column of ``predict_proba`` for the reshaped x."""
        return self.clf.predict_proba(self._as_3d(x))[:, 1]

    def fit(self, x, y):
        """Fit on the reshaped data and return whatever ``clf.fit`` returns."""
        return self.clf.fit(self._as_3d(x), y)

    def feature_importances(self, x, y):
        """Refit and print ``feature_importances_`` of the object ``fit`` returns.

        The input is reshaped like in the other methods. Only works when the
        object returned by ``fit`` exposes ``feature_importances_``.
        """
        print(self.clf.fit(self._as_3d(x), y).feature_importances_)

In [None]:
def get_oof(clf, x_train, y_train, x_test):
    """Compute out-of-fold predictions for one model.

    For every fold of the module-level ``kf`` the model is trained on the
    training part, predicts the held-out part (written into the matching
    rows of the train output) and predicts the full test set. The test
    predictions are averaged over the folds.

    Parameters
    ----------
    clf : object
        Helper exposing ``train(x, y)`` and ``predict(x)``.
    x_train, y_train : numpy.ndarray
        Training features and labels, indexed by the fold indices.
    x_test : numpy.ndarray
        Test features.

    Returns
    -------
    (oof_train, oof_test) : tuple of numpy.ndarray
        Column vectors of shape ``(len(x_train), 1)`` and ``(len(x_test), 1)``.
    """
    # Legacy cross_validation.KFold objects are iterated directly; splitters
    # that expose .split() (model_selection API) are asked for their folds.
    folds = list(kf.split(x_train)) if hasattr(kf, 'split') else list(kf)

    # Buffers are sized from the actual inputs and fold count.
    oof_train = np.zeros((x_train.shape[0],))
    oof_test_skf = np.empty((len(folds), x_test.shape[0]))

    for i, (train_index, test_index) in enumerate(folds):
        x_tr = x_train[train_index]
        y_tr = y_train[train_index]
        x_te = x_train[test_index]

        clf.train(x_tr, y_tr)
        print('Training {} done.'.format(i+1))
        oof_train[test_index] = clf.predict(x_te).ravel()
        oof_test_skf[i, :] = clf.predict(x_test).ravel()
        print('Fold {} done.'.format(i+1))

    oof_test = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)

In [None]:
# Create NumPy arrays of the train features, test features and the TARGET
# labels to feed into our models.
# `.values` is used because Series.ravel() is deprecated in recent pandas;
# the result is the same 1-D array.
y_train = train['TARGET'].values.ravel()
train = train.drop(['TARGET'], axis=1)
test = test.drop(['TARGET'], axis=1)
x_train = train.values # Creates an array of the train data
x_test = test.values # Creates an array of the test data

## 2. Classifiers and Parameters

In [None]:
# Random Forest parameters
rf_params = {
    'n_jobs': -1,
    'n_estimators': 1000,
    # warm_start must be off: get_oof() calls fit() on the same estimator
    # once per fold. With warm_start=True and an unchanged n_estimators,
    # scikit-learn keeps the trees from the previous fit instead of growing
    # a new forest, so later folds would be predicted by a model that has
    # already seen their rows.
    'warm_start': False,
    'max_depth': 6,
    'min_samples_leaf': 2,
    'max_features': 'sqrt',
    'verbose': 111,
    'class_weight': 'balanced',
}

# Extra Trees parameters
et_params = dict(
    n_jobs=-1,
    n_estimators=1000,
    max_depth=8,
    min_samples_leaf=2,
    verbose=111,
    class_weight='balanced',
)

# Alternative boosted-tree parameter set
nix_params = dict(
    n_estimators=2000,
    max_depth=4,
    min_child_weight=2,
    gamma=0.9,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    nthread=-1,
    scale_pos_weight=1,
    verbose=200,
)

# Parameters handed to xgb.XGBClassifier
gbm_params = dict(
    objective='binary:logistic',
    learning_rate=0.00764,
    max_depth=4,
    min_child_weight=5,
    verbose=200,
    silent=False,
    subsample=0.6,
    colsample_bytree=0.7,
    n_estimators=1000,  # an earlier setting used 2673
    gamma=0.4,
    nthread=-1,
)

In [None]:
rf = SklearnHelper(clf=RandomForestClassifier, seed=SEED, params=rf_params)
et = SklearnHelper(clf=ExtraTreesClassifier, seed=SEED, params=et_params)
gbm = SklearnHelper(clf=xgb.XGBClassifier, seed=SEED,params = gbm_params)

In [None]:
et_oof_train, et_oof_test = get_oof(et, x_train, y_train, x_test) # Extra Trees
# np.save appends '.npy' to these names, so the files on disk are
# 'et_oof_train.npz.npy' and 'et_oof_test.npz.npy'.
np.save('et_oof_train.npz',et_oof_train)
np.save('et_oof_test.npz',et_oof_test)
filename = 'et.sav'
# Context manager closes the file handle even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(et, model_file)

In [None]:
rf_oof_train, rf_oof_test = get_oof(rf,x_train, y_train, x_test) # Random Forest
# np.save appends '.npy' to these names, so the files on disk are
# 'rf_oof_train.npz.npy' and 'rf_oof_test.npz.npy'.
np.save('rf_oof_train.npz',rf_oof_train)
np.save('rf_oof_test.npz',rf_oof_test)
filename = 'rf.sav'
# Context manager closes the file handle even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(rf, model_file)

In [None]:
# Build the inputs for the gradient-boosting models directly from `app`,
# i.e. without the NaN/inf -> 0 replacement that was applied to `train` and
# `test`. The split has to happen before TARGET is dropped (the two steps
# were previously in separate cells in the opposite order, which fails on a
# fresh top-to-bottom run).
gbtrain, gbtest = app[app['TARGET'].notnull()].copy(), app[app['TARGET'].isnull()].copy()
gbtrain = gbtrain.drop(['TARGET'], axis=1)
gbtest = gbtest.drop(['TARGET'], axis=1)
xgb_train = gbtrain.values
xgb_test = gbtest.values

In [None]:
gbm_oof_train, gbm_oof_test = get_oof(gbm, xgb_train, y_train, xgb_test)
# np.save appends '.npy' to these names, so the files on disk are
# 'gbm_oof_train.npz.npy' and 'gbm_oof_test.npz.npy'.
np.save('gbm_oof_train.npz',gbm_oof_train)
np.save('gbm_oof_test.npz',gbm_oof_test)
filename = 'gbm.sav'
# Context manager closes the file handle even if pickling fails.
with open(filename, 'wb') as model_file:
    pickle.dump(gbm, model_file)

Definition of several different neural networks

In [None]:
def fcnn_model(num_classes = 2,input_shape = None):
    """Build and compile a fully-connected classifier.

    Layout: Dense(128, L2 0.3) -> Dense(128, L2 0.1) -> Dense(64), each
    followed by batch normalisation and ReLU (dropout 0.25 after the first
    two), then a softmax output layer.

    Parameters
    ----------
    num_classes : int
        Width of the softmax output layer (default 2).
    input_shape : tuple
        Shape of one input sample, passed to the first Dense layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, Adam).
    """
    # Imported locally so the function does not depend on a later notebook
    # cell having been run first.
    from keras import regularizers

    model = Sequential()
    model.add(Dense(128,input_shape=input_shape,kernel_regularizer=regularizers.l2(0.3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Dense(128,kernel_regularizer=regularizers.l2(0.1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(Dense(64))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Output width follows num_classes instead of a hard-coded 2.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [None]:
from keras import regularizers
fcnn_params={
    'build_fn': fcnn_model,
    'epochs': 10,
    'batch_size' : 32,
    'verbose' : 1,
    'input_shape': (train.shape[1],)
    
}

In [None]:
def cnn_model(num_classes = 2,input_shape = None):
    """Build and compile a small 1-D convolutional classifier.

    Layout: two Conv1D(32, kernel 5) stages with batch normalisation, ReLU,
    dropout 0.25 and max-pooling (pool size 2), then Flatten, Dense(128)
    with batch normalisation and ReLU, and a softmax output layer.

    Parameters
    ----------
    num_classes : int
        Width of the softmax output layer (default 2).
    input_shape : tuple
        Shape of one input sample, passed to the first Conv1D layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, Adam).
    """
    model = Sequential()
    model.add(Conv1D(32,5,strides = 1,input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.25))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(32,5,strides = 1))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(128))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Output width follows num_classes instead of a hard-coded 2.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [None]:
cnn_params={
    'build_fn': cnn_model,
    'epochs': 10,
    'batch_size' : 256,
    'verbose' : 1,
    'input_shape': (train.shape[1],1)
    
}

In [None]:
fcnn = FNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=fcnn_params)
cnn = CNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=cnn_params)

In [None]:
fcnn_oof_train, fcnn_oof_test = get_oof(fcnn,x_train, y_train, x_test)
np.save('fcnn_oof_train.npz',fcnn_oof_train)
np.save('fcnn_oof_test.npz',fcnn_oof_test)

In [None]:
cnn_oof_train, cnn_oof_test = get_oof(cnn,x_train, y_train, x_test)
np.save('cnn_oof_train.npz',cnn_oof_train)
np.save('cnn_oof_test.npz',cnn_oof_test)

Since we have an unbalanced dataset we balance the weights out for some models.

In [None]:
from sklearn.utils import class_weight
# compute_class_weight returns an array with one weight per entry of
# `classes`, in that order. Keras documents `class_weight` as a dict mapping
# class index -> weight, so the array is converted before it is put into the
# model parameter dicts. Keyword arguments are used because newer
# scikit-learn releases no longer accept these arguments positionally.
_balanced_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                      classes=np.unique(y_train),
                                                      y=y_train)
class_weights = dict(enumerate(_balanced_weights))


In [None]:
cnn_params_2={
    'build_fn': cnn_model,
    'epochs': 10,
    'batch_size' : 256,
    'verbose' : 1,
    'input_shape': (train.shape[1],1),
    'class_weight': class_weights
    
}

In [None]:
cnn2 = CNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=cnn_params_2)

In [None]:
cnn2_oof_train, cnn2_oof_test = get_oof(cnn2,x_train, y_train, x_test)
np.save('cnn2_oof_train.npz',cnn2_oof_train)
np.save('cnn2_oof_test.npz',cnn2_oof_test)

In [None]:
fcnn_params_2={
    'build_fn': fcnn_model,
    'epochs': 15,
    'batch_size' : 256,
    'verbose' : 1,
    'input_shape': (train.shape[1],),
    'class_weight': class_weights
}

In [None]:
fcnn2 = FNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=fcnn_params_2)

In [None]:
fcnn2_oof_train, fcnn2_oof_test = get_oof(fcnn2,x_train, y_train, x_test)
np.save('fcnn2_oof_train.npz',fcnn2_oof_train)
np.save('fcnn2_oof_test.npz',fcnn2_oof_test)

In [None]:
def cnn_model2(num_classes = 2,input_shape = None):
    """Build and compile a wider 1-D convolutional classifier.

    Layout: two Conv1D(128, kernel 5) stages with batch normalisation, ReLU,
    dropout (0.5 and 0.4) and max-pooling (pool size 2), then Flatten,
    Dense(256) with batch normalisation and ReLU, and a softmax output layer.

    Parameters
    ----------
    num_classes : int
        Width of the softmax output layer (default 2).
    input_shape : tuple
        Shape of one input sample, passed to the first Conv1D layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, Adam).
    """
    model = Sequential()
    model.add(Conv1D(128,5,strides = 1,input_shape=input_shape))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Conv1D(128,5,strides = 1))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.4))
    model.add(Flatten())
    model.add(Dense(256))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Output width follows num_classes instead of a hard-coded 2.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [None]:
cnn_params_3={
    'build_fn': cnn_model2,
    'epochs': 15,
    'batch_size' : 1024,
    'verbose' : 1,
    'input_shape': (train.shape[1],1),
    'class_weight': class_weights
    
}

In [None]:
cnn3 = CNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=cnn_params_3)

In [None]:
cnn3_oof_train, cnn3_oof_test = get_oof(cnn3,x_train, y_train, x_test)
np.save('cnn3_oof_train.npz',cnn3_oof_train)
np.save('cnn3_oof_test.npz',cnn3_oof_test)

In [None]:
def fcnn_model2(num_classes = 2,input_shape = None):
    """Build and compile a wider fully-connected classifier.

    Layout: Dense(128, L2 0.3) -> Dense(1024, L2 0.1) -> Dense(256), each
    followed by batch normalisation and ReLU (dropout 0.4 after the first
    two), then a softmax output layer.

    Parameters
    ----------
    num_classes : int
        Width of the softmax output layer (default 2).
    input_shape : tuple
        Shape of one input sample, passed to the first Dense layer.

    Returns
    -------
    The compiled Sequential model (categorical cross-entropy, Adam).
    """
    # Imported locally so the function does not depend on the cell that
    # imports `regularizers` having been run.
    from keras import regularizers

    model = Sequential()
    model.add(Dense(128,input_shape=input_shape,kernel_regularizer=regularizers.l2(0.3)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(1024,kernel_regularizer=regularizers.l2(0.1)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(256))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    # Output width follows num_classes instead of a hard-coded 2.
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
    return model

In [None]:
fcnn_params_3={
    'build_fn': fcnn_model2,
    'epochs': 15,
    'batch_size' : 1024,
    'verbose' : 1,
    'input_shape': (train.shape[1],),
    'class_weight': class_weights
}

In [None]:
fcnn3 = FNNSklearnHelper(clf=KerasClassifier,seed=SEED, params=fcnn_params_3)

In [None]:
fcnn3_oof_train, fcnn3_oof_test = get_oof(fcnn3,x_train, y_train, x_test)
np.save('fcnn3_oof_train.npz',fcnn3_oof_train)
np.save('fcnn3_oof_test.npz',fcnn3_oof_test)

## 3. LightGBM Models

### 3.1 First Level LightGBM

In [None]:
import lightgbm as lgb
class LGBSklearnHelper(object):
    """Train/predict wrapper around ``lgb.train`` for use with get_oof().

    Parameters
    ----------
    clf : object
        Unused; kept so the constructor matches the other helpers.
    seed : int
        Stored in the parameters under ``'random_state'``.
    params : dict or None
        LightGBM parameters. ``None`` means an empty parameter set. When a
        dict is given, ``'random_state'`` is written into that same dict
        (it is not copied), because the dict is reused later in the file.
    """

    def __init__(self, clf, seed=0, params=None):
        if params is None:
            # Item assignment on None would raise a TypeError.
            params = {}
        params['random_state'] = seed
        self.params = params

    def train(self, x_train, y_train):
        """Train a new booster on the given data and keep it in ``self.clf``."""
        dtrain = lgb.Dataset(x_train, label = y_train)
        # Pass a copy: some LightGBM versions remove entries such as
        # 'n_estimators' from the dict they are given, which would make the
        # next fold train with different settings than the first one.
        self.clf = lgb.train(dict(self.params), dtrain)

    def predict(self, x):
        """Return the booster's predictions for ``x``."""
        return self.clf.predict(x)
    

In [None]:
random_hyp = {'is_unbalance': True, 
              'n_estimators': 2673, 
              'num_leaves': 77, 
              'learning_rate': 0.00764, 
              'min_child_samples': 460, 
              'boosting_type': 'gbdt', 
              'subsample_for_bin': 240000, 
              'reg_lambda': 0.20, 
              'reg_alpha': 0.88, 
              'subsample': 0.95, 
              'colsample_bytree': 0.7,
              'verbose':200,
              'objective':'binary'
             }
lgbm = LGBSklearnHelper(clf=None, seed=SEED,params = random_hyp)

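### Notice: LightGBM does not appear to retrain from scratch: the second and third folds need much less training time than the first, which is wrong. A likely cause (to be confirmed) is that some LightGBM versions remove `n_estimators` from the parameter dict passed to `lgb.train`, so later calls fall back to the default number of boosting rounds. Needs to be debugged.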

In [None]:
lgbm_oof_train, lgbm_oof_test = get_oof(lgbm, xgb_train, y_train, xgb_test)
np.save('lgbm_oof_train.npz',lgbm_oof_train)
np.save('lgbm_oof_test.npz',lgbm_oof_test)

### 3.2 Second Level LightGBM

In [None]:
import numpy as np

In [None]:
fcnn_oof_train = np.load('fcnn_oof_train.npz.npy')
fcnn_oof_test = np.load('fcnn_oof_test.npz.npy')
fcnn2_oof_train = np.load('fcnn2_oof_train.npz.npy')
fcnn2_oof_test = np.load('fcnn2_oof_test.npz.npy')
fcnn3_oof_train = np.load('fcnn3_oof_train.npz.npy')
fcnn3_oof_test = np.load('fcnn3_oof_test.npz.npy')
cnn_oof_train = np.load('cnn_oof_train.npz.npy')
cnn_oof_test = np.load('cnn_oof_test.npz.npy')
cnn2_oof_train = np.load('cnn2_oof_train.npz.npy')
cnn2_oof_test = np.load('cnn2_oof_test.npz.npy')
cnn3_oof_train = np.load('cnn3_oof_train.npz.npy')
cnn3_oof_test = np.load('cnn3_oof_test.npz.npy')
et_oof_train = np.load('et_oof_train.npz.npy')
et_oof_test = np.load('et_oof_test.npz.npy')
rf_oof_train = np.load('rf_oof_train.npz.npy')
rf_oof_test = np.load('rf_oof_test.npz.npy')
gbm_oof_train = np.load('gbm_oof_train.npz.npy')
gbm_oof_test = np.load('gbm_oof_test.npz.npy')
lgbm_oof_train = np.load('lgbm_oof_train.npz.npy')
lgbm_oof_test = np.load('lgbm_oof_test.npz.npy')

In [None]:
# Second-level features: one column per first-level model. The column order
# must be the same for the train and the test matrix.
x_train_stacked = np.hstack([
    et_oof_train, rf_oof_train, cnn_oof_train, gbm_oof_train, lgbm_oof_train,
    fcnn_oof_train, fcnn2_oof_train, fcnn3_oof_train, cnn2_oof_train,
    cnn3_oof_train,
])
x_test_stacked = np.hstack([
    et_oof_test, rf_oof_test, cnn_oof_test, gbm_oof_test, lgbm_oof_test,
    fcnn_oof_test, fcnn2_oof_test, fcnn3_oof_test, cnn2_oof_test,
    cnn3_oof_test,
])

In [None]:
train_lgb = lgb.Dataset(x_train_stacked,label = y_train)

In [None]:
model = lgb.train(random_hyp,train_lgb)

In [None]:
preds = model.predict(x_test_stacked)

In [None]:
submission = pd.DataFrame({'SK_ID_CURR': list(test.index),
                            'TARGET': preds})
submission.to_csv('submission_manual.csv', index = False)