In [98]:
# implementing a stacking example 
# base estimators - LogisticRegression, KNN
# meta-estimator - Single DecisionTree

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn.base import clone, BaseEstimator, TransformerMixin, RegressorMixin

In [99]:
df = pd.read_csv('train_kaggle.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [155]:
class StackingClassifier(BaseEstimator, TransformerMixin, RegressorMixin):
    
    def __init__(self, classifiers):
        self.classifiers = classifiers
        self.meta_classifier = LinearRegression()
    
    def fit(self, X, y):
        for clf in self.classifiers:
            clf.fit(X, y)
        
        self.meta_classifier.fit(self._get_meta_features(X), y)
        return self
    
    def _get_meta_features(self, X):
        probas = np.asarray([
            clf.predict(X) for clf in self.classifiers
        ])
        return (np.stack((np.asarray(probas[0]), np.asarray(probas[1])), axis=-1))
    
    def predict(self, X):
        return self.meta_classifier.predict(self._get_meta_features(X))
    
    #def predict_proba(self, X):
        #return self.meta_classifier.predict_proba(self._get_meta_features(X))

In [156]:
X, y = make_regression(
    n_samples=1000, n_features=50,n_informative=30,
     random_state=11
)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=11)

In [157]:
y_train

array([ 2.52841740e+02,  8.70884828e+01, -1.51605437e+02, -5.00174222e+02,
       -4.29048105e+02,  1.68572580e+02, -3.32643873e+02,  4.00766016e+02,
        1.98263486e+02, -3.93356365e+02,  2.69023315e+02, -3.70914823e+02,
        1.20110903e+02, -2.46862567e+02,  7.71856456e+01,  5.85458293e+02,
       -1.00234446e+02,  1.00747174e+02, -1.83560041e+02, -8.67025069e-01,
       -3.80616854e+01, -3.47436406e+02,  4.09410562e+02, -4.21841348e+02,
       -8.51326718e+01, -5.75813774e+02, -2.18612976e+02, -4.75778412e+02,
       -2.35318955e+02, -3.51781306e+02, -1.14063183e+02,  1.89983216e+02,
        1.27439613e+02, -2.62726891e+02,  5.41639254e+02, -6.02112716e+01,
        3.99349832e+02, -2.35038083e+02, -1.07251807e+02,  1.02436772e+02,
       -4.88070659e+02,  3.67387583e+02, -3.06656718e+02,  1.78105271e+02,
        1.57293218e+02,  1.42544917e+02,  3.15486872e+02,  1.90439520e+02,
       -6.59301638e+01,  3.42660330e+02, -1.70069506e+02,  3.22065853e+02,
       -1.52538649e+02, -

In [158]:
lr = LinearRegression()
lr.fit(X_train, y_train)
print('Logistic Regression Accuracy: ', lr.score(X_test, y_test))

Logistic Regression Accuracy:  1.0


In [159]:
knn = KNeighborsRegressor(n_neighbors=30)
knn.fit(X_train, y_train)
print('KNN Accuracy: ', knn.score(X_test, y_test))

KNN Accuracy:  0.43817767790477


In [160]:
probas = np.asarray([
            clf.predict(X) for clf in base_classifiers
        ])
np.stack((np.asarray(probas[0]), np.asarray(probas[1])), axis=-1)

array([[  60.34811913,   42.67046974],
       [-694.2802085 , -170.4850211 ],
       [-157.25042007,  -88.40455984],
       ...,
       [-373.1863663 ,  -79.04957122],
       [   1.03609226,  -55.18007387],
       [-128.41894336,  -11.85248016]])

In [161]:
base_classifiers = [lr, knn]
stacking = StackingClassifier(base_classifiers)
stacking.fit(X_train, y_train)
print('Stacking classifier accuracy: ', stacking.score(X_test, y_test))


Stacking classifier accuracy:  1.0
