In [56]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from itertools import combinations

In [57]:
# Load the advertising CSV. Every column except the last is used as a
# predictor; the last column is the regression target, kept as an (n, 1)
# column so it can be passed straight to a scaler.
df = pd.read_csv(
    "D://Programming//Machine_Learning//MachineLearningAlgo//SupervisedLearning//DataSets//"
    "advertising.csv"
)
features = df.iloc[:, :-1].to_numpy()
target = df.iloc[:, [-1]].to_numpy()

In [58]:
# Standardise predictors and target with *separate* scalers. Re-fitting one
# scaler on the target would overwrite the feature mean/variance, making it
# impossible to transform new inputs consistently later on.
feature_scaler = StandardScaler()
features = feature_scaler.fit_transform(features)

# `normalize` still ends up as the scaler fitted on the target, so it can be
# used to map predictions back with `normalize.inverse_transform(...)`.
normalize = StandardScaler()
target = normalize.fit_transform(target)

# NOTE(review): both scalers are fitted on the full data set, i.e. before the
# train/test split, so test-set statistics leak into the scaling. Consider
# fitting them on the training portion only.

In [59]:
class Stacking:
    """Stacked-generalisation regressor trained on out-of-fold predictions.

    Each base estimator is trained on ``k - 1`` folds and predicts the
    held-out fold. The resulting out-of-fold predictions (one column per base
    estimator) are the training features of the meta estimator. Afterwards
    every base estimator is refitted on the full training data so it can be
    used by ``predict``.

    Parameters
    ----------
    base_estimators : list of (name, estimator) tuples
        Objects exposing ``fit(X, y)`` and ``predict(X)``. Names must be
        unique. The estimators are fitted in place (they are not copied).
    meta_estimator : estimator
        Fitted on a DataFrame whose columns are the base estimator names.
    k : int, default=5
        Number of folds used to build the out-of-fold predictions.
    random_state : int or None, default=None
        Seed for the fold shuffle. ``None`` keeps the historical behaviour of
        drawing from NumPy's global RNG (so ``np.random.seed`` still applies).
    """

    def __init__(self, base_estimators, meta_estimator, k=5, random_state=None):
        self.base_estimators = base_estimators
        self.meta_estimator = meta_estimator
        self.k = k
        self.random_state = random_state

    def fit(self, X, y):
        """Fit the base estimators and the meta estimator.

        Parameters
        ----------
        X : indexable with an integer array (e.g. ``numpy.ndarray``)
            Training inputs, one row per sample.
        y : array-like of shape (n,) or (n, 1)
            Training targets.

        Returns
        -------
        self : Stacking
            The fitted instance, to allow call chaining.

        Raises
        ------
        ValueError
            If estimator names are duplicated or ``k`` is not in ``[2, n]``.
        """
        y = np.asarray(y)
        n = len(y)

        names = [name for name, _ in self.base_estimators]
        if len(set(names)) != len(names):
            # Duplicates would silently share one meta-feature column.
            raise ValueError(f"base estimator names must be unique, got {names}")
        if not 2 <= self.k <= n:
            raise ValueError(
                f"k must be between 2 and the number of samples ({n}), got {self.k}"
            )

        # Give base learners the same 1-D target the meta learner receives
        # when y is a single column; other shapes are passed through as-is.
        y_fit = y.ravel() if y.ndim == 2 and y.shape[1] == 1 else y

        indices = np.arange(n)
        if self.random_state is None:
            rng = np.random  # global RNG, same as the unseeded behaviour
        else:
            rng = np.random.RandomState(self.random_state)
        rng.shuffle(indices)

        folds = np.array_split(indices, self.k)

        oof_preds = {name: np.zeros(n) for name in names}

        for i, test_idx in enumerate(folds):
            # Every fold except the i-th one forms the training part.
            train_idx = np.concatenate(folds[:i] + folds[i + 1:])

            X_train, y_train = X[train_idx], y_fit[train_idx]
            X_test = X[test_idx]

            for name, model in self.base_estimators:
                model.fit(X_train, y_train)
                oof_preds[name][test_idx] = np.ravel(model.predict(X_test))

        meta_X = pd.DataFrame(oof_preds)
        meta_y = y.ravel()

        self.meta_estimator.fit(meta_X, meta_y)

        # Refit on all data so predict() uses models that saw every sample.
        for _, model in self.base_estimators:
            model.fit(X, y_fit)

        return self

    def predict(self, X):
        """Predict by feeding base-estimator predictions to the meta estimator.

        Parameters
        ----------
        X : array-like
            Inputs accepted by each base estimator's ``predict``.

        Returns
        -------
        Whatever ``meta_estimator.predict`` returns for the stacked features.
        """
        # Same column names and order as the frame built in fit().
        meta_X = pd.DataFrame(
            {name: np.ravel(model.predict(X)) for name, model in self.base_estimators}
        )

        return self.meta_estimator.predict(meta_X)


In [60]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

In [61]:
# Base learners whose predictions become the meta learner's input features.
estimators = [
    ("knn", KNeighborsRegressor(n_neighbors=5)),
    # Seeded: the tree permutes features at each split, so equally good
    # splits can otherwise be chosen differently from run to run.
    ("dt", DecisionTreeRegressor(max_depth=50, random_state=42)),
]
# Meta learner that combines the base learners' predictions.
meta = LinearRegression()

In [62]:
# Stacking.fit shuffles the sample indices with NumPy's global RNG; seed it
# so the folds, and therefore the reported score, are reproducible.
np.random.seed(42)
model = Stacking(base_estimators=estimators, meta_estimator=meta, k=4)
model.fit(x_train, y_train);  # trailing ';' keeps the cell output empty

In [63]:
from sklearn.metrics import r2_score

# Coefficient of determination of the stacked model on the held-out rows.
r2_score(y_true=y_test, y_pred=model.predict(x_test))

0.9304092974264172

In [72]:
from sklearn.model_selection import KFold

# Demonstration of how KFold partitions ten sample positions into three
# folds. Each printed item is (fold number, (train indices, test indices)).
kf = KFold(n_splits=3)
it = kf.split(range(0, 10))
# Use a real name for the loop variable: assigning to `_` would overwrite
# IPython's "last output" variable.
for fold in enumerate(it):
    print(fold)

(0, (array([4, 5, 6, 7, 8, 9]), array([0, 1, 2, 3])))
(1, (array([0, 1, 2, 3, 7, 8, 9]), array([4, 5, 6])))
(2, (array([0, 1, 2, 3, 4, 5, 6]), array([7, 8, 9])))
