In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, mean_absolute_percentage_error
from sklearn.model_selection import train_test_split, cross_val_predict
from sklearn.svm import SVC
from sklearn.linear_model import RidgeCV, LogisticRegression
from sklearn.ensemble import StackingClassifier, StackingRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from joblib import Parallel, delayed
# from mlxtend.plotting import plot_decision_regions

from sklearn.ensemble import (AdaBoostClassifier, GradientBoostingClassifier,
                              RandomForestClassifier, ExtraTreesClassifier)
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.base import clone
from sklearn.neighbors import KNeighborsClassifier

In [2]:
class Stacking:
    """Stacking / blending ensemble exposing a single ``fit_predict`` step.

    The predictions of every base estimator become one meta-feature column;
    ``final_estimator`` is trained on those columns and then predicts the
    test set.

    * ``blending=False`` (stacking): the training meta-features are the
      out-of-fold predictions returned by ``cross_val_predict`` with ``cv``
      folds, and the meta-learner is trained against the full ``y_train``.
    * ``blending=True``: the training data is split once with
      ``train_test_split``; base estimators are fitted on the first part and
      the meta-learner is trained on their predictions for the held-out part.

    Estimators handed to this class are not fitted in place by the code
    below: every ``fit`` call is made on a ``clone``. The same estimator
    objects can therefore be shared between several ``Stacking`` instances
    without one run overwriting the fitted state seen by another.

    Parameters
    ----------
    estimators : sequence of estimators
        Base learners (objects with ``fit`` / ``predict``).
    final_estimator : estimator
        Meta-learner trained on the base learners' predictions.
    blending : bool, default=False
        Use a single hold-out split instead of cross-validated predictions.
    cv : int or CV splitter, default=5
        Forwarded to ``cross_val_predict`` in stacking mode.
    n_jobs : int, default=-1
        Forwarded to ``joblib.Parallel``; base learners are processed in
        parallel.
    random_state : int or None, default=0
        Seed of the hold-out split used in blending mode.

    Attributes
    ----------
    final_estimator_ : estimator
        Fitted clone of ``final_estimator``; set by ``fit_predict``.
    """

    def __init__(self, estimators, final_estimator, blending=False, cv=5, n_jobs=-1,
                 random_state=0):
        self.estimators = estimators
        self.final_estimator = final_estimator
        self.blending = blending
        self.cv = cv
        self.n_jobs = n_jobs
        self.random_state = random_state

    def _X_pred(self, estimator, data):
        """Return one base learner's meta-feature column for the training side.

        ``data`` is ``[X_train_v, y_train_v, X_val]`` in blending mode and
        ``[X_train, y_train]`` in stacking mode (see ``_meta_data``).
        """
        if self.blending:
            X_train_v, y_train_v, X_val = data
            # Fit a clone so the caller's estimator object stays untouched.
            return clone(estimator).fit(X_train_v, y_train_v).predict(X_val)

        X_train, y_train = data
        return cross_val_predict(estimator, X_train, y_train, cv=self.cv)

    def _X_test_pred(self, estimator, data):
        """Fit a clone of ``estimator`` on the training part of ``data`` and predict the test part.

        ``data`` is ``[X_fit, y_fit, X_test]``.
        """
        X_train, y_train, X_test = data

        return clone(estimator).fit(X_train, y_train).predict(X_test)

    def _meta_data(self, X_train, y_train, X_test):
        """Build the meta-learner's training matrix, training target and test matrix.

        Returns
        -------
        (meta_X_train, meta_y_train, meta_X_test)
            The two matrices are DataFrames with one row per sample and one
            column per base estimator (in ``self.estimators`` order).

        Raises
        ------
        ValueError
            If no base estimators were supplied.
        """
        if len(self.estimators) == 0:
            raise ValueError("Stacking requires at least one base estimator.")

        if self.blending:
            # Hold-out validation: base learners see only the first part, the
            # meta-learner is trained on predictions for the held-out part.
            X_train_v, X_val, y_train_v, y_val = train_test_split(
                X_train, y_train, random_state=self.random_state
            )
            train_data = [X_train_v, y_train_v, X_val]
            test_data = [X_train_v, y_train_v, X_test]
            meta_y_train = y_val
        else:
            train_data = [X_train, y_train]
            test_data = [X_train, y_train, X_test]
            meta_y_train = y_train

        train_jobs = (delayed(self._X_pred)(est, train_data) for est in self.estimators)
        test_jobs = (delayed(self._X_test_pred)(est, test_data) for est in self.estimators)

        # Each job yields one prediction vector, i.e. one DataFrame row per
        # estimator; transposing below turns estimators into columns.
        meta_X_train = pd.DataFrame(Parallel(n_jobs=self.n_jobs)(train_jobs))
        meta_X_test = pd.DataFrame(Parallel(n_jobs=self.n_jobs)(test_jobs))

        return meta_X_train.T, meta_y_train, meta_X_test.T

    def fit_predict(self, X_train, y_train, X_test):
        """Train the ensemble on ``(X_train, y_train)`` and return predictions for ``X_test``.

        The fitted meta-learner is kept as ``self.final_estimator_``; the
        ``final_estimator`` object passed to the constructor is left unfitted.
        """
        meta_X_train, meta_y_train, meta_X_test = self._meta_data(X_train, y_train, X_test)

        # Meta-learner (blender) is trained on a clone for the same reason as
        # the base learners: the caller may reuse the object elsewhere.
        self.final_estimator_ = clone(self.final_estimator).fit(meta_X_train, meta_y_train)
        return self.final_estimator_.predict(meta_X_test)

In [3]:
def decision_boundary_plot(X, y, X_train, y_train, clf, feature_indexes, title=None):
    """Fit ``clf`` on two selected feature columns and plot its decision regions.

    Parameters
    ----------
    X, X_train : pandas.DataFrame
        Full and training feature frames; only the columns at
        ``feature_indexes`` are used.
    y, y_train : pandas.Series
        Labels matching ``X`` and ``X_train`` (accessed through ``.values``).
    clf : estimator
        Classifier to fit; it is fitted in place on the two selected columns.
    feature_indexes : sequence of two int
        Positional indexes of the two columns to plot; their names become the
        axis labels.
    title : str, optional
        Plot title; no title is set when omitted.
    """
    # Imported here because the module-level import of mlxtend is commented
    # out, which left ``plot_decision_regions`` undefined when this ran.
    from mlxtend.plotting import plot_decision_regions

    feature1_name, feature2_name = X.columns[feature_indexes]
    X_feature_columns = X.values[:, feature_indexes]
    X_train_feature_columns = X_train.values[:, feature_indexes]
    clf.fit(X_train_feature_columns, y_train.values)

    plot_decision_regions(X=X_feature_columns, y=y.values, clf=clf)
    plt.xlabel(feature1_name)
    plt.ylabel(feature2_name)
    if title is not None:
        plt.title(title)

In [4]:
# Load ./datasets/all_dataset_dum.csv (semicolon-separated, UTF-8) and discard
# the 'direction' column, which is not used by the experiments below.
# (low_memory=False was tried here and left disabled.)
file_name_dum = './datasets/all_dataset_dum.csv'
data = (
    pd.read_csv(file_name_dum, sep=";", encoding='utf8')
    .drop(columns=['direction'])
)
data.head(2)


Unnamed: 0,id,distance,len,Кол-во пиков,Сред.зн.пика,Min,Max,Медиана,Смещение низ,Смещение верх,...,Верх.квартиль,Дисперсия,Асимметрия,Куртозис,Std откл.,Коэф.вариации,Std ошибка,X0,X1,X2
0,1,6656,1077,82,6.616,3.836,7.216,5.428,1.592,1.788,...,6.157,0.66,0.033,-1.112,0.812,0.149,0.025,1,0,0
1,1,6625,1077,74,5.802,3.226,6.143,4.719,1.493,1.424,...,5.323,0.555,0.024,-1.114,0.745,0.159,0.023,1,0,0


In [5]:
# Load ./datasets/all_dataset_dum_balance.csv (semicolon-separated, UTF-8) and
# discard the 'direction' column, mirroring how `data` is prepared.
# (low_memory=False was tried here and left disabled.)
file_name_dum_balance = './datasets/all_dataset_dum_balance.csv'
data_balance = (
    pd.read_csv(file_name_dum_balance, sep=";", encoding='utf8')
    .drop(columns=['direction'])
)
data_balance.head(2)


Unnamed: 0,id,distance,len,Кол-во пиков,Сред.зн.пика,Min,Max,Медиана,Смещение низ,Смещение верх,...,Верх.квартиль,Дисперсия,Асимметрия,Куртозис,Std откл.,Коэф.вариации,Std ошибка,X0,X1,X2
0,1,6656,1077,82,6.616,3.836,7.216,5.428,1.592,1.788,...,6.157,0.66,0.033,-1.112,0.812,0.149,0.025,1,0,0
1,1,6625,1077,74,5.802,3.226,6.143,4.719,1.493,1.424,...,5.323,0.555,0.024,-1.114,0.745,0.159,0.023,1,0,0


### Constants: feature subsets

In [6]:
# Feature subsets tried in the experiments below. Every subset contains the
# target column 'id'; each experiment splits it off before training.

# f_0 is the reference subset; the other f_* lists are f_0 with columns removed.
f_0 = [
    'id', 'distance', 'Кол-во пиков', 'Сред.зн.пика', 'Медиана', 'Смещение',
    'Ниж.квартиль', 'Верх.квартиль', 'Дисперсия', 'Куртозис', 'X0', 'X1', 'X2',
]
f_1 = [column for column in f_0 if column != 'Ниж.квартиль']
# f_2 = ['Max', 'Mean', 'Std откл.', 'Асимметрия', 'Куртозис', 'id']
f_3 = [column for column in f_0 if column != 'Сред.зн.пика']
# NOTE(review): f_4 holds exactly the same columns as f_3 — confirm whether a
# different subset was intended.
f_4 = list(f_3)

# Column groups shared by the feature_* subsets.
_BASE_STATS = ['Max', 'Куртозис', 'Гарм.сред.']
_SPREAD_STATS = ['Std откл.', 'Асимметрия']
_X_COLS = ['X0', 'X1', 'X2']
_PEAK_COLS = ['Кол-во пиков', 'Сред.зн.пика']

# feature_3 = ['Max', 'Куртозис', 'Гарм.сред.', 'Std откл.', 'Асимметрия', 'Ниж.квартиль', 'Верх.квартиль', 'id']
feature_4 = _BASE_STATS + _SPREAD_STATS + _X_COLS + ['id']
feature_5 = _BASE_STATS + _SPREAD_STATS + ['distance', 'id']
feature_6 = _BASE_STATS + _SPREAD_STATS + _PEAK_COLS + ['id']
feature_7 = _BASE_STATS + _X_COLS + ['distance'] + _PEAK_COLS + ['id']
feature_8 = _BASE_STATS + _SPREAD_STATS + _X_COLS + ['distance', 'id']

In [7]:
# Scratch list of candidate estimator configurations. Each line is a bare
# expression statement: the objects are constructed and discarded, nothing is
# assigned, and lines ending in a comma build throwaway one-element tuples.
# NOTE(review): this cell has no effect on later cells — presumably kept as a
# copy/paste reference; consider removing it or turning it into a named list.
LogisticRegression(max_iter=5000),
KNeighborsClassifier(),
RandomForestClassifier(n_estimators=300, random_state=42),
AdaBoostClassifier()
SVC(probability=True, random_state=0)
GradientBoostingClassifier(n_estimators=200, random_state=42)
KNeighborsClassifier(),  # k-nearest neighbours with default parameters
ExtraTreesClassifier(n_estimators=300, random_state=42)  # ensemble of 300 extremely randomized trees

# Last expression of the cell: its repr is what the notebook would display.
ExtraTreesClassifier(n_estimators=300, random_state=42)

### Main: stacking vs. blending experiments

In [8]:
# Experiment: data_balance restricted to the f_0 columns; base learners are
# KNN + ExtraTrees, the meta-learner is logistic regression.
df = data_balance[f_0]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    # LogisticRegression(random_state=0, max_iter=10000),
    # GradientBoostingClassifier(random_state=0),
    # SVC(probability=True, random_state=0),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=300, random_state=42),  # 300 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=42)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 12)
stacking_accuracy: 0.743801652892562
blending_accuracy: 0.7367178276269185


In [1]:
# Experiment: every column of data_balance; base learners are KNN +
# ExtraTrees, the meta-learner is logistic regression.
df = data_balance
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    # LogisticRegression(random_state=0, max_iter=10000),
    # GradientBoostingClassifier(random_state=0),
    # SVC(probability=True, random_state=0),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=300, random_state=42),  # 300 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=42)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

NameError: name 'data_balance' is not defined

In [10]:
# Experiment: data_balance restricted to the f_0 columns; seven base learners,
# logistic-regression meta-learner.
df = data_balance[f_0]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(max_iter=5000, random_state=42),
    RandomForestClassifier(n_estimators=300, random_state=42),
    AdaBoostClassifier(random_state=42),  # seeded like its siblings so re-runs are reproducible
    SVC(probability=True, random_state=42),
    GradientBoostingClassifier(n_estimators=200, random_state=42),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=300, random_state=42),  # 300 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 12)
stacking_accuracy: 0.7603305785123967
blending_accuracy: 0.7426210153482881


In [11]:
# Experiment: every column of data_balance; seven base learners,
# logistic-regression meta-learner.
df = data_balance
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(max_iter=5000, random_state=42),
    RandomForestClassifier(n_estimators=300, random_state=42),
    AdaBoostClassifier(random_state=42),  # seeded like its siblings so re-runs are reproducible
    SVC(probability=True, random_state=42),
    GradientBoostingClassifier(n_estimators=200, random_state=42),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=300, random_state=42),  # 300 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 26)
stacking_accuracy: 0.8028335301062574
blending_accuracy: 0.7898465171192444


In [12]:
# Experiment: every column of data_balance; same seven base learners as the
# previous run but with 100 trees/stages in the tree ensembles.
df = data_balance
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(max_iter=5000, random_state=42),
    RandomForestClassifier(n_estimators=100, random_state=42),
    AdaBoostClassifier(random_state=42),  # seeded like its siblings so re-runs are reproducible
    SVC(probability=True, random_state=42),
    GradientBoostingClassifier(n_estimators=100, random_state=42),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=100, random_state=42),  # 100 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 26)
stacking_accuracy: 0.7922077922077922
blending_accuracy: 0.7863046044864227


In [13]:
# Experiment: every column of `data` (the all_dataset_dum.csv frame rather
# than data_balance); seven base learners, logistic-regression meta-learner.
df = data
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(max_iter=5000, random_state=42),
    RandomForestClassifier(n_estimators=300, random_state=42),
    AdaBoostClassifier(random_state=42),  # seeded like its siblings so re-runs are reproducible
    SVC(probability=True, random_state=42),
    GradientBoostingClassifier(n_estimators=200, random_state=42),
    KNeighborsClassifier(),  # k-nearest neighbours with default parameters
    ExtraTreesClassifier(n_estimators=300, random_state=42),  # 300 extremely randomized trees
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3972, 26)
stacking_accuracy: 0.771399798590131
blending_accuracy: 0.7522658610271903


In [14]:
# Experiment: data_balance restricted to the f_0 columns; base learners are
# logistic regression, gradient boosting and SVC; logistic-regression meta-learner.
df = data_balance[f_0]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = LogisticRegression(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 12)
stacking_accuracy: 0.7142857142857143
blending_accuracy: 0.7107438016528925


In [15]:
# Experiment: data_balance restricted to the f_1 columns; base learners are
# logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[f_1]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 11)
stacking_accuracy: 0.7225501770956316
blending_accuracy: 0.706021251475797


In [16]:
# Experiment: data_balance restricted to the f_3 columns; base learners are
# logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[f_3]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 11)
stacking_accuracy: 0.7367178276269185
blending_accuracy: 0.7201889020070839


In [17]:
# Experiment: data_balance restricted to the f_4 columns; base learners are
# logistic regression, gradient boosting and SVC; random-forest meta-learner.
# NOTE(review): f_4 lists the same columns as f_3, so this run repeats the
# f_3 experiment — confirm which subset was intended.
df = data_balance[f_4]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 11)
stacking_accuracy: 0.7367178276269185
blending_accuracy: 0.7201889020070839


In [18]:
# Experiment: data_balance restricted to the feature_5 columns; base learners
# are logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[feature_5]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 6)
stacking_accuracy: 0.731995277449823
blending_accuracy: 0.717827626918536


In [None]:
# Experiment: data_balance restricted to the feature_6 columns; base learners
# are logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[feature_6]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

(3385, 7)


In [None]:
# Experiment: data_balance restricted to the feature_7 columns; base learners
# are logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[feature_7]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')

In [None]:
# Experiment: data_balance restricted to the feature_8 columns; base learners
# are logistic regression, gradient boosting and SVC; random-forest meta-learner.
df = data_balance[feature_8]
y = df['id'].to_numpy()               # target variable
X = df.drop(columns='id').to_numpy()  # feature matrix
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=True, random_state=42
)
print(X_train.shape)

# Base learners whose predictions become the meta-features.
estimators = [
    LogisticRegression(random_state=0, max_iter=10000),
    GradientBoostingClassifier(random_state=0),
    SVC(probability=True, random_state=0),
]

# Meta-learner trained on the base learners' predictions.
estimator = RandomForestClassifier(random_state=0)

# Stacking: meta-features come from cross-validated predictions.
stacking_clf = Stacking(estimators=estimators, final_estimator=estimator)
stacking_pred_res = stacking_clf.fit_predict(X_train, y_train, X_test)
# accuracy_score expects (y_true, y_pred); the arguments were previously swapped.
stacking_accuracy = accuracy_score(y_test, stacking_pred_res)
print(f'stacking_accuracy: {stacking_accuracy}')

# Blending: meta-features come from a single hold-out split.
blending_clf = Stacking(estimators=estimators, final_estimator=estimator, blending=True)
blending_pred_res = blending_clf.fit_predict(X_train, y_train, X_test)
blending_accuracy = accuracy_score(y_test, blending_pred_res)
print(f'blending_accuracy: {blending_accuracy}')