In [1]:
# IMPORTS
import pandas as pd
import numpy as np
from numpy import mean
from numpy import std
from numpy import hstack
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import StackingClassifier
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
import statistics as st
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import roc_auc_score, accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef

In [2]:
# SPLITTING THE DATASET
df = pd.read_csv('DATASET-balanced 1.csv')
X = df.drop('LABEL', axis = 1)
y = df['LABEL']
# split dataset into train and test sets
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.5, random_state=1)
# split training set into train and validation sets
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.33, random_state=1)

In [3]:
# get a list of base models
def get_models():
 models = list()
 models.append(('lr', LogisticRegression()))
 models.append(('knn', KNeighborsClassifier()))
 models.append(('cart', DecisionTreeClassifier()))
 models.append(('svm', SVC(probability=True)))
 models.append(('bayes', GaussianNB()))
 return models

In [4]:
# fit the blending ensemble
def fit_ensemble(models, X_train, X_val, y_train, y_val):
 # fit all models on the training set and predict on hold out set
 meta_X = list()
 for name, model in models:
    # fit in training set
    model.fit(X_train, y_train)
    # predict on hold out set
    yhat = model.predict_proba(X_val)
    # store predictions as input for blending
    meta_X.append(yhat)
    # create 2d array from predictions, each set is an input feature
    meta_X = hstack(meta_X)
 # define blending model
 blender = LogisticRegression()
 # fit on predictions from base models
 blender.fit(meta_X, y_val)
 return blender

In [5]:
# make a prediction with the blending ensemble
def predict_ensemble(models, blender, X_test):
 # make predictions with base models
 meta_X = list()
 for name, model in models:
     # predict with base model
     yhat = model.predict_proba(X_test)
 # store prediction
 meta_X.append(yhat)
 # create 2d array from predictions, each set is an input feature
 meta_X = hstack(meta_X)
 # predict
 return blender.predict(meta_X)

In [6]:
# summarize data split
print('Train: %s, Val: %s, Test: %s' % (X_train.shape, X_val.shape, X_test.shape))
# create the base models
models = get_models()
# train the blending ensemble
blender = fit_ensemble(models, X_train, X_val, y_train, y_val)
# make predictions on test set
yhat = predict_ensemble(models, blender, X_test)
# evaluate predictions
score = accuracy_score(y_test, yhat)
print('Blending Accuracy: %.3f' % (score*100))

Train: (3945, 26), Val: (1944, 26), Test: (5889, 26)


AttributeError: 'numpy.ndarray' object has no attribute 'append'