In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score


In [2]:
# Fetch the iris data as a (features, labels) pair and bind each part by name
iris_arrays = load_iris(return_X_y=True)
X = iris_arrays[0]
y = iris_arrays[1]

In [3]:
# Preview only the first five samples (four features each); printing the whole
# matrix floods the notebook with output.
X[:5]

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2]])

In [4]:
# Inspect the label vector. The output below contains three distinct labels
# (0, 1 and 2), so this is a multiclass problem rather than a binary one.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [5]:
# Hold out 20% of the samples as the final test set; the remaining 80% is the
# "main" training pool. The fixed seed makes the split repeatable.
X_main_train, X_test, y_main_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [6]:
# Carve a validation set (20% of the main training pool) out of the training
# data; the base models train on the rest and the meta-model on the validation part.
X_train, X_val, y_train, y_val = train_test_split(
    X_main_train,
    y_main_train,
    test_size=0.2,
    random_state=42,
)

In [7]:
# The two base learners of the blend: a random forest and a gradient-boosting
# classifier, each created with a fixed seed.
base_model_1, base_model_2 = (
    RandomForestClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
)

In [8]:
# Train base models on the training data
# Both models are fit on the same (X_train, y_train) split; the validation and
# test splits are not touched in this cell.

base_model_1.fit(X_train, y_train)
base_model_2.fit(X_train, y_train)

In [9]:
# Generate predictions on the validation set for the meta-model

val_preds_1 = base_model_1.predict_proba(X_val)[:, 1]  # For binary classification, take the probability of the positive class
val_preds_2 = base_model_2.predict_proba(X_val)[:, 1]

In [10]:
# Stack predictions together for the meta-model

meta_X = np.column_stack((val_preds_1, val_preds_2))

In [11]:
# Meta-model

meta_model = LogisticRegression()

In [12]:
# Train meta-model on the predictions of the base models (on validation set)

meta_model.fit(meta_X, y_val)

In [13]:
# Now evaluate on the test set (final holdout set)

# test_preds_1 = base_model_1.predict_proba(X_test)[:, 1]
# test_preds_2 = base_model_2.predict_proba(X_test)[:, 1]

test_preds_1 = base_model_1.predict(X_test)
test_preds_2 = base_model_2.predict(X_test)


In [14]:
# Stack test predictions for the meta-model

meta_test_X = np.column_stack((test_preds_1, test_preds_2))

In [15]:
# Make predictions with the meta-model
# The meta-model sees only the stacked base-model outputs for the test set,
# never the raw features.

final_test_preds = meta_model.predict(meta_test_X)

In [16]:
# Score the blended predictions against the held-out test labels and report it
test_accuracy = accuracy_score(y_test, final_test_preds)
print("Accuracy of Blending Classifier on test data:", test_accuracy)

Accuracy of Blending Classifier on test data: 0.6333333333333333
