# Ensemble Learning Demo
A simple machine learning program that uses a **Stacking Classifier** (an ensemble of multiple models) with train/validation/test splits.

In [9]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

In [3]:
# Load the bundled Iris dataset and pull out the feature matrix and labels
iris = load_iris()
X = iris.data
y = iris.target

# Quick sanity check: array dimensions and the class label names
print(f"Dataset shape: {X.shape}")
print(f"Classes: {iris.target_names}")

Dataset shape: (150, 4)
Classes: ['setosa' 'versicolor' 'virginica']


In [4]:
# Two-stage stratified split: train (60%), validation (20%), test (20%)
# Stage 1: hold out 40% of the samples, stratified on the class labels
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
    stratify=y,
)
# Stage 2: cut the held-out 40% in half -> validation and test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=42,
    stratify=y_temp,
)

# Report how many samples landed in each split
print(
    *(
        f"{split_name} size: {len(split)}"
        for split_name, split in (
            ("Train", X_train),
            ("Validation", X_val),
            ("Test", X_test),
        )
    ),
    sep="\n",
)

Train size: 90
Validation size: 30
Test size: 30


In [10]:
# Define base models (level 0) for stacking
base_models = [
    ('rf', RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)),
    # probability=True makes SVC expose predict_proba, so it can hand class
    # probabilities to the meta-learner
    ('svc', SVC(probability=True, random_state=42)),
    ('knn', KNeighborsClassifier(n_neighbors=5)),
    ('dt', DecisionTreeClassifier(max_depth=5, random_state=42))
]

# Meta-learner (level 1) - learns from base model predictions
meta_learner = LogisticRegression(max_iter=200, random_state=42)

# Create Stacking Classifier ensemble
ensemble_model = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_learner,
    cv=5,  # Cross-validation for generating meta-features
    stack_method='auto'  # let scikit-learn choose each base model's output method
)
# Fit on the training split only; validation and test data stay unseen
ensemble_model.fit(X_train, y_train)

print("Stacking Classifier Ensemble trained successfully!")
print(f"Base models (Level 0): {[name for name, _ in base_models]}")
# Derive the name from the object so the report cannot drift out of sync
# if a different meta-learner is plugged in above
print(f"Meta-learner (Level 1): {type(meta_learner).__name__}")

Stacking Classifier Ensemble trained successfully!
Base models (Level 0): ['rf', 'svc', 'knn', 'dt']
Meta-learner (Level 1): LogisticRegression


In [11]:
# Score the fitted stacking ensemble on the validation split
val_predictions = ensemble_model.predict(X_val)
val_accuracy = accuracy_score(y_true=y_val, y_pred=val_predictions)

print(f"Stacking Ensemble Validation Accuracy: {val_accuracy:.4f}")
# Per-class precision / recall / F1, labelled with the Iris class names
print(
    "\nValidation Classification Report:",
    classification_report(
        y_val,
        val_predictions,
        target_names=iris.target_names,
    ),
    sep="\n",
)

Stacking Ensemble Validation Accuracy: 0.9667

Validation Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.91      1.00      0.95        10
   virginica       1.00      0.90      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



In [12]:
# Final check: score the fitted stacking ensemble on the held-out test split
test_predictions = ensemble_model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)

print(f"Stacking Ensemble Test Accuracy: {test_accuracy:.4f}")
# Per-class precision / recall / F1, labelled with the Iris class names
print(
    "\nTest Classification Report:",
    classification_report(
        y_test,
        test_predictions,
        target_names=iris.target_names,
    ),
    sep="\n",
)

Stacking Ensemble Test Accuracy: 0.9333

Test Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       0.83      1.00      0.91        10
   virginica       1.00      0.80      0.89        10

    accuracy                           0.93        30
   macro avg       0.94      0.93      0.93        30
weighted avg       0.94      0.93      0.93        30



In [13]:
# Side-by-side comparison: each fitted base model vs. the stacked ensemble,
# all scored on the same test split
print("Individual Model Accuracies (Test Set):")
print(40 * "-")

# named_estimators_ is keyed by the base-model names given at construction
for name in ensemble_model.named_estimators_:
    model = ensemble_model.named_estimators_[name]
    individual_pred = model.predict(X_test)
    individual_acc = accuracy_score(y_true=y_test, y_pred=individual_pred)
    print(f"{name:<5}: {individual_acc:.4f}")

print(40 * "-")
# test_accuracy was computed in the test-evaluation cell above
print(f"Stacking Ensemble: {test_accuracy:.4f}")

Individual Model Accuracies (Test Set):
----------------------------------------
rf   : 0.9333
svc  : 1.0000
knn  : 0.9000
dt   : 0.9333
----------------------------------------
Stacking Ensemble: 0.9333


## Summary
- **Ensemble Method**: Stacking Classifier
- **Level 0 (Base Models)**: Random Forest, SVM, KNN, Decision Tree
- **Level 1 (Meta-Learner)**: Logistic Regression learns to combine base model predictions
- **Data Splits**: Train (60%), Validation (20%), Test (20%)
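- Stacking trains a meta-model on top of base model predictions, often achieving better results than simple voting. It is not guaranteed to beat every base model, though: in this run the stacked ensemble reached 0.9333 test accuracy, matching Random Forest and Decision Tree, while SVC alone scored 1.0000. With only 30 test samples that gap is two predictions, so it should not be over-interpreted.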