Multi-Level Fusion Using a Stacking Classifier (Decision Tree Base Learners with a Logistic Regression Meta-Learner)

In [None]:


import os
import warnings

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

# Install a process-wide "ignore" filter with no category or module restriction,
# so warnings raised by the library calls below are not shown.
warnings.filterwarnings("ignore")

# **Step 1: Load feature files**
def load_feature_files(base_path, feature_sets=None, splits=("train", "val", "test")):
    """Load the per-split CSV files of every feature folder under ``base_path``.

    Each file is expected at ``<base_path>/<feature>/<split>.csv``.

    Args:
        base_path: Directory containing one sub-folder per feature set.
        feature_sets: Names of the feature sub-folders to load. When omitted,
            the five sets used by this script are loaded
            (Frequency, Gabor, HOG, Statistical, ColorHistogram).
        splits: Split names to read; each one maps to ``<split>.csv``.

    Returns:
        A dict mapping feature name -> dict mapping split name -> DataFrame.

    Raises:
        FileNotFoundError: If one or more expected CSV files do not exist.
            All missing paths are reported together.
    """
    if feature_sets is None:
        feature_sets = ("Frequency", "Gabor", "HOG", "Statistical", "ColorHistogram")

    # Resolve every path first so a broken dataset layout is reported in one
    # go, before any (potentially large) CSV has been parsed.
    paths = {
        feature: {
            split: os.path.join(base_path, feature, f"{split}.csv")
            for split in splits
        }
        for feature in feature_sets
    }
    missing = [
        path
        for split_paths in paths.values()
        for path in split_paths.values()
        if not os.path.isfile(path)
    ]
    if missing:
        raise FileNotFoundError(
            "Missing feature file(s):\n" + "\n".join(missing)
        )

    return {
        feature: {split: pd.read_csv(path) for split, path in split_paths.items()}
        for feature, split_paths in paths.items()
    }

# Root folder of the extracted features; it must contain one sub-folder per
# feature set, each holding train.csv, val.csv and test.csv.
# NOTE: this is a machine-specific absolute path - update it before running.
base_path = r"E:\Abroad period research\Feature Fusion paper\Eye dataset\Final codes for github\Features"
# `data` maps feature name -> {"train", "val", "test"} -> DataFrame.
data = load_feature_files(base_path)

# **Step 2: Combine train and val files, then split features and labels**
def combine_and_split_features(data):
    """Merge train and val rows per feature set and separate labels from features.

    For every feature set, the "train" and "val" frames are stacked row-wise
    under a fresh 0..n-1 index. In both the merged frame and the "test" frame
    the last column is taken as the label and all preceding columns as the
    features.

    Returns:
        Four dicts keyed by feature name, in this order:
        train+val features, train+val labels, test features, test labels.
    """

    def split_off_last_column(frame):
        # Everything but the final column is a feature; the final column is the label.
        return frame.iloc[:, :-1], frame.iloc[:, -1]

    X_train_val = {}
    y_train_val = {}
    X_test = {}
    y_test = {}

    for name in data:
        splits = data[name]
        merged = pd.concat([splits["train"], splits["val"]], ignore_index=True)

        X_train_val[name], y_train_val[name] = split_off_last_column(merged)
        X_test[name], y_test[name] = split_off_last_column(splits["test"])

    return X_train_val, y_train_val, X_test, y_test

# Four dicts keyed by feature-set name: train+val features, train+val labels,
# test features and test labels.
X_train_val, y_train_val, X_test, y_test = combine_and_split_features(data)

# **Step 3: Train individual Decision Tree classifiers**
def train_decision_trees(X_train, y_train):
    """Fit one decision tree per feature set.

    Args:
        X_train: Dict mapping feature-set name -> training feature matrix.
        y_train: Dict mapping the same names -> training labels.

    Returns:
        Dict mapping feature-set name -> fitted ``DecisionTreeClassifier``
        (depth capped at 10, seeded with 42).
    """
    fitted = {}

    for name in X_train:
        print(f"Training Decision Tree on {name} features...")
        tree = DecisionTreeClassifier(random_state=42, max_depth=10)
        # fit() returns the estimator itself, so the fitted tree is stored directly.
        fitted[name] = tree.fit(X_train[name], y_train[name])

    return fitted

# One fitted tree per feature set, keyed by feature-set name.
classifiers = train_decision_trees(X_train_val, y_train_val)

# **Step 4: Stacking Classifier for Multi-Level Fusion**
def create_stacking_ensemble(classifiers, X_train, y_train, X_test, y_test):
    """Fuse the per-feature-set classifiers with a stacking ensemble and evaluate it.

    ``StackingClassifier`` clones every base estimator and refits it on the
    full matrix passed to ``fit``. Handing it the bare classifiers together
    with the concatenated features would therefore train every base learner
    on all feature sets at once (and learners with identical settings would
    become duplicates). To keep one base learner per feature set, each
    classifier is wrapped in a pipeline whose first step keeps only the
    column positions that belong to its own feature set.

    Args:
        classifiers: Dict mapping feature-set name -> classifier. Only the
            hyper-parameters matter; the ensemble refits clones of them.
        X_train: Dict mapping feature-set name -> training feature DataFrame.
        y_train: Dict mapping feature-set name -> training labels.
        X_test: Dict mapping feature-set name -> test feature DataFrame.
        y_test: Dict mapping feature-set name -> test labels.

    Returns:
        Tuple ``(stacking_clf, y_pred)``: the fitted ensemble and its
        predictions on the combined test features.

    Raises:
        ValueError: If ``classifiers`` is empty, or if the feature sets do not
            all have the same number of rows within a split.
    """
    feature_names = list(classifiers)
    if not feature_names:
        raise ValueError("At least one base classifier is required for stacking.")

    # The feature sets are glued together column-wise, so their rows must line
    # up one-to-one; otherwise pd.concat would silently pad with NaN.
    for split_name, split in (("train", X_train), ("test", X_test)):
        row_counts = {name: len(split[name]) for name in feature_names}
        if len(set(row_counts.values())) > 1:
            raise ValueError(
                f"Feature sets disagree on the number of {split_name} rows: {row_counts}"
            )

    # Create base learners for stacking. Columns are selected by position
    # because different feature sets may reuse the same column names.
    base_learners = []
    start = 0
    for name in feature_names:
        stop = start + X_train[name].shape[1]
        own_columns = ColumnTransformer([("select", "passthrough", slice(start, stop))])
        base_learners.append(
            (f"{name}_clf", Pipeline([("columns", own_columns), ("clf", classifiers[name])]))
        )
        start = stop

    # Meta-learner (Logistic Regression)
    meta_learner = LogisticRegression(random_state=42)

    # Define StackingClassifier
    stacking_clf = StackingClassifier(estimators=base_learners, final_estimator=meta_learner)

    # Prepare combined feature sets; indexes are reset so rows are aligned by
    # position rather than by whatever index labels each frame carries.
    X_train_combined = pd.concat(
        [X_train[name].reset_index(drop=True) for name in feature_names], axis=1
    )
    X_test_combined = pd.concat(
        [X_test[name].reset_index(drop=True) for name in feature_names], axis=1
    )

    # All feature sets are assumed to describe the same samples in the same
    # order, so the labels of the first feature set serve as the target.
    y_train_combined = y_train[feature_names[0]]
    y_test_combined = y_test[feature_names[0]]

    # Train the StackingClassifier on combined features
    stacking_clf.fit(X_train_combined, y_train_combined)

    # Evaluate on test data
    y_pred = stacking_clf.predict(X_test_combined)
    print("\nStacking Classifier Results (Multi-Level Fusion):")
    print(classification_report(y_test_combined, y_pred, digits=4))

    return stacking_clf, y_pred

# Build, fit and evaluate the stacking ensemble; the call itself prints the
# classification report for the test split.
stacking_clf, stacking_predictions = create_stacking_ensemble(classifiers, X_train_val, y_train_val, X_test, y_test)

# **Step 5: Print Stacking Classifier Accuracy**
# Reference labels come from the first feature set in `classifiers`, the same
# key create_stacking_ensemble uses for its targets.
stacking_accuracy = accuracy_score(
    y_test[next(iter(classifiers.keys()))], stacking_predictions
)
# Printed with two decimals (the classification report above uses four digits).
print(f"Stacking Classifier Accuracy: {stacking_accuracy:.2f}")
