# In [None]:  (notebook cell marker left over from the Jupyter export)
import os
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import StackingClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings

# Suppress every warning for the rest of the run.
# NOTE(review): this blanket filter also hides library diagnostics such as
# deprecation notices; consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# 1. Load feature files
def load_feature_files(base_path, feature_sets=None):
    """Load the train, validation, and test CSV files of each feature folder.

    Every feature set is read from ``<base_path>/<feature>/`` and must contain
    ``train.csv``, ``val.csv`` and ``test.csv``.

    Args:
        base_path: Directory holding one sub-folder per feature set.
        feature_sets: Optional name (or iterable of names) of the sub-folders
            to load. When omitted, the five default sets are loaded:
            Frequency, Gabor, HOG, Statistical and ColorHistogram.

    Returns:
        A dict mapping each feature-set name to a dict with the keys
        ``"train"``, ``"val"`` and ``"test"``, each holding the DataFrame
        read from the corresponding CSV file.

    Raises:
        FileNotFoundError: Propagated from ``pandas.read_csv`` when one of the
            expected CSV files does not exist.
    """
    if feature_sets is None:
        feature_sets = ("Frequency", "Gabor", "HOG", "Statistical", "ColorHistogram")
    elif isinstance(feature_sets, str):
        # A bare string would otherwise be iterated character by character.
        feature_sets = (feature_sets,)

    splits = ("train", "val", "test")
    return {
        feature: {
            split: pd.read_csv(os.path.join(base_path, feature, f"{split}.csv"))
            for split in splits
        }
        for feature in feature_sets
    }

# Root folder that holds one sub-folder per feature set, each containing
# train.csv, val.csv and test.csv. This is a machine-specific absolute path:
# update it to your dataset location before running.
base_path = r"E:\Abroad period research\Feature Fusion paper\Eye dataset\Final codes for github\Features"
data = load_feature_files(base_path)

# 2. Combine train and val files, then split features and labels
def combine_and_split_features(data):
    """Merge each feature set's train and val frames and separate labels.

    For every feature set, the ``"train"`` and ``"val"`` frames are stacked
    row-wise (with a fresh 0..n-1 index). The last column of each frame is
    treated as the label; all preceding columns are treated as features.

    Returns four dicts keyed by feature-set name:
    train+val features, train+val labels, test features, test labels.
    """

    def _features_and_labels(frame):
        # Everything but the final column is a feature; the final column is the label.
        return frame.iloc[:, :-1], frame.iloc[:, -1]

    X_train_val, y_train_val, X_test, y_test = {}, {}, {}, {}

    for name in data:
        splits = data[name]
        merged = pd.concat([splits["train"], splits["val"]], ignore_index=True)

        X_train_val[name], y_train_val[name] = _features_and_labels(merged)
        X_test[name], y_test[name] = _features_and_labels(splits["test"])

    return X_train_val, y_train_val, X_test, y_test

# Per-feature-set dicts: train+val features/labels and test features/labels.
X_train_val, y_train_val, X_test, y_test = combine_and_split_features(data)

# 3. Train individual classifiers
def train_classifiers(X_train, y_train):
    """Fit one decision tree per feature set and return them keyed by name.

    ``X_train`` and ``y_train`` are dicts keyed by feature-set name; each tree
    is fitted on the features and labels stored under the same key.
    """

    def _fit_tree(name, features):
        print(f"Training Decision Tree on {name} features...")
        # Depth cap and seed are fixed so every tree is configured identically.
        tree = DecisionTreeClassifier(max_depth=10, random_state=42)
        tree.fit(features, y_train[name])
        return tree

    return {name: _fit_tree(name, features) for name, features in X_train.items()}

# One fitted decision tree per feature set, keyed by feature-set name.
classifiers = train_classifiers(X_train_val, y_train_val)

# 4. Prepare combined feature sets
def combine_features(X_train, X_test):
    """Place every feature set's columns side by side for stacking.

    Both arguments are dicts of DataFrames keyed by feature-set name; the
    frames are concatenated column-wise in the dict's iteration order.
    Returns the combined training frame and the combined test frame.
    """

    def _side_by_side(frames_by_feature):
        return pd.concat(list(frames_by_feature.values()), axis=1)

    return _side_by_side(X_train), _side_by_side(X_test)

# Column-wise concatenation of every feature set, for training and for testing.
X_train_combined, X_test_combined = combine_features(X_train_val, X_test)

# Take the label vectors from the first feature set; this script assumes every
# feature set lists the same samples in the same order.
_label_source = next(iter(classifiers))
y_train_combined = y_train_val[_label_source]
y_test_combined = y_test[_label_source]

# 5. Stacking Classifier
def create_stacking_classifier(X_train_combined, y_train_combined, X_test_combined, y_test_combined,
                               base_classifiers=None):
    """
    Create, train and evaluate a StackingClassifier over per-feature decision trees.

    StackingClassifier clones its base estimators and refits them on the full
    training matrix. Handing it the bare trees would therefore train every
    tree on ALL combined columns; with trees that share hyper-parameters and
    seed, the base learners collapse into copies of one model. To keep each
    base learner tied to its own feature family, every tree is wrapped in a
    pipeline that first selects that family's columns by position.

    Args:
        X_train_combined: Training features. The feature families' columns
            must sit side by side in the iteration order of
            ``base_classifiers``.
        y_train_combined: Training labels.
        X_test_combined: Test features with the same column layout.
        y_test_combined: Test labels, used only for the printed report.
        base_classifiers: Optional mapping of feature name -> fitted
            classifier. Defaults to the module-level ``classifiers``. The
            fitted models supply each tree's hyper-parameters (via ``clone``)
            and each family's column count (via ``n_features_in_``).

    Returns:
        Tuple ``(stacking_clf, y_pred)``: the fitted stacking model and its
        predictions for ``X_test_combined``.

    Raises:
        ValueError: If the families' column counts do not add up to the width
            of ``X_train_combined``.
        KeyError: If one of the stacked feature families is missing from
            ``base_classifiers``.
    """
    # Imported locally so this function brings its own dependencies into scope.
    from sklearn.base import clone
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline

    if base_classifiers is None:
        base_classifiers = classifiers

    # Recover where each feature family lives inside the combined matrix.
    column_spans = {}
    start = 0
    for feature, fitted in base_classifiers.items():
        stop = start + fitted.n_features_in_
        column_spans[feature] = list(range(start, stop))
        start = stop

    n_columns = X_train_combined.shape[1]
    if start != n_columns:
        raise ValueError(
            f"Base classifiers cover {start} columns but the combined training "
            f"matrix has {n_columns}; cannot map feature families to columns."
        )

    # Families that get a base learner. Any other family present in the
    # combined matrix (e.g. ColorHistogram) is not used by the base learners.
    stacked_features = ["Frequency", "Gabor", "HOG", "Statistical"]

    # Base classifiers: column selection followed by an unfitted copy of the tree.
    base_estimators = [
        (
            f"dt_{feature.lower()}",
            Pipeline(
                [
                    (
                        "select",
                        ColumnTransformer([("columns", "passthrough", column_spans[feature])]),
                    ),
                    ("tree", clone(base_classifiers[feature])),
                ]
            ),
        )
        for feature in stacked_features
    ]

    # Meta-classifier
    meta_clf = GradientBoostingClassifier(n_estimators=50, random_state=42)

    # Stacking Classifier (meta-features come from 5-fold cross-validated predictions)
    stacking_clf = StackingClassifier(estimators=base_estimators, final_estimator=meta_clf, cv=5)

    print("\nTraining Stacking Classifier...")
    stacking_clf.fit(X_train_combined, y_train_combined)

    # Evaluate on test data
    print("\nEvaluating Stacking Classifier...")
    y_pred = stacking_clf.predict(X_test_combined)
    print(classification_report(y_test_combined, y_pred, digits=4))

    return stacking_clf, y_pred

# Train the stacked model on the combined features and keep its test-set predictions.
stacking_clf, stacking_predictions = create_stacking_classifier(
    X_train_combined, y_train_combined, X_test_combined, y_test_combined
)

# 6. Print Stacking Classifier Accuracy
# Overall test-set accuracy, printed with four decimal places.
stacking_accuracy = accuracy_score(y_test_combined, stacking_predictions)
print(f"Stacking Classifier Accuracy: {stacking_accuracy:.4f}")
