# In [None]:

import os
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier

# 1. Load feature files
def load_feature_files(base_path, feature_sets=None):
    """Load train, validation, and test CSV files from each feature folder.

    Args:
        base_path: Directory that contains one sub-folder per feature set.
            Each sub-folder must hold ``train.csv``, ``val.csv`` and
            ``test.csv``.
        feature_sets: Optional iterable of sub-folder names to load. A single
            name may be passed as a plain string. When ``None`` (the default)
            the five standard feature sets are loaded.

    Returns:
        A dict keyed by feature-set name, in the order given. Each value is a
        dict with the keys ``"train"``, ``"val"`` and ``"test"`` mapping to
        the DataFrame read from the corresponding CSV file.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when an expected
            CSV file does not exist.
    """
    if feature_sets is None:
        feature_sets = ("Frequency", "Gabor", "ColorHistogram", "HOG", "Statistical")
    elif isinstance(feature_sets, str):
        # A bare string would otherwise be iterated character by character.
        feature_sets = (feature_sets,)

    data = {}
    for feature in feature_sets:
        feature_dir = os.path.join(base_path, feature)
        data[feature] = {
            split: pd.read_csv(os.path.join(feature_dir, f"{split}.csv"))
            for split in ("train", "val", "test")
        }

    return data

# Location of the extracted feature folders. Set the FEATURES_BASE_PATH
# environment variable to point at your own copy of the dataset; the original
# hard-coded path is kept as the fallback so existing runs behave as before.
base_path = os.environ.get(
    "FEATURES_BASE_PATH",
    r"E:\Abroad period research\Feature Fusion paper\Eye dataset\Final codes for github\Features",
)
data = load_feature_files(base_path)

# 2. Combine train and val files, then split features and labels
def combine_and_split_features(data):
    """Merge each feature set's train and val frames and separate labels.

    For every feature set in ``data`` the ``"train"`` and ``"val"`` frames are
    stacked row-wise (with a fresh 0..n-1 index). In both the merged frame and
    the ``"test"`` frame the last column is treated as the label and all
    preceding columns as features.

    Returns:
        A 4-tuple of dicts keyed by feature-set name:
        ``(train+val features, train+val labels, test features, test labels)``.
    """

    def _features_and_labels(frame):
        # Everything but the last column is a feature; the last column is the label.
        return frame.iloc[:, :-1], frame.iloc[:, -1]

    fit_features, fit_labels = {}, {}
    holdout_features, holdout_labels = {}, {}

    for name, splits in data.items():
        merged = pd.concat((splits["train"], splits["val"]), ignore_index=True)
        fit_features[name], fit_labels[name] = _features_and_labels(merged)
        holdout_features[name], holdout_labels[name] = _features_and_labels(splits["test"])

    return fit_features, fit_labels, holdout_features, holdout_labels

# Merge train+val per feature set and separate feature columns from labels;
# each of the four results is a dict keyed by feature-set name.
X_train_val, y_train_val, X_test, y_test = combine_and_split_features(data)

# 3. Combine all feature sets into a single DataFrame
def combine_features(X_train, X_test):
    """Fuse the per-feature-set frames column-wise into one frame per split.

    Args:
        X_train: Dict mapping feature-set name to its training feature frame.
        X_test: Dict mapping feature-set name to its test feature frame.

    Returns:
        ``(train_frame, test_frame)`` where each frame holds the columns of
        all feature sets side by side, in the dict's iteration order.
    """

    def _side_by_side(frames_by_feature):
        # axis=1 places the frames next to each other, aligning rows on the index.
        return pd.concat(list(frames_by_feature.values()), axis=1)

    return _side_by_side(X_train), _side_by_side(X_test)

def _shared_labels(labels_by_feature, split_name):
    """Return the label Series shared by all feature sets of one split.

    The labels of the first feature set serve as the reference. Every other
    feature set must carry the same labels in the same row order; otherwise
    the column-wise fused feature rows would be paired with the wrong targets.

    Args:
        labels_by_feature: Dict mapping feature-set name to its label Series.
        split_name: Human-readable split name used in the error message.

    Returns:
        The label Series of the first feature set.

    Raises:
        ValueError: If any feature set's labels differ from the reference.
    """
    label_items = iter(labels_by_feature.items())
    reference_name, reference = next(label_items)
    for name, labels in label_items:
        mismatch = len(labels) != len(reference) or not (
            labels.reset_index(drop=True).eq(reference.reset_index(drop=True)).all()
        )
        if mismatch:
            raise ValueError(
                f"{split_name} labels of feature set {name!r} do not match "
                f"those of {reference_name!r}; the feature files are misaligned."
            )
    return reference


X_train_combined, X_test_combined = combine_features(X_train_val, X_test)

# The fused matrices reuse the first feature set's labels, so verify that all
# feature sets agree on them instead of silently assuming it.
y_train_combined = _shared_labels(y_train_val, "train+val")
y_test_combined = _shared_labels(y_test, "test")

# 4. Hyperparameter tuning for DecisionTreeClassifier
def tune_decision_tree(X, y):
    """Grid-search decision-tree hyperparameters and return the best tree.

    The search covers ``max_depth``, ``min_samples_split`` and
    ``min_samples_leaf``, scoring each combination by 3-fold cross-validated
    accuracy and using all available cores (``n_jobs=-1``). The winning
    parameter combination is printed.

    Args:
        X: Feature matrix to tune on.
        y: Target labels for ``X``.

    Returns:
        The search's ``best_estimator_``.
    """
    base_tree = DecisionTreeClassifier(random_state=42)
    search_space = dict(
        max_depth=[5, 10, 15],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 5],
    )

    search = GridSearchCV(
        estimator=base_tree,
        param_grid=search_space,
        scoring="accuracy",
        cv=3,
        n_jobs=-1,
    )
    search.fit(X, y)

    print(f"Best Decision Tree Parameters: {search.best_params_}")
    return search.best_estimator_

# Search for the best single decision tree on the fused feature matrix.
tuned_tree = tune_decision_tree(X_train_combined, y_train_combined)

# Wrap the tuned tree in sigmoid calibration so it supplies probabilities
# suitable for soft voting.
calibrated_tree = CalibratedClassifierCV(tuned_tree, method="sigmoid").fit(
    X_train_combined, y_train_combined
)

# 5. Second ensemble member: a 100-tree random forest, fitted on its own and
# then calibrated the same way as the tree.
random_forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_combined, y_train_combined
)
calibrated_rf = CalibratedClassifierCV(random_forest, method="sigmoid").fit(
    X_train_combined, y_train_combined
)

# NOTE(review): scikit-learn meta-estimators clone and refit their
# sub-estimators, so the standalone fits above are presumably not reused by
# the voting ensemble below. Confirm nothing else relies on those fitted
# objects before removing them to save training time.

# 6. Soft-voting ensemble over the two calibrated classifiers: predictions are
# based on the classifiers' predicted probabilities.
voting_clf = VotingClassifier(
    estimators=[
        ("calibrated_tree", calibrated_tree),
        ("calibrated_rf", calibrated_rf),
    ],
    voting="soft",
).fit(X_train_combined, y_train_combined)

# Score the ensemble on the held-out test split.
y_pred = voting_clf.predict(X_test_combined)
print("\nEnhanced Voting Classifier Results:")
print(classification_report(y_test_combined, y_pred, digits=4))

# 7. Report the overall test accuracy.
final_accuracy = accuracy_score(y_test_combined, y_pred)
print(f"Enhanced Voting Classifier Accuracy: {final_accuracy:.4f}")
