# In[None]:


import os
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import warnings

# Install a global "ignore" filter so no warning is shown for the rest of the run.
# NOTE(review): this also hides any warnings scikit-learn may raise while
# training or scoring below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# 1. Load feature files
def load_feature_files(base_path, feature_sets=None):
    """Load the train, validation, and test CSV files of each feature folder.

    Each feature set is expected to live in ``<base_path>/<feature>/`` and to
    contain ``train.csv``, ``val.csv`` and ``test.csv``.

    Args:
        base_path: Directory that contains one sub-folder per feature set.
        feature_sets: Optional iterable of sub-folder names to load. When
            omitted, the five default feature sets are used
            ("Frequency", "Gabor", "LBP_Features", "HOG", "Statistical").

    Returns:
        A dict mapping each feature-set name to a dict with the keys
        ``"train"``, ``"val"`` and ``"test"``, each holding the DataFrame
        read from the corresponding CSV file.

    Raises:
        FileNotFoundError: Propagated from ``pd.read_csv`` when one of the
            expected CSV files does not exist.
    """
    if feature_sets is None:
        feature_sets = ("Frequency", "Gabor", "LBP_Features", "HOG", "Statistical")

    splits = ("train", "val", "test")

    # One read per (feature set, split); the split name doubles as the file stem.
    return {
        feature: {
            split: pd.read_csv(os.path.join(base_path, feature, f"{split}.csv"))
            for split in splits
        }
        for feature in feature_sets
    }

# Root folder of the extracted features (one sub-folder per feature set).
# This is a machine-specific absolute Windows path: update it before running elsewhere.
base_path = r"E:\Abroad period research\Feature Fusion paper\Ultrasound Breast Cancer\Features"
# Maps feature-set name -> {"train", "val", "test"} DataFrames.
data = load_feature_files(base_path)

# 2. Combine train and val features, and split features and labels
def combine_and_split_features(data):
    """Merge train+val per feature set and separate features from labels.

    For every feature set the "train" and "val" frames are stacked row-wise
    (with a fresh 0..n-1 index) and the "test" frame is used as-is. In each
    frame the last column is taken as the label and all preceding columns as
    the features.

    Returns:
        Four dicts keyed by feature-set name, in this order: train+val
        features, train+val labels, test features, test labels.
    """

    def _features_and_label(frame):
        # Everything but the last column is a feature; the last column is the label.
        return frame.iloc[:, :-1], frame.iloc[:, -1]

    X_train_val = {}
    y_train_val = {}
    X_test = {}
    y_test = {}

    for name, splits in data.items():
        fit_frame = pd.concat([splits["train"], splits["val"]], ignore_index=True)
        X_train_val[name], y_train_val[name] = _features_and_label(fit_frame)
        X_test[name], y_test[name] = _features_and_label(splits["test"])

    return X_train_val, y_train_val, X_test, y_test

# Four dicts keyed by feature-set name: train+val features, train+val labels,
# test features, test labels.
X_train_val, y_train_val, X_test, y_test = combine_and_split_features(data)

# 3. Train individual Decision Tree classifiers
def train_decision_trees(X_train, y_train):
    """Fit one depth-limited decision tree per feature set.

    Args:
        X_train: Dict mapping feature-set name to its training features.
        y_train: Dict mapping feature-set name to its training labels.

    Returns:
        Dict mapping feature-set name to the fitted DecisionTreeClassifier
        (max_depth=10, random_state=42).
    """
    fitted = {}

    for name in X_train:
        print(f"Training Decision Tree on {name} features...")
        tree = DecisionTreeClassifier(max_depth=10, random_state=42)
        # fit() returns the estimator itself, so it can be stored directly.
        fitted[name] = tree.fit(X_train[name], y_train[name])

    return fitted

# One fitted decision tree per feature set, trained on the train+val rows.
classifiers = train_decision_trees(X_train_val, y_train_val)

# 4. Evaluate individual classifiers
def evaluate_classifiers(classifiers, X_test, y_test):
    """Score every per-feature-set classifier on its own test split.

    Args:
        classifiers: Dict mapping feature-set name to a fitted classifier.
        X_test: Dict mapping feature-set name to its test features.
        y_test: Dict mapping feature-set name to its test labels.

    Returns:
        Dict mapping feature-set name to a dict with the keys "accuracy",
        "precision", "recall" and "f1_score"; the last three are
        weighted averages over the classes.
    """
    summary = {}

    for name, model in classifiers.items():
        print(f"\nEvaluating Decision Tree on {name} features...")
        predicted = model.predict(X_test[name])
        truth = y_test[name]

        acc = accuracy_score(truth, predicted)
        # Precision, recall and F1 share the same call shape, so compute them together.
        prec, rec, f1 = (
            scorer(truth, predicted, average="weighted")
            for scorer in (precision_score, recall_score, f1_score)
        )

        summary[name] = dict(accuracy=acc, precision=prec, recall=rec, f1_score=f1)
        print(f"Accuracy: {acc:.2f}, Precision: {prec:.2f}, Recall: {rec:.2f}, F1 Score: {f1:.2f}")

    return summary

# Per-feature-set test metrics (accuracy, weighted precision/recall/F1).
results = evaluate_classifiers(classifiers, X_test, y_test)

# 5. Multi-Level Fusion with Voting Classifier
def create_voting_ensemble(classifiers, X_train, y_train, X_test, y_test):
    """
    Fuse the per-feature-set classifiers with a soft-voting ensemble.

    The feature sets are concatenated column-wise into one matrix, and every
    ensemble member is wrapped in a pipeline that first selects only the
    columns of its own feature set. This matters because
    ``VotingClassifier.fit`` clones and re-fits each estimator on the X it
    receives: without the column selection every member would be trained on
    the full concatenated matrix instead of on its own feature set.

    Args:
        classifiers: Dict mapping feature-set name to a classifier that
            supports ``predict_proba`` (required for soft voting). The passed
            estimators are only used as templates; they are not modified.
        X_train: Dict mapping feature-set name to its training features.
        y_train: Dict mapping feature-set name to its training labels.
        X_test: Dict mapping feature-set name to its test features.
        y_test: Dict mapping feature-set name to its test labels.

    Returns:
        A tuple ``(voting_clf, y_pred)`` with the fitted VotingClassifier and
        its predictions for the concatenated test features.

    Raises:
        ValueError: If ``classifiers`` is empty, if the feature sets do not
            have the same number of rows, or if their label vectors differ
            (the rows would then not describe the same samples).
    """
    # Imported here so the function does not rely on additional file-level imports.
    from sklearn.compose import ColumnTransformer
    from sklearn.pipeline import Pipeline

    feature_names = list(classifiers)
    if not feature_names:
        raise ValueError("classifiers must contain at least one estimator")

    # All feature sets are assumed to describe the same samples in the same
    # order, so the labels of the first feature set serve as the target.
    reference = feature_names[0]
    y_train_combined = y_train[reference]
    y_test_combined = y_test[reference]

    # Verify that assumption instead of silently fusing misaligned rows.
    for feature in feature_names[1:]:
        if not np.array_equal(np.asarray(y_train[feature]), np.asarray(y_train_combined)):
            raise ValueError(
                f"Training labels of '{feature}' differ from those of '{reference}'"
            )
        if not np.array_equal(np.asarray(y_test[feature]), np.asarray(y_test_combined)):
            raise ValueError(
                f"Test labels of '{feature}' differ from those of '{reference}'"
            )

    # Work on plain arrays: positional columns avoid clashes between equally
    # named columns of different feature sets, and np.hstack raises on a
    # row-count mismatch instead of padding with NaN.
    train_blocks = [np.asarray(X_train[feature]) for feature in feature_names]
    test_blocks = [np.asarray(X_test[feature]) for feature in feature_names]
    X_train_combined = np.hstack(train_blocks)
    X_test_combined = np.hstack(test_blocks)

    # Give every member a selector for its own column slice of the fused matrix.
    estimators = []
    start = 0
    for feature, block in zip(feature_names, train_blocks):
        stop = start + block.shape[1]
        selector = ColumnTransformer(
            [("select", "passthrough", list(range(start, stop)))]
        )
        member = Pipeline([("select", selector), ("clf", classifiers[feature])])
        estimators.append((f"{feature}_clf", member))
        start = stop

    voting_clf = VotingClassifier(
        estimators=estimators,
        voting="soft",
    )

    # Train the VotingClassifier on combined features
    voting_clf.fit(X_train_combined, y_train_combined)

    # Evaluate on test data
    y_pred = voting_clf.predict(X_test_combined)
    print("\nVoting Classifier Results (Multi-Level Fusion):")
    print(classification_report(y_test_combined, y_pred, digits=4))

    return voting_clf, y_pred

# Build, fit and evaluate the soft-voting ensemble; keep its test predictions.
voting_clf, voting_predictions = create_voting_ensemble(classifiers, X_train_val, y_train_val, X_test, y_test)

# 6. Print Voting Classifier Accuracy
# The reference labels are the test labels of the first feature set in
# `classifiers` (its first key).
voting_accuracy = accuracy_score(
    y_test[next(iter(classifiers.keys()))], voting_predictions
)
print(f"Voting Classifier Accuracy: {voting_accuracy:.2f}")
