In [None]:

import os
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier

# 1. Load feature files
def load_feature_files(base_path, feature_sets=("Frequency", "Gabor")):
    """Load train, validation, and test CSV files from each feature folder.

    Parameters
    ----------
    base_path : str
        Directory containing one sub-folder per feature set.
    feature_sets : iterable of str, optional
        Names of the sub-folders to read. Defaults to ("Frequency", "Gabor"),
        the pair this notebook was last run with. An earlier revision listed
        "LBP", "HOG" and "Statistical" as well; pass them here to include them
        instead of editing the function body.

    Returns
    -------
    dict
        ``{feature_set: {"train": DataFrame, "val": DataFrame, "test": DataFrame}}``
        with the feature sets in the order given.

    Raises
    ------
    FileNotFoundError
        Propagated from ``pandas.read_csv`` when an expected CSV is missing.
    """
    split_names = ("train", "val", "test")
    data = {}

    for feature in feature_sets:
        # Each split is expected at <base_path>/<feature>/<split>.csv
        data[feature] = {
            split: pd.read_csv(os.path.join(base_path, feature, f"{split}.csv"))
            for split in split_names
        }

    return data

# Update the path to your dataset.
# The location can be overridden without editing the notebook by setting the
# FEATURES_BASE_PATH environment variable; when it is unset the original
# hard-coded Windows path below is used, so existing runs behave as before.
base_path = os.environ.get(
    "FEATURES_BASE_PATH",
    r"E:\Abroad period research\hybrid oranges try code\testing code on brain tumor dataset\Features",
)
data = load_feature_files(base_path)

# 2. Combine train and val files, then split features and labels
def combine_and_split_features(data):
    """Merge each feature set's train/val frames and separate features from labels.

    For every feature set in ``data`` the "train" and "val" frames are stacked
    row-wise with a fresh 0..n-1 index. The last column is treated as the label
    and every preceding column as a feature; the "test" frame is split the
    same way.

    Returns four dicts keyed by feature-set name, in this order:
    ``(X_train_val, y_train_val, X_test, y_test)``.
    """
    X_train_val, y_train_val, X_test, y_test = {}, {}, {}, {}

    for name in data:
        splits = data[name]
        stacked = pd.concat([splits["train"], splits["val"]], ignore_index=True)
        held_out = splits["test"]

        # Last column is the label; everything before it is a feature.
        X_train_val[name], y_train_val[name] = stacked.iloc[:, :-1], stacked.iloc[:, -1]
        X_test[name], y_test[name] = held_out.iloc[:, :-1], held_out.iloc[:, -1]

    return X_train_val, y_train_val, X_test, y_test

# Four dicts keyed by feature-set name: train+val features, train+val labels,
# test features, test labels.
X_train_val, y_train_val, X_test, y_test = combine_and_split_features(data)

# 3. Combine all feature sets into a single DataFrame
def combine_features(X_train, X_test):
    """Join every feature set side by side into one train and one test frame.

    Both arguments are dicts of DataFrames. Their values are concatenated
    column-wise (``axis=1``) in dict order, and the pair
    ``(X_train_combined, X_test_combined)`` is returned.
    """
    train_frames = list(X_train.values())
    test_frames = list(X_test.values())
    return pd.concat(train_frames, axis=1), pd.concat(test_frames, axis=1)

# Single train/test feature matrices spanning every feature set.
X_train_combined, X_test_combined = combine_features(X_train_val, X_test)
# Labels are taken from the first feature set only.
# NOTE(review): this assumes every feature set lists the same samples in the
# same order — confirm against how the CSVs were generated.
y_train_combined = next(iter(y_train_val.values()))
y_test_combined = next(iter(y_test.values()))

# 4. Hyperparameter tuning for DecisionTreeClassifier
def tune_decision_tree(X, y):
    """Grid-search a DecisionTreeClassifier and return the best fitted estimator.

    The search covers max_depth, min_samples_split and min_samples_leaf using
    3-fold cross-validation scored by accuracy, running on all available
    cores. The winning parameter combination is printed before the
    corresponding estimator is returned.
    """
    search_space = dict(
        max_depth=[5, 10, 15],
        min_samples_split=[2, 5, 10],
        min_samples_leaf=[1, 2, 5],
    )
    base_tree = DecisionTreeClassifier(random_state=42)
    searcher = GridSearchCV(
        estimator=base_tree,
        param_grid=search_space,
        scoring="accuracy",
        cv=3,
        n_jobs=-1,
    )
    searcher.fit(X, y)
    print(f"Best Decision Tree Parameters: {searcher.best_params_}")
    return searcher.best_estimator_

# Train the tuned Decision Tree
tuned_tree = tune_decision_tree(X_train_combined, y_train_combined)

# Sigmoid-calibrate the tree's probabilities for soft voting
# (fit() returns the estimator itself, so the call can be chained).
calibrated_tree = CalibratedClassifierCV(tuned_tree, method="sigmoid").fit(
    X_train_combined, y_train_combined
)

# 5. Additional classifier: Random Forest
random_forest = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train_combined, y_train_combined
)

# Sigmoid-calibrate the Random Forest probabilities
calibrated_rf = CalibratedClassifierCV(random_forest, method="sigmoid").fit(
    X_train_combined, y_train_combined
)

# 6. Soft-voting ensemble over the two calibrated classifiers
# NOTE(review): VotingClassifier.fit presumably refits clones of its members,
# which would make the individual fits above redundant — confirm against the
# scikit-learn docs before removing them.
ensemble_members = [
    ("calibrated_tree", calibrated_tree),
    ("calibrated_rf", calibrated_rf),
]
voting_clf = VotingClassifier(estimators=ensemble_members, voting="soft")

# Train the ensemble
voting_clf.fit(X_train_combined, y_train_combined)

# Evaluate on the held-out test data
y_pred = voting_clf.predict(X_test_combined)
print("\nEnhanced Voting Classifier Results:")
print(classification_report(y_test_combined, y_pred, digits=4))

# 7. Report the overall accuracy
final_accuracy = accuracy_score(y_test_combined, y_pred)
print(f"Enhanced Voting Classifier Accuracy: {final_accuracy:.4f}")


LIME 

In [None]:
import lime
from lime.lime_tabular import LimeTabularExplainer
import numpy as np

# Build a LIME explainer over the full combined training matrix
lime_explainer = LimeTabularExplainer(
    training_data=X_train_combined.values,
    training_labels=y_train_combined.values,
    feature_names=X_train_combined.columns.tolist(),
    class_names=[f"Class {cls}" for cls in np.unique(y_train_combined)],
    mode="classification",
    discretize_continuous=True,
    random_state=42,
)

# Explain the first few rows of the test set
num_samples_to_explain = 2
instances_to_explain = X_test_combined.head(num_samples_to_explain)

for idx, instance in instances_to_explain.iterrows():
    print(f"\nExplaining instance {idx}...")

    # Explain the ensemble's probability output, keeping the top 10 features
    explanation = lime_explainer.explain_instance(
        data_row=instance.values,
        predict_fn=voting_clf.predict_proba,
        num_features=10,
    )

    # Persist the explanation as a standalone HTML file
    html_filename = f"lime_explanation_instance_{idx}.html"
    explanation.save_to_file(html_filename)
    print(f"LIME explanation for instance {idx} saved as '{html_filename}'")

    # Also render the explanation inline in the notebook
    explanation.show_in_notebook()


Saving LIME

In [None]:


from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import pandas as pd
import lime
from lime.lime_tabular import LimeTabularExplainer

# 8. Perform Class-Specific Feature Selection and Evaluation
def perform_class_specific_feature_selection(X_train, y_train, X_test, y_test, num_features=5, random_state=42):
    """
    Perform one-vs-all feature selection and train a Decision Tree for each class.

    For every distinct label in ``y_train`` the labels are binarised (1 for
    that class, 0 otherwise), the ``num_features`` columns scoring highest on
    mutual information against the binary target are kept, a depth-10 decision
    tree is fitted on those columns and scored on the test split, and a LIME
    tabular explainer is built over the same reduced training matrix.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices; the selected columns are taken from both by name.
    y_train, y_test : pandas.Series or numpy.ndarray
        Multi-class labels for the two splits.
    num_features : int, default 5
        Passed as ``k`` to ``SelectKBest``.
    random_state : int or None, default 42
        Seed forwarded to ``mutual_info_classif`` and ``LimeTabularExplainer``
        so the selected features and the explanations are repeatable between
        runs. The decision tree keeps its own fixed seed of 42.

    Returns
    -------
    tuple of dict
        ``(class_specific_features, class_classifiers, evaluation_results,
        lime_explainers)``, each keyed by class label. Every
        ``evaluation_results`` value holds ``"accuracy"`` and
        ``"classification_report"``.
    """
    from functools import partial

    class_specific_features = {}
    class_classifiers = {}
    evaluation_results = {}
    lime_explainers = {}

    # Get the unique classes from the training labels
    classes = np.unique(y_train)

    # Seed the mutual-information estimator once; it is reused for every class.
    seeded_mutual_info = partial(mutual_info_classif, random_state=random_state)

    for class_label in classes:
        print(f"\nPerforming feature selection and training for class {class_label}...")

        # One-vs-all approach (binary labels: 1 for current class, 0 for others)
        y_binary_train = (y_train == class_label).astype(int)
        y_binary_test = (y_test == class_label).astype(int)

        # Perform mutual information-based feature selection for this class
        selector = SelectKBest(score_func=seeded_mutual_info, k=num_features)
        selector.fit(X_train, y_binary_train)

        # Get selected feature indices and names
        selected_indices = selector.get_support(indices=True)
        selected_features = X_train.columns[selected_indices]

        # Store class-specific features
        class_specific_features[class_label] = selected_features
        print(f"Selected features for class {class_label}: {list(selected_features)}")

        # Train a Decision Tree Classifier using only the selected features for this class
        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]

        clf = DecisionTreeClassifier(max_depth=10, random_state=42)
        clf.fit(X_train_selected, y_binary_train)

        # Store the classifier for this class
        class_classifiers[class_label] = clf

        # Evaluate the classifier on test data
        y_pred_test = clf.predict(X_test_selected)
        accuracy = accuracy_score(y_binary_test, y_pred_test)
        report = classification_report(y_binary_test, y_pred_test, digits=4)

        # Store evaluation results
        evaluation_results[class_label] = {
            "accuracy": accuracy,
            "classification_report": report
        }

        print(f"Accuracy for class {class_label}: {accuracy:.4f}")
        print(f"Classification Report for class {class_label}:\n{report}")

        # Initialize LIME Explainer for the current class. Labels are handed over
        # as a plain array and the explainer is seeded, matching how the
        # ensemble-level explainer earlier in the notebook is configured.
        lime_explainer = LimeTabularExplainer(
            training_data=X_train_selected.values,
            training_labels=np.asarray(y_binary_train),
            mode='classification',
            feature_names=selected_features.tolist(),
            class_names=[f'Not {class_label}', f'{class_label}'],
            discretize_continuous=True,
            random_state=random_state
        )
        lime_explainers[class_label] = lime_explainer

    return class_specific_features, class_classifiers, evaluation_results, lime_explainers


# Prepare the inputs for feature selection:
# join all feature sets column-wise and take the labels from the first set.
X_train_combined, X_test_combined = combine_features(X_train_val, X_test)
y_train_combined = next(iter(y_train_val.values()))
y_test_combined = next(iter(y_test.values()))

# Run the per-class feature selection, training and evaluation
(
    class_specific_features,
    class_classifiers,
    evaluation_results,
    lime_explainers,
) = perform_class_specific_feature_selection(
    X_train_combined, y_train_combined, X_test_combined, y_test_combined, num_features=5
)

# Summarise the stored results per class
print("\nClass-Specific Feature Selection Results:")
for class_label in evaluation_results:
    results = evaluation_results[class_label]
    print(f"\nClass {class_label} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"Classification Report:\n{results['classification_report']}")

# Example: explain two test samples per class with that class's own model
for class_label, selected_features in class_specific_features.items():
    print(f"\nExplaining predictions for class {class_label}...")

    # Per-class explainer and one-vs-all classifier
    lime_explainer = lime_explainers[class_label]
    class_clf = class_classifiers[class_label]

    # First 2 test rows whose true label is this class
    class_samples = X_test_combined[y_test_combined == class_label].head(2)

    for sample_index, sample_instance in class_samples.iterrows():
        # Restrict the row to the features this class's model was trained on
        row_values = sample_instance[selected_features].values

        # Binary prediction (1 = this class, 0 = any other) for the row
        predicted_class = class_clf.predict([row_values])[0]

        # Explain the prediction for this instance
        explanation = lime_explainer.explain_instance(
            row_values,
            class_clf.predict_proba,
            num_features=5
        )

        # Render the explanation and inject CSS that hides page scrollbars
        html_output = explanation.as_html().replace(
            "<head>",
            """<head><style>body { overflow: hidden; }</style>"""
        )

        html_filename = f"lime_explanation_class_{class_label}_sample_{sample_index}.html"

        # UTF-8 avoids UnicodeEncodeError on platforms with a narrower default encoding
        with open(html_filename, "w", encoding="utf-8") as html_file:
            html_file.write(html_output)

        print(f"LIME explanation for sample {sample_index} of class {class_label} saved as '{html_filename}'")

        # Extra LIME attributes for this explanation
        # NOTE(review): explanation.score is presumably the fit quality of LIME's
        # local surrogate rather than the model's own prediction — confirm that
        # the "Right" label below is the intended wording.
        print("\nAdditional LIME Details:")
        print(f"Intercept: {explanation.intercept}")
        print(f"Local Prediction: {explanation.local_pred}")
        print(f"Right: {explanation.score}")


Saving the explanation as an HTML page with additional LIME details such as
Intercept, Local Prediction, and Right.

In [None]:
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import pandas as pd
import lime
from lime.lime_tabular import LimeTabularExplainer

# 8. Perform Class-Specific Feature Selection and Evaluation
def perform_class_specific_feature_selection(X_train, y_train, X_test, y_test, num_features=5, random_state=42):
    """
    Perform one-vs-all feature selection and train a Decision Tree for each class.

    For every distinct label in ``y_train`` the labels are binarised (1 for
    that class, 0 otherwise), the ``num_features`` columns scoring highest on
    mutual information against the binary target are kept, a depth-10 decision
    tree is fitted on those columns and scored on the test split, and a LIME
    tabular explainer is built over the same reduced training matrix.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices; the selected columns are taken from both by name.
    y_train, y_test : pandas.Series or numpy.ndarray
        Multi-class labels for the two splits.
    num_features : int, default 5
        Passed as ``k`` to ``SelectKBest``.
    random_state : int or None, default 42
        Seed forwarded to ``mutual_info_classif`` and ``LimeTabularExplainer``
        so the selected features and the explanations are repeatable between
        runs. The decision tree keeps its own fixed seed of 42.

    Returns
    -------
    tuple of dict
        ``(class_specific_features, class_classifiers, evaluation_results,
        lime_explainers)``, each keyed by class label. Every
        ``evaluation_results`` value holds ``"accuracy"`` and
        ``"classification_report"``.
    """
    from functools import partial

    class_specific_features = {}
    class_classifiers = {}
    evaluation_results = {}
    lime_explainers = {}

    # Get the unique classes from the training labels
    classes = np.unique(y_train)

    # Seed the mutual-information estimator once; it is reused for every class.
    seeded_mutual_info = partial(mutual_info_classif, random_state=random_state)

    for class_label in classes:
        print(f"\nPerforming feature selection and training for class {class_label}...")

        # One-vs-all approach (binary labels: 1 for current class, 0 for others)
        y_binary_train = (y_train == class_label).astype(int)
        y_binary_test = (y_test == class_label).astype(int)

        # Perform mutual information-based feature selection for this class
        selector = SelectKBest(score_func=seeded_mutual_info, k=num_features)
        selector.fit(X_train, y_binary_train)

        # Get selected feature indices and names
        selected_indices = selector.get_support(indices=True)
        selected_features = X_train.columns[selected_indices]

        # Store class-specific features
        class_specific_features[class_label] = selected_features
        print(f"Selected features for class {class_label}: {list(selected_features)}")

        # Train a Decision Tree Classifier using only the selected features for this class
        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]

        clf = DecisionTreeClassifier(max_depth=10, random_state=42)
        clf.fit(X_train_selected, y_binary_train)

        # Store the classifier for this class
        class_classifiers[class_label] = clf

        # Evaluate the classifier on test data
        y_pred_test = clf.predict(X_test_selected)
        accuracy = accuracy_score(y_binary_test, y_pred_test)
        report = classification_report(y_binary_test, y_pred_test, digits=4)

        # Store evaluation results
        evaluation_results[class_label] = {
            "accuracy": accuracy,
            "classification_report": report
        }

        print(f"Accuracy for class {class_label}: {accuracy:.4f}")
        print(f"Classification Report for class {class_label}:\n{report}")

        # Initialize LIME Explainer for the current class. Labels are handed over
        # as a plain array and the explainer is seeded, matching how the
        # ensemble-level explainer earlier in the notebook is configured.
        lime_explainer = LimeTabularExplainer(
            training_data=X_train_selected.values,
            training_labels=np.asarray(y_binary_train),
            mode='classification',
            feature_names=selected_features.tolist(),
            class_names=[f'Not {class_label}', f'{class_label}'],
            discretize_continuous=True,
            random_state=random_state
        )
        lime_explainers[class_label] = lime_explainer

    return class_specific_features, class_classifiers, evaluation_results, lime_explainers


# Prepare the inputs for feature selection:
# join all feature sets column-wise and take the labels from the first set.
X_train_combined, X_test_combined = combine_features(X_train_val, X_test)
y_train_combined = next(iter(y_train_val.values()))
y_test_combined = next(iter(y_test.values()))

# Run the per-class feature selection, training and evaluation
(
    class_specific_features,
    class_classifiers,
    evaluation_results,
    lime_explainers,
) = perform_class_specific_feature_selection(
    X_train_combined, y_train_combined, X_test_combined, y_test_combined, num_features=5
)

# Summarise the stored results per class
print("\nClass-Specific Feature Selection Results:")
for class_label in evaluation_results:
    results = evaluation_results[class_label]
    print(f"\nClass {class_label} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"Classification Report:\n{results['classification_report']}")

# Example: Explaining predictions for 2 sample instances from each class (from X_test_combined)
for class_label in class_specific_features.keys():
    print(f"\nExplaining predictions for class {class_label}...")

    # Get the selected features and the explainer for the current class
    selected_features = class_specific_features[class_label]
    lime_explainer = lime_explainers[class_label]

    # Get the first 2 samples from the test set whose true label is this class
    class_samples = X_test_combined[y_test_combined == class_label].iloc[:2]

    for sample_index, sample_instance in class_samples.iterrows():
        # Binary prediction (1 = this class, 0 = any other) from the class's own model
        predicted_class = class_classifiers[class_label].predict([sample_instance[selected_features].values])[0]

        # Explain the prediction for this instance
        explanation = lime_explainer.explain_instance(
            sample_instance[selected_features].values,
            class_classifiers[class_label].predict_proba,
            num_features=5
        )

        # Additional LIME details (Intercept, Local Prediction, Right score)
        additional_details = f"""
        <br><strong>Intercept:</strong> {explanation.intercept}<br>
        <strong>Local Prediction:</strong> {explanation.local_pred}<br>
        <strong>Right:</strong> {explanation.score}<br>
        """

        # as_html() is expected to return a complete document, so plain
        # concatenation would put the details after the closing </body></html>
        # tags. Insert them just before the last </body> instead, and fall back
        # to appending when no closing body tag is present.
        lime_html = explanation.as_html()
        html_before_close, body_close_tag, html_after_close = lime_html.rpartition("</body>")
        if body_close_tag:
            html_output = html_before_close + additional_details + body_close_tag + html_after_close
        else:
            html_output = lime_html + additional_details

        # Save the explanation as an HTML file for each sample
        html_filename = f"lime_explanation_class_{class_label}_sample_{sample_index}.html"

        # Use UTF-8 encoding to avoid UnicodeEncodeError
        with open(html_filename, "w", encoding="utf-8") as f:
            f.write(html_output)

        print(f"LIME explanation for sample {sample_index} of class {class_label} saved as '{html_filename}'")


Final LIME code 

In [None]:
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import pandas as pd
import lime
from lime.lime_tabular import LimeTabularExplainer

# 8. Perform Class-Specific Feature Selection and Evaluation
def perform_class_specific_feature_selection(X_train, y_train, X_test, y_test, num_features=5, random_state=42):
    """
    Perform one-vs-all feature selection and train a Decision Tree for each class.

    For every distinct label in ``y_train`` the labels are binarised (1 for
    that class, 0 otherwise), the ``num_features`` columns scoring highest on
    mutual information against the binary target are kept, a depth-10 decision
    tree is fitted on those columns and scored on the test split, and a LIME
    tabular explainer is built over the same reduced training matrix. The
    explainers label the binary outcomes 'Class 0' and 'Class 1'.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices; the selected columns are taken from both by name.
    y_train, y_test : pandas.Series or numpy.ndarray
        Multi-class labels for the two splits.
    num_features : int, default 5
        Passed as ``k`` to ``SelectKBest``.
    random_state : int or None, default 42
        Seed forwarded to ``mutual_info_classif`` and ``LimeTabularExplainer``
        so the selected features and the explanations are repeatable between
        runs. The decision tree keeps its own fixed seed of 42.

    Returns
    -------
    tuple of dict
        ``(class_specific_features, class_classifiers, evaluation_results,
        lime_explainers)``, each keyed by class label. Every
        ``evaluation_results`` value holds ``"accuracy"`` and
        ``"classification_report"``.
    """
    from functools import partial

    class_specific_features = {}
    class_classifiers = {}
    evaluation_results = {}
    lime_explainers = {}

    # Get the unique classes from the training labels
    classes = np.unique(y_train)

    # Seed the mutual-information estimator once; it is reused for every class.
    seeded_mutual_info = partial(mutual_info_classif, random_state=random_state)

    for class_label in classes:
        print(f"\nPerforming feature selection and training for class {class_label}...")

        # One-vs-all approach (binary labels: 1 for current class, 0 for others)
        y_binary_train = (y_train == class_label).astype(int)
        y_binary_test = (y_test == class_label).astype(int)

        # Perform mutual information-based feature selection for this class
        selector = SelectKBest(score_func=seeded_mutual_info, k=num_features)
        selector.fit(X_train, y_binary_train)

        # Get selected feature indices and names
        selected_indices = selector.get_support(indices=True)
        selected_features = X_train.columns[selected_indices]

        # Store class-specific features
        class_specific_features[class_label] = selected_features
        print(f"Selected features for class {class_label}: {list(selected_features)}")

        # Train a Decision Tree Classifier using only the selected features for this class
        X_train_selected = X_train[selected_features]
        X_test_selected = X_test[selected_features]

        clf = DecisionTreeClassifier(max_depth=10, random_state=42)
        clf.fit(X_train_selected, y_binary_train)

        # Store the classifier for this class
        class_classifiers[class_label] = clf

        # Evaluate the classifier on test data
        y_pred_test = clf.predict(X_test_selected)
        accuracy = accuracy_score(y_binary_test, y_pred_test)
        report = classification_report(y_binary_test, y_pred_test, digits=4)

        # Store evaluation results
        evaluation_results[class_label] = {
            "accuracy": accuracy,
            "classification_report": report
        }

        print(f"Accuracy for class {class_label}: {accuracy:.4f}")
        print(f"Classification Report for class {class_label}:\n{report}")

        # Initialize LIME Explainer for the current class. Labels are handed over
        # as a plain array and the explainer is seeded, matching how the
        # ensemble-level explainer earlier in the notebook is configured.
        lime_explainer = LimeTabularExplainer(
            training_data=X_train_selected.values,
            training_labels=np.asarray(y_binary_train),
            mode='classification',
            feature_names=selected_features.tolist(),
            # Binary outcome names: Class 0 = any other class, Class 1 = this class
            class_names=['Class 0', 'Class 1'],
            discretize_continuous=True,
            random_state=random_state
        )
        lime_explainers[class_label] = lime_explainer

    return class_specific_features, class_classifiers, evaluation_results, lime_explainers


# === Prepare the inputs for feature selection ===
# NOTE: combine_features(), X_train_val, X_test, y_train_val and y_test must
# already be defined by the earlier cells.
X_train_combined, X_test_combined = combine_features(X_train_val, X_test)
y_train_combined = next(iter(y_train_val.values()))
y_test_combined = next(iter(y_test.values()))

# Run the per-class feature selection, training and evaluation
(
    class_specific_features,
    class_classifiers,
    evaluation_results,
    lime_explainers,
) = perform_class_specific_feature_selection(
    X_train_combined, y_train_combined, X_test_combined, y_test_combined, num_features=5
)

# Summarise the stored results per class
print("\nClass-Specific Feature Selection Results:")
for class_label in evaluation_results:
    results = evaluation_results[class_label]
    print(f"\nClass {class_label} Results:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"Classification Report:\n{results['classification_report']}")

# === Example: Explaining predictions for 2 sample instances from each class ===
for class_label in class_specific_features.keys():
    print(f"\nExplaining predictions for class {class_label}...")

    # Get the selected features and the explainer for the current class
    selected_features = class_specific_features[class_label]
    lime_explainer = lime_explainers[class_label]

    # Get the first 2 samples from the test set whose true label is this class
    class_samples = X_test_combined[y_test_combined == class_label].iloc[:2]

    for sample_index, sample_instance in class_samples.iterrows():
        # Binary prediction (1 = this class, 0 = any other) from the class's own model
        predicted_class = class_classifiers[class_label].predict([sample_instance[selected_features].values])[0]

        # Explain the prediction for this instance
        explanation = lime_explainer.explain_instance(
            sample_instance[selected_features].values,
            class_classifiers[class_label].predict_proba,
            num_features=5
        )

        # Keep the full HTML produced by as_html()
        html_output = explanation.as_html()

        # Extra details shown below the chart, separated by a rule
        additional_details = f"""
        <br><hr>
        <strong>Predicted Class:</strong> Class {predicted_class}<br>
        <strong>Intercept:</strong> {explanation.intercept}<br>
        <strong>Local Prediction:</strong> {explanation.local_pred}<br>
        <strong>Score:</strong> {explanation.score}<br>
        """

        # as_html() is expected to return a complete document, so plain
        # concatenation would put the details after the closing </body></html>
        # tags. Insert them just before the last </body> instead, and fall back
        # to appending when no closing body tag is present.
        html_before_close, body_close_tag, html_after_close = html_output.rpartition("</body>")
        if body_close_tag:
            html_output = html_before_close + additional_details + body_close_tag + html_after_close
        else:
            html_output += additional_details

        # Save the explanation as an HTML file (UTF-8 to avoid UnicodeEncodeError)
        html_filename = f"lime_explanation_class_{class_label}_sample_{sample_index}.html"
        with open(html_filename, "w", encoding="utf-8") as f:
            f.write(html_output)

        print(f"LIME explanation for sample {sample_index} of class {class_label} saved as '{html_filename}'")
