In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    confusion_matrix
)
from lime.lime_tabular import LimeTabularExplainer

# --- 1. Prewitt edge detection implementation ---
def apply_prewitt_edge(image):
    """Compute the Prewitt gradient magnitude of a single-channel image.

    The image is filtered with the horizontal and vertical 3x3 Prewitt
    kernels and the two responses are combined as ``sqrt(gx**2 + gy**2)``.

    Args:
        image: 2-D numeric array (e.g. a grayscale image from ``cv2.imread``).

    Returns:
        ``float32`` array with the same height/width as ``image`` holding the
        edge magnitude at every pixel.
    """
    kernelx = np.array([[1, 0, -1],
                        [1, 0, -1],
                        [1, 0, -1]], dtype=np.float32)
    kernely = np.array([[1, 1, 1],
                        [0, 0, 0],
                        [-1, -1, -1]], dtype=np.float32)

    # Filter in float32. With ddepth=-1 the output keeps the input depth, so a
    # uint8 image would have every negative gradient response saturated to 0
    # (and large responses clipped at 255) before the magnitude is taken,
    # silently dropping roughly half of the edges.
    image_f32 = np.asarray(image, dtype=np.float32)
    img_x = cv2.filter2D(image_f32, -1, kernelx)
    img_y = cv2.filter2D(image_f32, -1, kernely)
    edge_magnitude = cv2.magnitude(img_x, img_y)
    return edge_magnitude

# --- 2. Load images and apply Prewitt ---
def load_images_with_prewitt(folder_path, image_size=(128, 128)):
    """Load a folder-per-class image dataset as flattened Prewitt edge maps.

    Every sub-directory of ``folder_path`` is treated as one class and its
    name is used as the label. Each file inside is read as grayscale, resized
    to ``image_size``, passed through ``apply_prewitt_edge`` and flattened.
    Non-directory entries directly under ``folder_path`` and files for which
    ``cv2.imread`` returns ``None`` are skipped.

    Args:
        folder_path: Root directory containing one sub-directory per class.
        image_size: Target size handed to ``cv2.resize`` as ``dsize``
            (OpenCV interprets it as ``(width, height)``).

    Returns:
        Tuple ``(X, y, image_paths)``: ``X`` is an array with one flattened
        edge map per row, ``y`` an array of the matching label strings, and
        ``image_paths`` a list of the source file paths in the same order.
    """
    X, y, image_paths = [], [], []
    # os.listdir returns entries in arbitrary order; sort both levels so the
    # sample order (and therefore any seeded train/test split done on it) is
    # the same on every run and every filesystem.
    for label in sorted(os.listdir(folder_path)):
        label_folder = os.path.join(folder_path, label)
        if not os.path.isdir(label_folder):
            continue
        for file in sorted(os.listdir(label_folder)):
            img_path = os.path.join(label_folder, file)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Not a readable image; skip it.
                continue
            img = cv2.resize(img, image_size)
            prewitt = apply_prewitt_edge(img)
            X.append(prewitt.flatten())
            y.append(label)
            image_paths.append(img_path)
    return np.array(X), np.array(y), image_paths

# --- 3. Pipeline ---
def _plot_confusion_matrix(cm, class_names, title):
    """Show a row-normalized confusion-matrix heatmap with counts and percentages.

    Args:
        cm: Square integer array of raw counts (rows are true labels, columns
            are predicted labels, matching the axis labels drawn below).
        class_names: Tick labels, one per row/column of ``cm``.
        title: Title drawn above the heatmap.
    """
    row_totals = cm.sum(axis=1)[:, np.newaxis]
    # A class with no true samples has a zero row total; divide by 1 there so
    # the (all-zero) row stays 0 instead of turning into NaN.
    row_totals[row_totals == 0] = 1
    cm_normalized = cm.astype('float') / row_totals

    n_classes = len(class_names)
    fig, ax = plt.subplots(figsize=(max(10, n_classes * 0.8),
                                    max(8, n_classes * 0.6)))
    im = ax.imshow(cm_normalized, interpolation='nearest', cmap='Blues')

    cbar = plt.colorbar(im, ax=ax)
    cbar.ax.set_ylabel("Normalized Frequency", rotation=-90, va="bottom")

    ax.set_xticks(np.arange(n_classes))
    ax.set_yticks(np.arange(n_classes))
    ax.set_xticklabels(class_names, rotation=45, ha="right")
    ax.set_yticklabels(class_names)

    ax.set_xlabel("Predicted Label", fontsize=12)
    ax.set_ylabel("True Label", fontsize=12)
    ax.set_title(title, fontsize=16)

    # Annotate each cell with counts + percentages; switch to white text on
    # dark cells so the annotation stays readable.
    thresh = cm_normalized.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i,
                    f"{cm[i, j]}\n({cm_normalized[i, j]*100:.1f}%)",
                    ha="center", va="center",
                    color="white" if cm_normalized[i, j] > thresh else "black",
                    fontsize=8)

    plt.tight_layout()
    plt.show()
    # Release the figure; several are created per run.
    plt.close(fig)


def run_pipeline(folder_path, sample_idx=0, random_state=42):
    """Train and evaluate three classifiers on LDA-reduced Prewitt features.

    Steps: load the images as Prewitt edge maps, encode the labels, make a
    stratified 80/20 train/test split, fit LDA on the training split, then for
    each of SVM, Logistic Regression and Random Forest print accuracy and a
    classification report, plot a normalized confusion matrix and show a LIME
    explanation for one test sample.

    Args:
        folder_path: Dataset root with one sub-directory per class.
        sample_idx: Index into the test split of the sample explained by LIME.
        random_state: Seed used for the split, the stochastic classifiers and
            the LIME explainer so that repeated runs give the same results.

    Raises:
        ValueError: If no readable image was found under ``folder_path``.
    """
    print("[INFO] Loading and applying Prewitt edge detection...")
    X, y, _ = load_images_with_prewitt(folder_path)
    if X.ndim != 2 or X.shape[0] == 0:
        raise ValueError(f"No readable images found under {folder_path!r}")
    print(f"[INFO] Original features per sample: {X.shape[1]}")

    print("[INFO] Encoding labels...")
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    class_ids = np.arange(len(le.classes_))

    # --- Train-test split ---
    # Split BEFORE fitting LDA: LDA is supervised, so fitting it on the full
    # dataset would leak test labels into the features and inflate the scores.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=random_state, stratify=y_encoded
    )

    # --- Apply LDA for dimensionality reduction (fit on training data only) ---
    print("[INFO] Applying LDA...")
    lda = LDA(n_components=None)
    X_train = lda.fit_transform(X_train_raw, y_train)
    X_test = lda.transform(X_test_raw)
    print(f"[INFO] Features after LDA: {X_train.shape[1]}")

    classifiers = {
        "SVM": SVC(probability=True, random_state=random_state),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Random Forest": RandomForestClassifier(random_state=random_state)
    }

    # The explainer depends only on the training data, so build it once
    # instead of once per classifier.
    explainer = LimeTabularExplainer(
        training_data=X_train,
        feature_names=[f"f{i}" for i in range(X_train.shape[1])],
        class_names=le.classes_,
        discretize_continuous=True,
        mode='classification',
        random_state=random_state
    )

    for name, clf in classifiers.items():
        print(f"\n[INFO] Training {name}...")
        clf.fit(X_train, y_train)
        y_pred = clf.predict(X_test)

        print(f"[RESULT] {name} Accuracy: {accuracy_score(y_test, y_pred):.4f}")
        # Pass the full label set explicitly so the report and the matrix
        # always line up with le.classes_, even if some class is absent from
        # both y_test and y_pred.
        print(classification_report(y_test, y_pred, labels=class_ids,
                                    target_names=le.classes_))

        # --- Improved Confusion Matrix Visualization ---
        cm = confusion_matrix(y_test, y_pred, labels=class_ids)
        _plot_confusion_matrix(cm, le.classes_,
                               f"{name} - Normalized Confusion Matrix")

        # --- LIME Explanation ---
        print("[INFO] Running LIME explanation...")
        exp = explainer.explain_instance(X_test[sample_idx], clf.predict_proba, num_features=10)

        lime_fig = exp.as_pyplot_figure()
        plt.title(f"LIME Explanation - {name} (Sample {sample_idx})")
        plt.tight_layout()
        plt.show()
        plt.close(lime_fig)

# --- 4. Run the pipeline ---
# Dataset root: must contain one sub-folder per class label (the folder name
# is used as the label). The path looks like a Google Drive mount inside
# Colab -- adjust it when running elsewhere.
folder_path = r"/content/drive/MyDrive/augmentation"
run_pipeline(folder_path=folder_path)


[INFO] Loading and applying Prewitt edge detection...


In [2]:
pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m266.2/275.7 kB[0m [31m11.0 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=2cac7320e7a343dc660fc045664565d352a4a115a2a7450296f52f52af0b4007
  Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed5633211fb3b76d1db43fe806a17fb7486a
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1
