<a href="https://colab.research.google.com/github/Sk8133/Explainable-AI/blob/main/E_AI_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m266.2/275.7 kB[0m [31m11.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=291b647b66df1bb736ea078e7d162ff2ab698a083e35ae6ba7180a687d10159f
  Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed5633211fb3b76d1db43fe806a17fb7486a
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


In [2]:
pip install pandas scikit-learn lime matplotlib seaborn




In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
import os

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from lime.lime_tabular import LimeTabularExplainer

# 1. Load Dataset
# Replace with actual dataset path
try:
    # Extract the CSV from the zip file
    with zipfile.ZipFile("/content/archive.zip", 'r') as zip_ref:
        zip_ref.extractall("/content/")

    # Assuming the CSV file is named 'AirQualityUCI.csv' inside the zip
    csv_file_path = "/content/AirQualityUCI.csv"

    # Read the CSV file with correct parameters
    df = pd.read_csv(csv_file_path, sep=';', decimal=',')

    # Replace string 'nan' with actual NaN values in the entire DataFrame
    df.replace('nan', np.nan, inplace=True)

    # Drop columns with all NaNs and irrelevant columns
    df.dropna(axis=1, how='all', inplace=True)
    df = df.iloc[:, :15]  # Assume first 15 columns are relevant
    df = df.dropna()

    # Rename columns for easier access
    df.columns = [col.strip().replace(' ', '_') for col in df.columns]

    # Create binary label: safe if PM2.5 < 35, unsafe otherwise
    df['PM2.5'] = df['PM2.5'].astype(float) # Convert to float after handling 'nan' strings
    df['Label'] = df['PM2.5'].apply(lambda x: 'unsafe' if x > 35 else 'safe')

    # 2. Train Model
    X = df.drop(['Label', 'Date', 'Time'], axis=1) # Drop Date and Time as well
    y = df['Label']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

    # 3. Explain with LIME
    explainer = LimeTabularExplainer(X_train.values,
                                      feature_names=X.columns,
                                      class_names=['safe', 'unsafe'],
                                      discretize_continuous=True)

    i = 10  # Choose test instance
    exp = explainer.explain_instance(X_test.values[i], model.predict_proba, num_features=5)
    exp.show_in_notebook()

    # 4. Visualize Contributions
    fig = exp.as_pyplot_figure()
    plt.title("LIME Explanation for Air Quality Prediction")
    plt.tight_layout()
    plt.show()

except FileNotFoundError:
    print("Error: 'archive.zip' or 'AirQualityUCI.csv' not found. Please ensure the zip file is in the correct path and contains 'AirQualityUCI.csv'.")
except KeyError as e:
    print(f"KeyError: {e}. Please check if the column exists in the dataset after loading.")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

KeyError: 'PM2.5'. Please check if the column exists in the dataset after loading.


In [7]:
pip install tensorflow lime matplotlib scikit-image



In [9]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from lime import lime_image
from skimage.segmentation import mark_boundaries, quickshift

# 1. Load MNIST
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0

X_train = X_train.reshape(-1, 28, 28, 1)
X_test = X_test.reshape(-1, 28, 28, 1)

# 2. Train CNN
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=5, validation_split=0.1)

test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test accuracy:", test_acc)

# 3. Apply LIME Image
explainer = lime_image.LimeImageExplainer()

idx = 15  # Index of the test image
image = X_test[idx]

# Reshape the image to (28, 28) for the quickshift segmentation function
image_for_segmentation = image.reshape(28, 28)

# Define a segmentation function that does not convert to Lab space
def grayscale_segmentation(image):
    # Reshape the image to (height, width, 1) for quickshift if needed
    if image.ndim == 2:
        image = image.reshape(image.shape[0], image.shape[1], 1)
    # Check if the image is grayscale (last dimension is 1)
    if image.shape[-1] == 1:
        # Convert to a 3-channel image by repeating the single channel
        image = np.repeat(image, 3, axis=-1)
    return quickshift(image, kernel_size=4, max_dist=200, ratio=0.2, convert2lab=False)


explanation = explainer.explain_instance(image=image.reshape(28, 28, 1), # Keep image as (28, 28, 1) for the model
                                         classifier_fn=model.predict,
                                         top_labels=1,
                                         hide_color=0,
                                         num_samples=1000,
                                         segmentation_fn=grayscale_segmentation) # Use the custom segmentation function


# 4. Visualize Regions
temp, mask = explanation.get_image_and_mask(label=explanation.top_labels[0],
                                            positive_only=True,
                                            num_features=10,
                                            hide_rest=False)

# Ensure temp is in the correct shape for mark_boundaries (height, width, channels)
if temp.ndim == 2:
    temp = temp.reshape(temp.shape[0], temp.shape[1], 1)
    temp = np.repeat(temp, 3, axis=-1) # Convert to 3 channels for mark_boundaries


plt.imshow(mark_boundaries(temp, mask))
plt.title(f"LIME Explanation for Digit {y_test[idx]}")
plt.axis('off')
plt.show()

Epoch 1/5


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 15ms/step - accuracy: 0.9044 - loss: 0.3298 - val_accuracy: 0.9817 - val_loss: 0.0681
Epoch 2/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 14ms/step - accuracy: 0.9814 - loss: 0.0626 - val_accuracy: 0.9862 - val_loss: 0.0551
Epoch 3/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 14ms/step - accuracy: 0.9894 - loss: 0.0355 - val_accuracy: 0.9887 - val_loss: 0.0483
Epoch 4/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 14ms/step - accuracy: 0.9938 - loss: 0.0208 - val_accuracy: 0.9835 - val_loss: 0.0652
Epoch 5/5
[1m1688/1688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 14ms/step - accuracy: 0.9964 - loss: 0.0122 - val_accuracy: 0.9867 - val_loss: 0.0587
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - accuracy: 0.9814 - loss: 0.0616
Test accuracy: 0.9846000075340271


  0%|          | 0/1000 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27

ValueError: shape mismatch: value array of shape (3,) could not be broadcast to indexing result of shape (0,1)