In [4]:
import pandas as pd
import lime
import lime.lime_tabular
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
import numpy as np

# Load dataset
dataset = pd.read_csv('../../Datasets/mems_dataset.csv')
X = dataset[['x', 'y', 'z']]
y = dataset['label']

# Train classifier
your_classifier = RandomForestClassifier()
your_classifier.fit(X, y)

# Create LIME explainer
explainer = lime.lime_tabular.LimeTabularExplainer(
    X.to_numpy(),
    feature_names=list(X.columns.values),
    discretize_continuous=True
)

# Select one sample per label
sample_indices = [np.where(y == label)[0][0] for label in [1, 2, 3] if len(np.where(y == label)[0]) > 0]

# Explain predictions
for sample_index in sample_indices:
    test_instance = X.iloc[sample_index]

    # Generate explanation
    exp = explainer.explain_instance(
        test_instance.to_numpy(),
        your_classifier.predict_proba,
        num_features=3,
        top_labels=3
    )

    # Save explanation as HTML
    html_content = exp.as_html(show_table=True, show_all=True)
    with open(f'lime_explanation_label_{y.iloc[sample_index]}_sample_{sample_index}.html', 'w') as f:
        f.write(html_content)

    # Save explanation as image
    fig = exp.as_pyplot_figure()
    fig.savefig(f'lime_explanation_label_{y.iloc[sample_index]}_sample_{sample_index}.png')
    plt.close(fig)

    # Get and sort LIME feature importance
    lime_list = exp.as_list()
    lime_list.sort()
    print(f"Label {y.iloc[sample_index]} LIME explanation: {lime_list}")



Label 1 LIME explanation: [('x > 10.05', 0.1698724010554644), ('y > 0.10', -0.09667702543252744), ('z <= 0.12', -0.4286265487574078)]
Label 2 LIME explanation: [('0.04 < y <= 0.07', 0.004381387165424319), ('0.24 < z <= 0.38', 0.1103856533200422), ('9.99 < x <= 10.05', 0.07659617404448714)]




Label 3 LIME explanation: [('0.12 < z <= 0.24', -0.34409839441973405), ('x <= 9.92', -0.17632683194841473), ('y > 0.10', -0.09294100658573481)]


In [5]:
# pip install lime