In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression

# Step 1: Load the Iris dataset and pull out the feature matrix and labels
iris = load_iris()
X, y = iris.data, iris.target

# Step 2: Hold out 20% of the rows for testing; the fixed seed makes the
# split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 3: Train a Random Forest model (100 trees, seeded); fit() hands back
# the fitted estimator, so construction and training are chained
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 4: Choose an instance to explain -- the first row of the test split
instance_to_explain = X_test[0]
print("Instance to explain:", instance_to_explain)

# Step 5: Generate perturbed samples
def generate_perturbations(instance, num_samples=100, scale=0.1, rng=None):
    """Return noisy copies of ``instance`` made by adding zero-mean Gaussian noise.

    Args:
        instance: Array-like feature vector to perturb.
        num_samples: Number of perturbed copies to generate; must be >= 0.
        scale: Standard deviation of the noise added to every feature.
        rng: Optional random source exposing ``normal(loc, scale, size)``,
            e.g. ``np.random.default_rng(seed)``. When omitted, NumPy's
            global random state is used, so results differ between runs
            unless ``np.random.seed`` was called beforehand.

    Returns:
        Float array of shape ``(num_samples, *instance.shape)``; each row is
        ``instance`` plus an independent noise draw.

    Raises:
        ValueError: If ``num_samples`` is negative.
    """
    if num_samples < 0:
        raise ValueError(f"num_samples must be non-negative, got {num_samples}")

    instance = np.asarray(instance, dtype=float)
    sampler = np.random if rng is None else rng

    # A single vectorized draw replaces the per-sample Python loop. Passing the
    # full target shape also keeps the feature axis when num_samples == 0, so
    # the result is always 2-D for a 1-D instance.
    noise = sampler.normal(0.0, scale, size=(num_samples, *instance.shape))

    # Broadcasting adds the same instance to every row of noise.
    return instance + noise


# Build the local neighbourhood around the chosen instance, using the
# function's default number of samples.
# NOTE(review): no random seed is set in this cell, so the perturbations (and
# therefore the printed coefficients) presumably change from run to run.
perturbed_data = generate_perturbations(instance_to_explain)

# Step 6: Get predictions for the perturbed data
# One row of class probabilities per perturbed sample; a single column of this
# matrix is later used as the regression target.
perturbed_predictions = rf.predict_proba(perturbed_data)

# Step 7: Calculate weights from the Euclidean distance to the original instance
def calculate_weights(original_instance, perturbed_data, kernel_width=1.0):
    """Weight each perturbed sample by its closeness to the original instance.

    The weight is ``exp(-d / kernel_width)``, where ``d`` is the Euclidean
    distance between a perturbed row and ``original_instance``. A sample
    identical to the original gets weight 1; weights decay towards 0 as the
    distance grows.

    Args:
        original_instance: Array-like feature vector being explained.
        perturbed_data: 2-D array-like with one perturbed sample per row.
        kernel_width: Positive distance scale of the exponential kernel.
            Smaller values concentrate the weight on the nearest samples.
            The default of 1.0 reproduces plain ``exp(-d)``.

    Returns:
        1-D array with one weight per row of ``perturbed_data``.

    Raises:
        ValueError: If ``kernel_width`` is not strictly positive.
    """
    if kernel_width <= 0:
        raise ValueError(f"kernel_width must be positive, got {kernel_width}")

    # Coerce up front so plain Python lists are accepted as well as arrays.
    perturbed_data = np.asarray(perturbed_data, dtype=float)
    original_instance = np.asarray(original_instance, dtype=float)

    # axis=1 collapses the feature axis, leaving one distance per sample.
    distances = np.linalg.norm(perturbed_data - original_instance, axis=1)
    return np.exp(-distances / kernel_width)

weights = calculate_weights(instance_to_explain, perturbed_data)

# Step 8: Fit a simple linear regression model
# The regression target is the forest's probability for the class it ranks
# highest on the original (unperturbed) instance.
original_probabilities = rf.predict_proba([instance_to_explain])[0]
explained_class = np.argmax(original_probabilities)
y_perturbed = perturbed_predictions[:, explained_class]

# Closer perturbations count for more in the fit via sample_weight.
lr = LinearRegression()
lr.fit(perturbed_data, y_perturbed, sample_weight=weights)

# Step 9: Print the explanation
# Each coefficient is paired with the feature name at the same position.
print("\nFeature importance (coefficients):")
for feature_name, coefficient in zip(iris.feature_names, lr.coef_):
    print(f"{feature_name}: {coefficient:.4f}")

 

Instance to explain: [6.1 2.8 4.7 1.2]

Feature importance (coefficients):
sepal length (cm): -0.0796
sepal width (cm): 0.0008
petal length (cm): -0.8457
petal width (cm): 0.0282
