In [3]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split



In [4]:
# Step 1: Load the Iris dataset
iris = load_iris()
X = iris.data
y = iris.target

In [5]:
# Step 2: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
# Step 3: Train a Random Forest model
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)


In [7]:
import numpy as np

# Step 4: Define the Integrated Gradients function
def integrated_gradients(model, input_data, baseline=None, steps=50):
    print("input data")
    print(input_data)
    
    # Use a zero baseline if none is provided
    if baseline is None:
        baseline = np.zeros_like(input_data)

    print("baseline")
    print(baseline)
    
    # Initialize an empty list to hold the interpolated inputs
    interpolated_inputs = []

    # Loop through each step from 0 to the number of steps (inclusive)
    for i in range(steps + 1):
        # Calculate the interpolation factor (i / steps)
        factor = i / steps
        
        # Calculate the interpolated point
        interpolated_point = baseline + factor * (input_data - baseline)
        
        print("interpolated_point")
        print(interpolated_point)
        
        # Append the interpolated point to the list
        interpolated_inputs.append(interpolated_point)

    # Convert the list of interpolated inputs to a NumPy array for easier manipulation
    interpolated_inputs = np.array(interpolated_inputs)
    
    print("interpolated_inputs")
    print(interpolated_inputs)
    
    # Predict model outputs for each interpolated input
    predictions = np.array([model.predict_proba([input])[0] for input in interpolated_inputs])#[0] remove the outer []
    
    print("predictions")
    print(predictions)
    
    # Get the predicted class for the original input
    target_class = np.argmax(model.predict_proba([input_data])[0])#[0] remove the outer []
    
    print("target_class")
    print(target_class)
    
    # Calculate gradients by measuring change in prediction
    gradients = predictions[:, target_class] - predictions[0, target_class]
    
    print("gradients")
    print(gradients)
    
    # Average gradients across all steps and multiply by (input - baseline)
    avg_gradients = np.mean(gradients)
    
    print("avg_gradients")
    print(avg_gradients)
    
    integrated_grads = (input_data - baseline) * avg_gradients
    
    return integrated_grads



In [8]:

# Step 5: Choose an instance to explain (first test instance)
instance_to_explain = X_test[0]


In [9]:

# Step 6: Compute Integrated Gradients for this instance
integrated_grads = integrated_gradients(rf, instance_to_explain)

print("integrated_grads")
print(integrated_grads)


input data
[6.1 2.8 4.7 1.2]
baseline
[0. 0. 0. 0.]
interpolated_point
[0. 0. 0. 0.]
interpolated_point
[0.122 0.056 0.094 0.024]
interpolated_point
[0.244 0.112 0.188 0.048]
interpolated_point
[0.366 0.168 0.282 0.072]
interpolated_point
[0.488 0.224 0.376 0.096]
interpolated_point
[0.61 0.28 0.47 0.12]
interpolated_point
[0.732 0.336 0.564 0.144]
interpolated_point
[0.854 0.392 0.658 0.168]
interpolated_point
[0.976 0.448 0.752 0.192]
interpolated_point
[1.098 0.504 0.846 0.216]
interpolated_point
[1.22 0.56 0.94 0.24]
interpolated_point
[1.342 0.616 1.034 0.264]
interpolated_point
[1.464 0.672 1.128 0.288]
interpolated_point
[1.586 0.728 1.222 0.312]
interpolated_point
[1.708 0.784 1.316 0.336]
interpolated_point
[1.83 0.84 1.41 0.36]
interpolated_point
[1.952 0.896 1.504 0.384]
interpolated_point
[2.074 0.952 1.598 0.408]
interpolated_point
[2.196 1.008 1.692 0.432]
interpolated_point
[2.318 1.064 1.786 0.456]
interpolated_point
[2.44 1.12 1.88 0.48]
interpolated_point
[2.562 1.176

In [10]:

# Step 7: Display the feature importance based on Integrated Gradients
print("\nFeature importance (Integrated Gradients):")
for i, feature_name in enumerate(iris.feature_names):
    print(f"{feature_name}: {integrated_grads[i]:.4f}")



Feature importance (Integrated Gradients):
sepal length (cm): 2.2618
sepal width (cm): 1.0382
petal length (cm): 1.7427
petal width (cm): 0.4449
