In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# --- Data preparation and model fitting ---
# Read the Ionosphere CSV; the file must provide a 'Class' column.
data = pd.read_csv("ionosphere.data.csv")

# 'Class' is the label; every remaining column is used as a feature.
y = data['Class']
X = data.drop(columns='Class')

# Turn the class labels into integer codes (the fitted encoder is kept so the
# codes can be mapped back to the original labels later).
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training split (fit() returns the
# estimator itself, so construction and fitting can be chained).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 4: Define the Integrated Gradients function
def integrated_gradients(model, input_data, baseline=None, steps=50, *, verbose=True):
    """Attribute a prediction to the input features along a baseline-to-input path.

    The function walks a straight line from ``baseline`` to ``input_data`` in
    ``steps`` equal increments, asks ``model`` for class probabilities at every
    point on that line, and scales ``input_data - baseline`` by the average
    change in the probability of the class predicted for ``input_data``.

    NOTE: the "gradient" used here is a single scalar -- the mean, over the
    path, of ``P(target | point) - P(target | baseline)`` -- applied to every
    feature alike. It is not a per-feature derivative, so the result is a
    rescaled copy of ``input_data - baseline`` rather than a textbook
    Integrated Gradients attribution. That computation is kept unchanged.

    Parameters
    ----------
    model
        Any object exposing ``predict_proba(rows)`` that accepts a batch of
        rows and returns one row of class probabilities per input row. Rows
        are assumed to be scored independently of each other.
    input_data
        Feature vector to explain (array-like; converted with ``np.asarray``).
    baseline
        Reference point the path starts from. Defaults to all zeros with the
        shape and dtype of ``input_data``.
    steps
        Number of increments along the path; ``steps + 1`` points are scored.
        Must be a positive integer.
    verbose
        When true (the default) the intermediate values are printed.

    Returns
    -------
    numpy.ndarray
        ``(input_data - baseline)`` multiplied by the scalar path average.

    Raises
    ------
    ValueError
        If ``steps`` is smaller than 1.
    """
    if steps < 1:
        # steps == 0 would divide by zero and a negative value would yield an
        # empty path; fail early with a message that names the real problem.
        raise ValueError(f"steps must be a positive integer, got {steps!r}")

    input_data = np.asarray(input_data)

    # Use a zero baseline if none is provided
    if baseline is None:
        baseline = np.zeros_like(input_data)
    else:
        baseline = np.asarray(baseline)

    if verbose:
        print("input data")
        print(input_data)
        print("baseline")
        print(baseline)

    delta = input_data - baseline

    # Interpolation factors i / steps for i = 0..steps, shaped so that they
    # broadcast against delta along a new leading "path" axis.
    factors = np.array([i / steps for i in range(steps + 1)])
    factors = factors.reshape((-1,) + (1,) * delta.ndim)
    interpolated_inputs = baseline + factors * delta

    # Score the whole path in one call instead of one call per point.
    predictions = np.asarray(model.predict_proba(interpolated_inputs))

    if verbose:
        print("predictions")
        print(predictions)

    # Get the predicted class for the original input. The input is scored
    # directly rather than reusing the last path point, which can differ from
    # it by floating-point rounding when the baseline is non-zero.
    target_class = np.argmax(model.predict_proba([input_data])[0])

    if verbose:
        print("target_class")
        print(target_class)

    # Change in the target-class probability relative to the baseline point
    gradients = predictions[:, target_class] - predictions[0, target_class]

    if verbose:
        print("gradients")
        print(gradients)

    # Average over all path points, then scale (input - baseline) by it
    avg_gradients = np.mean(gradients)

    if verbose:
        print("avg_gradients")
        print(avg_gradients)

    return delta * avg_gradients

# Step 5: Pick the instance to explain -- the first row of the test split,
# selected by position and taken as a NumPy array
first_test_row = X_test.iloc[0]
instance_to_explain = first_test_row.values

# Step 6: Run the attribution function on that instance with the trained model
integrated_grads = integrated_gradients(model, instance_to_explain)

print("integrated_grads")
print(integrated_grads)

# Step 7: Report one attribution per feature column, to four decimal places
print("\nFeature importance (Integrated Gradients):")
for feature_name, attribution in zip(X.columns, integrated_grads):
    print(f"{feature_name}: {attribution:.4f}")


input data
[ 1.       0.       0.4709   0.22751  0.42328  0.33598  0.25661  0.47619
  0.01852  0.49471 -0.02116  0.53968 -0.34127  0.31217 -0.4127   0.3254
 -0.51587  0.06878 -0.5     -0.1164  -0.14815 -0.1455  -0.14815 -0.38095
 -0.2328   0.00265  0.03574 -0.31739  0.15873 -0.21693  0.24868 -0.24339
  0.2672   0.04233]
baseline
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]




predictions
[[0.99 0.01]
 [0.97 0.03]
 [0.97 0.03]
 [0.95 0.05]
 [0.93 0.07]
 [0.92 0.08]
 [0.87 0.13]
 [0.87 0.13]
 [0.87 0.13]
 [0.87 0.13]
 [0.87 0.13]
 [0.86 0.14]
 [0.84 0.16]
 [0.83 0.17]
 [0.83 0.17]
 [0.78 0.22]
 [0.74 0.26]
 [0.73 0.27]
 [0.69 0.31]
 [0.68 0.32]
 [0.67 0.33]
 [0.6  0.4 ]
 [0.55 0.45]
 [0.54 0.46]
 [0.52 0.48]
 [0.51 0.49]
 [0.35 0.65]
 [0.34 0.66]
 [0.24 0.76]
 [0.24 0.76]
 [0.23 0.77]
 [0.23 0.77]
 [0.22 0.78]
 [0.22 0.78]
 [0.22 0.78]
 [0.21 0.79]
 [0.21 0.79]
 [0.22 0.78]
 [0.23 0.77]
 [0.23 0.77]
 [0.24 0.76]
 [0.24 0.76]
 [0.24 0.76]
 [0.23 0.77]
 [0.18 0.82]
 [0.19 0.81]
 [0.17 0.83]
 [0.15 0.85]
 [0.13 0.87]
 [0.13 0.87]
 [0.13 0.87]]
target_class
1
gradients
[0.   0.02 0.02 0.04 0.06 0.07 0.12 0.12 0.12 0.12 0.12 0.13 0.15 0.16
 0.16 0.21 0.25 0.26 0.3  0.31 0.32 0.39 0.44 0.45 0.47 0.48 0.64 0.65
 0.75 0.75 0.76 0.76 0.77 0.77 0.77 0.78 0.78 0.77 0.76 0.76 0.75 0.75
 0.75 0.76 0.81 0.8  0.82 0.84 0.86 0.86 0.86]
avg_gradients
0.4827450980392156
integr