In [1]:
# import our framework 'fam' aka 'Feature Attribution with Measures'
import fam

import torch 
from torch import nn

Illustration of global feature attribution (using Riemann sum approximation) in the setting of Corollary C.3 for linear models

In [None]:
# --- Problem setup -----------------------------------------------------------
# Two input features; [0, 1] is handed to the framework as the input space.
input_dim = 2
input_space = [0, 1]


def model(x):
    """Linear model to explain: the sum of the first two features.

    Indexing with `...` keeps the model valid for any number of leading
    batch dimensions.
    """
    return x[..., 0] + x[..., 1]


# --- Integrands (one per explained feature) ----------------------------------
# These are the integrands of Corollary C.3 in the paper.
# Both take (x, y); y is not used by these global integrands.
def global_integrand_0(x, y):
    """Integrand for feature 0."""
    return 2*x[..., 0] * (x[..., 1] > 0).float()


def global_integrand_1(x, y):
    """Integrand for feature 1."""
    return 2*(x[..., 0] > 0).float() * x[..., 1]


integrands = [global_integrand_0, global_integrand_1]

# --- Attribution -------------------------------------------------------------
attribution_method = fam.FeatureAttribution(model, input_dim, input_space, integrands=integrands)

# Global attribution: there is no specific input to explain, so we pass None.
explained_point = None

# Riemann-sum approximation (is_montecarlo=False).
# Choose batch_size as a divisor of N_points^(input_dim).
raw_attributions = attribution_method.stieltjes_attribution(
    explained_point,
    is_montecarlo=False,
    N_points=1000,
    batch_size=100,
)
attributions_tensor = torch.tensor(raw_attributions)

# Display the attributions rounded to three decimals.
rounded_attributions = torch.round(attributions_tensor * 1000) / 1000
print("Attribution for each feature: ", rounded_attributions)
# [0.9990, 0.9990] is the result we want: the model is symmetric in the two
# features, so both receive the same attribution.

100%|██████████| 10000/10000 [00:00<00:00, 11902.58it/s]

Attribution for each feature:  tensor([0.9990, 0.9990])





Illustration of local feature attribution (using Riemann sum approximation) in the setting of Corollary C.4 for linear models

In [None]:
# --- Problem setup -----------------------------------------------------------
# Two input features; [0, 1] is handed to the framework as the input space.
input_dim = 2
input_space = [0, 1]


def model(x):
    """Linear model to explain, with weights w = (2, -1.7).

    Indexing with `...` keeps the model valid for any number of leading
    batch dimensions.
    """
    return 2*x[..., 0] - 1.7*x[..., 1]


# --- Integrands (one per explained feature) ----------------------------------
# Integrands for the local setting of this section (Corollary C.4 in the paper).
# y is the input to explain and x is the point we are integrating over.
def local_integrand_0(x, y):
    """Integrand for feature 0: indicator of x_0 >= y_0 and x_1 > 0."""
    return (x[..., 0] >= y[..., 0]).float() * (x[..., 1] > 0).float()


def local_integrand_1(x, y):
    """Integrand for feature 1: indicator of x_0 > 0 and x_1 >= y_1."""
    return (x[..., 0] > 0).float() * (x[..., 1] >= y[..., 1]).float()


integrands = [local_integrand_0, local_integrand_1]

# --- Attribution -------------------------------------------------------------
attribution_method = fam.FeatureAttribution(model, input_dim, input_space, integrands=integrands)

# Local attribution: this is the input whose prediction we explain.
explained_point = torch.tensor([0.5, 0.1])

# Riemann-sum approximation (is_montecarlo=False).
# Choose batch_size as a divisor of N_points^(input_dim).
raw_attributions = attribution_method.stieltjes_attribution(
    explained_point,
    is_montecarlo=False,
    N_points=1_000,
    batch_size=100,
)
attributions_tensor = torch.tensor(raw_attributions)

# Display the attributions rounded to three decimals.
rounded_attributions = torch.round(attributions_tensor * 1000) / 1000
print("Attribution for each feature: ", rounded_attributions)
# The unrounded result [0.9980000257492065, -0.16830001771450043] is what we
# want: each attribution matches w_j * x_j, as in Corollary C.4, i.e.
#   0.9980 ≈  2   * 0.5
#  -0.1680 ≈ -1.7 * 0.1

100%|██████████| 10000/10000 [00:00<00:00, 11153.81it/s]

Attribution for each feature:  tensor([ 0.9980, -0.1680])





Illustration of global feature attribution (using Riemann sum approximation) in the setting of Corollary C.3 for ReLU network

In [6]:
# Fix the RNG seed so the randomly initialised network is reproducible.
SEED = 25
torch.manual_seed(SEED)

# --- Problem setup -----------------------------------------------------------
# Two input features; [0, 1] is handed to the framework as the input space.
input_dim = 2
input_space = [0, 1]

# Model to explain: a one-hidden-layer ReLU network (2 -> 5 -> 1).
# The linear layers are created in the same order as they appear in the
# network, so the seeded parameter initialisation is unchanged.
hidden_layer = nn.Linear(input_dim, 5)
output_layer = nn.Linear(5, 1)
model = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)


# --- Integrands (one per explained feature) ----------------------------------
# These are the integrands of Corollary C.3 in the paper.
# Both take (x, y); y is not used by these global integrands.
def relu_global_integrand_0(x, y):
    """Integrand for feature 0."""
    return 2*x[..., 0] * (x[..., 1] > 0).float()


def relu_global_integrand_1(x, y):
    """Integrand for feature 1."""
    return 2*(x[..., 0] > 0).float() * x[..., 1]


integrands = [relu_global_integrand_0, relu_global_integrand_1]

# --- Attribution -------------------------------------------------------------
attribution_method = fam.FeatureAttribution(model, input_dim, input_space, integrands=integrands)

# Global attribution: there is no specific input to explain, so we pass None.
explained_point = None

# Riemann-sum approximation (is_montecarlo=False).
# Choose batch_size as a divisor of N_points^(input_dim).
raw_attributions = attribution_method.stieltjes_attribution(
    explained_point,
    is_montecarlo=False,
    N_points=1000,
    batch_size=100,
)
attributions_tensor = torch.tensor(raw_attributions)

# Display the attributions rounded to three decimals.
rounded_attributions = torch.round(attributions_tensor * 1000) / 1000
print("Attribution for each feature: ", rounded_attributions)

100%|██████████| 10000/10000 [00:00<00:00, 10955.83it/s]

Attribution for each feature:  tensor([-0.2870, -0.4900])





Illustration of global feature attribution (using Monte Carlo approximation with uniform density on $[0,1]$ for each feature) for ReLU network

In [8]:
# Fix the RNG seed so the randomly initialised network is reproducible.
SEED = 25
torch.manual_seed(SEED)

# --- Problem setup -----------------------------------------------------------
# Two input features; [0, 1] is handed to the framework as the input space.
input_dim = 2
input_space = [0, 1]

# Model to explain: a one-hidden-layer ReLU network (2 -> 5 -> 1).
# The linear layers are created in the same order as they appear in the
# network, so the seeded parameter initialisation is unchanged.
hidden_layer = nn.Linear(input_dim, 5)
output_layer = nn.Linear(5, 1)
model = nn.Sequential(hidden_layer, nn.ReLU(), output_layer)

# No integrands here: the Monte Carlo path uses a sampler instead. That sampler
# should be overwritten if you want to use different probability distributions.
integrands = None

# --- Attribution -------------------------------------------------------------
attribution_method = fam.FeatureAttribution(model, input_dim, input_space, integrands=integrands)

# Global attribution: there is no specific input to explain, so we pass None.
explained_point = None

# Monte Carlo approximation (is_montecarlo=True).
# Choose batch_size as a divisor of N_points^(input_dim).
attributions_tensor = attribution_method.stieltjes_attribution(
    explained_point,
    is_montecarlo=True,
    N_points=1000,
    batch_size=100,
)

# Display the attributions rounded to three decimals.
rounded_attributions = torch.round(attributions_tensor * 1000) / 1000
print("Attribution for each feature: ", rounded_attributions)

Attribution for each feature:  tensor([-0.2090, -0.2090])
