In [None]:
# default_exp shapleyvalues

In [None]:
import numpy as np
import pandas as pd
from itertools import combinations
from math import factorial
from tqdm import tqdm

In [None]:
%load_ext autoreload
%autoreload 2

# ShapleyValues

> Calculate the exact Shapley values of an instance x, for the game defined by a reference instance r and a function fc.

In [None]:
#hide
from nbdev.showdoc import *

In [None]:
#export
def ShapleyValues(x, fc, r):
    """
    Calculate the exact Shapley Values for an element x, for the game
    defined by the reference r and the function fc.

    The value of a coalition S is fc(z_S), where z_S takes the entries of x
    for the features in S and the entries of r for all other features.

    Parameters
    ----------
    x : pandas Series or 1-D numpy array (or array-like)
        The instance for which we want to calculate the Shapley value of
        each attribute. When x is a Series its index provides the feature
        names; otherwise the features are named 0, 1, ..., len(x) - 1.

    fc : python function
        A function which takes as input an array of len(x) and returns
        a real number. It is evaluated exactly once per coalition
        (2**len(x) calls), so it is assumed to be deterministic.

    r : pandas Series or 1-D numpy array (or array-like)
        The reference instance. The Shapley values (attribute importance)
        are a contrastive explanation according to this reference.
        When both x and r are Series, r is matched to x by label;
        in every other case r is matched to x by position.

    Returns
    -------
    Φ : pandas Series
        Shapley values of each attribute, indexed by feature name.
        They sum to fc(x) - fc(r).

    Raises
    ------
    ValueError
        If x is not one-dimensional or if r does not provide exactly one
        value per feature of x.

    References
    ----------
    A Unified Approach to Interpreting Model Predictions
    Scott M. Lundberg, Su-In Lee, 2017
    NIPS 2017

    The Explanation Game: Explaining Machine Learning Models
    with Cooperative Game Theory
    Luke Merrick, Ankur Taly, 2019
    """

    # Resolve feature names and the positional values of x
    x_is_series = isinstance(x, pd.Series)
    if x_is_series:
        feature_names = list(x.index)
        x_values = x.to_numpy()
    else:
        x_values = np.asarray(x)
        if x_values.ndim != 1:
            raise ValueError("x must be one-dimensional")
        feature_names = list(range(len(x_values)))
    d = len(feature_names) # dimension

    # Align the reference with x: by label for two Series, by position otherwise
    if x_is_series and isinstance(r, pd.Series):
        r_values = np.array([r[col] for col in feature_names])
    else:
        r_values = np.asarray(r)
    if r_values.shape != x_values.shape:
        raise ValueError("r must provide exactly one value per feature of x")

    def coalition_value(members):
        # Entries of x for the features inside the coalition, entries of r outside
        mask = np.zeros(d, dtype=bool)
        mask[np.array(members, dtype=int)] = True
        return fc(np.where(mask, x_values, r_values))

    # Accumulate positionally; labels are attached once at the end
    shapley = np.zeros(d)
    d_factorial = factorial(d)

    # Coalition values that are still needed, keyed by the sorted tuple of
    # feature positions. A value computed as "S union j" at size k is reused
    # as "S" at size k + 1, so every coalition is evaluated only once.
    known_values = {}

    # Start computation (coalitions S of size 0 .. d-1: 2**d - 1 of them)
    for cardinal_S in tqdm(range(0, d)):
        # weight |S|! (d - |S| - 1)! / d!
        ω = factorial(cardinal_S) * (factorial(d - cardinal_S - 1))
        ω /= d_factorial
        # iter over all combinations of size cardinal_S
        for S in combinations(range(d), cardinal_S):
            # S is never needed again after this iteration: release its value
            if S in known_values:
                f_S = known_values.pop(S)
            else:
                f_S = coalition_value(S)
            members = set(S)
            # Consider only features outside of S
            for j in range(d):
                if j in members:
                    continue
                S_union_j = tuple(sorted(S + (j,)))
                if S_union_j not in known_values:
                    known_values[S_union_j] = coalition_value(S_union_j)
                # Update Shapley value of attribute j
                shapley[j] += ω * (known_values[S_union_j] - f_S)

    return pd.Series(shapley, index=feature_names)

# Examples

### Toy dataset

In [None]:
def generate_sample(dim, n_samples, rho=0, seed=None):
    """
    Draw samples of equicorrelated Gaussian features.

    The features follow N(mu, sigma) with mu = 0 and a covariance matrix
    that has ones on the diagonal and rho everywhere else, e.g. for dim = 3:

        mu    = [0, 0, 0]
        sigma = [[1,   rho, rho],
                 [rho, 1,   rho],
                 [rho, rho, 1  ]]

    Parameters
    ----------
    dim : int
        Number of features (columns).

    n_samples : int
        Number of rows to draw.

    rho : float, default 0
        Covariance shared by every pair of distinct features.

    seed : optional, default None
        When None, the sample is drawn from NumPy's global random state
        (so `np.random.seed` still controls it). Otherwise the value is
        passed to `np.random.default_rng` and the sample is drawn from that
        generator, which makes the call reproducible on its own.

    Returns
    -------
    df_X : pandas DataFrame
        n_samples rows and dim columns named 'x1', ..., 'x<dim>'.
    """
    # Law parameters
    mu = np.zeros(dim)
    sigma = np.full((dim, dim), rho, dtype=float)
    np.fill_diagonal(sigma, 1.0)
    # Simulation
    if seed is None:
        X = np.random.multivariate_normal(mean=mu, cov=sigma, size=n_samples)
    else:
        rng = np.random.default_rng(seed)
        X = rng.multivariate_normal(mean=mu, cov=sigma, size=n_samples)
    df_X = pd.DataFrame(X, columns=['x'+str(i) for i in range(1, dim+1)])
    return df_X

In [None]:
d = 5
n_samples = 100
# Seed NumPy's global generator so that "Restart & Run All" always produces the
# same sample (and the same random draws in the cells that follow).
np.random.seed(0)
X = generate_sample(d, n_samples)
# Target of every row: product over the features of sqrt(pi/2) * exp(-x**2 / 2),
# computed for all rows at once instead of looping over them.
y = np.prod(np.sqrt(.5 * np.pi) * np.exp(-0.5 * X.values ** 2), axis=1)

In [None]:
# Number of subsets of the d features, minus two of them
# (presumably the empty and the full coalition are excluded — TODO confirm).
n = 2**d - 2
def fc(x):
    """
    Value function of the toy game.

    Returns the product, over the entries of x, of
    sqrt(pi / 2) * exp(-x_i ** 2 / 2).
    """
    scale = np.sqrt(np.pi * .5)
    bell = np.exp(x ** 2 * -0.5)
    return np.prod(scale * bell)
# Report the size of the toy game
print("dimension = {0} ; nb of coalitions = {1}".format(d, n))

dimension = 5 ; nb of coalitions = 30


### Illustration

In [None]:
# set the game
idx_r, idx_x = np.random.choice(np.arange(len(X)), size=2, replace=False)
r = X.iloc[idx_r,:]
x = X.iloc[idx_x,:]

In [None]:
# Exact Shapley values of x with respect to the reference r under the game fc
true_shap = ShapleyValues(x=x, fc=fc, r=r)

100%|██████████| 5/5 [00:00<00:00, 537.91it/s]


In [None]:
# Display the Shapley value obtained for each feature
true_shap

x1   -0.212646
x2   -0.210187
x3   -0.224681
x4    0.569841
x5   -0.193766
dtype: float64

# Tests

In [None]:
# Value of the game at the reference and at the explained instance;
# their difference is what the Shapley values are compared against below
r_pred, x_pred = fc(r.values), fc(x.values)
v_M = x_pred - r_pred

In [None]:
# The Shapley values must add up to fc(x) - fc(r), up to rounding error
assert abs(v_M - true_shap.sum()) <= 1e-10

# Export nb to script 

In [None]:
# Run nbdev's notebook2script export (see the "Converted ..." lines it prints)
from nbdev.export import notebook2script
notebook2script()

Converted index.ipynb.
Converted shapleyvalues.ipynb.
