In [1]:
import numpy as np
import pandas as pd

%load_ext autoreload
%load_ext line_profiler
%load_ext Cython
%autoreload 2

from shapkit.shapley_values import ShapleyValues
from shapkit.monte_carlo_shapley import MonteCarloShapley
from shapkit.sgd_shapley import SGDshapley

In [2]:
def generate_sample(dim, n_samples, rho=0, random_state=None):
    """
    Generate a dataset of zero-mean, unit-variance Gaussian features.

    Every pair of distinct features shares the same covariance ``rho``; with
    the default ``rho=0`` the features are independent.

    Parameters
    ----------
    dim : int
        Number of features (columns).
    n_samples : int
        Number of observations (rows).
    rho : float, default 0
        Common off-diagonal entry of the covariance matrix. The matrix is
        only a valid covariance for ``-1 / (dim - 1) <= rho <= 1``.
    random_state : int or None, default None
        Seed for a dedicated ``np.random.RandomState``. When None, the global
        NumPy random state is used, exactly as before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        Frame of shape ``(n_samples, dim)`` with columns ``x1`` ... ``x<dim>``.
    """
    mu = np.zeros(dim)
    # Equicorrelated covariance: rho everywhere, ones on the diagonal.
    sigma = np.full((dim, dim), rho, dtype=float)
    np.fill_diagonal(sigma, 1.0)
    # Fall back to the module-level generator so np.random.seed() still applies.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    # Simulation
    X = rng.multivariate_normal(mean=mu, cov=sigma, size=n_samples)
    df_X = pd.DataFrame(X, columns=['x' + str(i) for i in range(1, dim + 1)])
    return df_X

# Fix the global NumPy seed so the simulated data, and the instances drawn
# from it with np.random later in the notebook, are the same on every
# Restart & Run All.
np.random.seed(0)

d, n_samples = 5, 100
X = generate_sample(d, n_samples)

# Reward of every row: product over the features of
# sqrt(pi / 2) * exp(-x_j ** 2 / 2), computed for all rows at once
# instead of one Python iteration per row.
y = np.prod(np.sqrt(.5 * np.pi) * np.exp(-0.5 * X.values ** 2), axis=1)

# 2**d subsets of the d features minus two of them
# (presumably the empty and the full coalition -- TODO confirm).
n = 2**d - 2
def fc(x):
    """
    Reward function: product over the coordinates of ``x`` of
    sqrt(pi / 2) * exp(-x_j ** 2 / 2).
    """
    scale = np.sqrt(np.pi / 2)
    damped = np.exp(-0.5 * x ** 2)
    return np.prod(scale * damped)
# Report the problem size; format() already stringifies its arguments.
print("dimension = {0} ; nb of coalitions = {1}".format(d, n))

dimension = 5 ; nb of coalitions = 30


In [3]:
# Draw two distinct row positions: one for the reference r, one for the
# instance x to explain.
idx_r, idx_x = np.random.choice(len(X), size=2, replace=False)
r = X.iloc[idx_r]
x = X.iloc[idx_x]

#### Exact Shapley Values

In [5]:
# Benchmark the exact Shapley Values computation for instance x against reference r.
%timeit ShapleyValues(x=x, fc=fc, ref=r)

100%|██████████| 5/5 [00:00<00:00, 243.38it/s]
100%|██████████| 5/5 [00:00<00:00, 245.23it/s]
100%|██████████| 5/5 [00:00<00:00, 225.92it/s]
100%|██████████| 5/5 [00:00<00:00, 245.67it/s]
100%|██████████| 5/5 [00:00<00:00, 287.22it/s]
100%|██████████| 5/5 [00:00<00:00, 278.98it/s]
100%|██████████| 5/5 [00:00<00:00, 270.39it/s]
100%|██████████| 5/5 [00:00<00:00, 260.85it/s]
100%|██████████| 5/5 [00:00<00:00, 204.57it/s]
100%|██████████| 5/5 [00:00<00:00, 192.21it/s]
100%|██████████| 5/5 [00:00<00:00, 175.78it/s]
100%|██████████| 5/5 [00:00<00:00, 161.01it/s]
100%|██████████| 5/5 [00:00<00:00, 261.89it/s]
100%|██████████| 5/5 [00:00<00:00, 304.45it/s]
100%|██████████| 5/5 [00:00<00:00, 215.67it/s]
100%|██████████| 5/5 [00:00<00:00, 283.21it/s]
100%|██████████| 5/5 [00:00<00:00, 327.80it/s]
100%|██████████| 5/5 [00:00<00:00, 275.69it/s]
100%|██████████| 5/5 [00:00<00:00, 315.17it/s]
100%|██████████| 5/5 [00:00<00:00, 312.75it/s]
100%|██████████| 5/5 [00:00<00:00, 284.00it/s]
100%|████████

20.6 ms ± 1.6 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





In [7]:
# Line-by-line profile of ShapleyValues (magic from the line_profiler extension loaded in the first cell).
%lprun -f ShapleyValues ShapleyValues(x=x, fc=fc, ref=r)

100%|██████████| 5/5 [00:00<00:00, 113.28it/s]


Timer unit: 1e-06 s

Total time: 0.045871 s
File: /home/sgrah/Documents/shapkit_github/profiling/shapkit/shapley_values.py
Function: ShapleyValues at line 39

Line #      Hits         Time  Per Hit   % Time  Line Contents
    39                                           def ShapleyValues(x, fc, ref):
    40                                               """
    41                                               Calculate the exact Shapley Values for an individual x
    42                                               in a game based on a reference r and the reward function fc.
    43                                               """
    44                                           
    45                                               # Get general information
    46         1        101.0    101.0      0.2      feature_names = list(x.index)
    47         1          2.0      2.0      0.0      d = len(feature_names) # dimension
    48         1          4.0      4.0      0.0      set_featu

#### Monte Carlo estimation

In [8]:
# Benchmark the Monte Carlo estimator with 1000 iterations on the same (x, r) pair.
%timeit MonteCarloShapley(x=x, fc=fc, ref=r, n_iter=1000)

100%|██████████| 1000/1000 [00:00<00:00, 6290.81it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8014.41it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6999.82it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7966.53it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7133.66it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7559.24it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6412.97it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7446.62it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6959.35it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6996.37it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6827.24it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7236.00it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7648.72it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7597.11it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7081.05it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8204.60it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7789.00it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7845.44it/s]
100%|██████████| 1000/1000 [

133 ms ± 4.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





In [9]:
# Line-by-line profile of MonteCarloShapley (magic from the line_profiler extension loaded in the first cell).
%lprun -f MonteCarloShapley MonteCarloShapley(x=x, fc=fc, ref=r, n_iter=1000)

100%|██████████| 1000/1000 [00:00<00:00, 3423.17it/s]


Timer unit: 1e-06 s

Total time: 0.237975 s
File: /home/sgrah/Documents/shapkit_github/profiling/shapkit/monte_carlo_shapley.py
Function: MonteCarloShapley at line 37

Line #      Hits         Time  Per Hit   % Time  Line Contents
    37                                           def MonteCarloShapley(x, fc, ref, n_iter, callback=None):
    38                                               """
    39                                               Estimate the Shapley Values using an optimized Monte Carlo version.
    40                                               """
    41                                           
    42                                               # Get general information
    43         1         60.0     60.0      0.0      feature_names = list(x.index)
    44         1          2.0      2.0      0.0      d = len(feature_names) # dimension
    45                                           
    46                                               # Individual reference o

#### Projected Stochastic Gradient Descent (SGD) Shapley

In [10]:
# Build the SGD estimator for d features; C is set to the largest reward in the
# simulated sample (presumably a bound on the reward -- TODO confirm against SGDshapley).
sgd_est = SGDshapley(d, C=y.max())
# Benchmark 1000 SGD iterations on the same (x, r) pair.
%timeit sgd_est.sgd(x=x, fc=fc, ref=r, n_iter=1000, step=.1, step_type="sqrt")

100%|██████████| 1000/1000 [00:00<00:00, 7991.62it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9579.80it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9587.84it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8958.63it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9310.12it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9202.21it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9421.72it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9185.18it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6419.66it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5942.19it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6009.91it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5946.77it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6438.47it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5765.07it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5877.27it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6256.90it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6115.03it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5291.98it/s]
100%|██████████| 1000/1000 [

114 ms ± 19 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





In [11]:
# Line-by-line profile of sgd_est.sgd with the same settings as the benchmark cell.
%lprun -f sgd_est.sgd sgd_est.sgd(x=x, fc=fc, ref=r, n_iter=1000, step=0.1, step_type="sqrt")

100%|██████████| 1000/1000 [00:00<00:00, 3409.19it/s]


Timer unit: 1e-06 s

Total time: 0.270188 s
File: /home/sgrah/Documents/shapkit_github/profiling/shapkit/sgd_shapley.py
Function: sgd at line 137

Line #      Hits         Time  Per Hit   % Time  Line Contents
   137                                               def sgd(self, x, fc, ref, n_iter=100, step=.1, step_type="sqrt",
   138                                                       callback=None, Φ_0=False):
   139                                                   """
   140                                                   Stochastic gradient descent algorithm
   141                                                   The game is defined for an element x, a reference r and function fc
   142                                           
   143                                                   """
   144                                           
   145                                                   # Get general information
   146         1         74.0     74.0      0.0          fe