In [1]:
import numpy as np
import pandas as pd

%load_ext autoreload
%load_ext line_profiler
%load_ext Cython
%autoreload 2

from shapkit_nbdev.shapley_values import ShapleyValues
from shapkit_nbdev.monte_carlo_shapley import MonteCarloShapley
from shapkit_nbdev.sgd_shapley import SGDshapley

In [2]:
def generate_sample(dim, n_samples, rho=0, seed=None):
    """
    Generate a dataset of Gaussian features sharing one pairwise covariance.

    Every feature has zero mean and unit variance, and every pair of distinct
    features has covariance ``rho``. With the default ``rho=0`` the features
    are independent.

    Parameters
    ----------
    dim : int
        Number of features (columns).
    n_samples : int
        Number of rows to draw.
    rho : float, default 0
        Off-diagonal entry of the covariance matrix.
    seed : int or None, default None
        When given, rows are drawn from a dedicated
        ``np.random.RandomState(seed)`` so the sample is reproducible.
        When None, the global NumPy random state is used.

    Returns
    -------
    pandas.DataFrame
        Frame of shape (n_samples, dim) with columns 'x1', ..., 'x<dim>'.
    """
    mu = np.zeros(dim)
    # Constant off-diagonal covariance rho, unit variance on the diagonal.
    sigma = np.full((dim, dim), rho, dtype=float)
    np.fill_diagonal(sigma, 1.0)
    # Simulation: fall back to the module-level generator when no seed is given
    # so that callers relying on np.random.seed(...) keep working.
    sampler = np.random if seed is None else np.random.RandomState(seed)
    X = sampler.multivariate_normal(mean=mu, cov=sigma, size=n_samples)
    columns = ['x' + str(i) for i in range(1, dim + 1)]
    df_X = pd.DataFrame(X, columns=columns)
    return df_X

d, n_samples = 5, 100
X = generate_sample(d, n_samples)

# Reward of every row, computed in one vectorised pass:
#     y_i = prod_j sqrt(pi / 2) * exp(-x_ij ** 2 / 2)
# This is the same formula as fc, applied to the whole (n_samples, d) array
# rather than looping over rows in Python.
y = np.prod(np.sqrt(.5 * np.pi) * np.exp(-0.5 * X.values ** 2), axis=1)

# 2**d subsets of the d features minus two (presumably the empty and the full
# coalition); reported below as the number of coalitions.
n = 2**d - 2
def fc(x):
    """
    Reward function: product over the entries of x of
    sqrt(pi / 2) * exp(-x_j ** 2 / 2).
    """
    scale = np.sqrt(.5 * np.pi)
    per_feature = scale * np.exp(-0.5 * x ** 2)
    return np.prod(per_feature)
# Report the problem size: feature count d and coalition count n.
print("dimension = {0} ; nb of coalitions = {1}".format(d, n))

dimension = 5 ; nb of coalitions = 30


In [3]:
# Draw two distinct row positions of X: one for the reference r and one for
# the individual x that will be explained.
row_positions = np.arange(len(X))
idx_r, idx_x = np.random.choice(row_positions, size=2, replace=False)
r, x = X.iloc[idx_r, :], X.iloc[idx_x, :]

#### Shapley Values

In [7]:
# Time the exact Shapley computation for individual x against reference r.
# NOTE(review): each call prints a progress bar (see the output below), so the
# reported time includes the cost of rendering it.
%timeit ShapleyValues(x=x, fc=fc, r=r)

100%|██████████| 5/5 [00:00<00:00, 523.57it/s]
100%|██████████| 5/5 [00:00<00:00, 481.61it/s]
100%|██████████| 5/5 [00:00<00:00, 516.18it/s]
100%|██████████| 5/5 [00:00<00:00, 504.33it/s]
100%|██████████| 5/5 [00:00<00:00, 382.96it/s]
100%|██████████| 5/5 [00:00<00:00, 455.34it/s]
100%|██████████| 5/5 [00:00<00:00, 333.12it/s]
100%|██████████| 5/5 [00:00<00:00, 447.95it/s]
100%|██████████| 5/5 [00:00<00:00, 429.27it/s]
100%|██████████| 5/5 [00:00<00:00, 448.63it/s]
100%|██████████| 5/5 [00:00<00:00, 477.55it/s]
100%|██████████| 5/5 [00:00<00:00, 505.79it/s]
100%|██████████| 5/5 [00:00<00:00, 519.59it/s]
100%|██████████| 5/5 [00:00<00:00, 430.32it/s]
100%|██████████| 5/5 [00:00<00:00, 450.45it/s]
100%|██████████| 5/5 [00:00<00:00, 476.30it/s]
100%|██████████| 5/5 [00:00<00:00, 420.66it/s]
100%|██████████| 5/5 [00:00<00:00, 447.34it/s]
100%|██████████| 5/5 [00:00<00:00, 523.35it/s]
100%|██████████| 5/5 [00:00<00:00, 514.10it/s]
100%|██████████| 5/5 [00:00<00:00, 471.87it/s]
100%|████████

13.5 ms ± 1.48 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)





In [8]:
# Line-by-line profile of a single ShapleyValues call (%lprun comes from the
# line_profiler extension loaded in the first cell; -f selects the function).
%lprun -f ShapleyValues ShapleyValues(x=x, fc=fc, r=r)

100%|██████████| 5/5 [00:00<00:00, 178.13it/s]


Timer unit: 1e-06 s

Total time: 0.029966 s
File: /home/sgrah/Documents/shapkit_nbdev/profiling/shapkit_nbdev/shapley_values.py
Function: ShapleyValues at line 13

Line #      Hits         Time  Per Hit   % Time  Line Contents
    13                                           def ShapleyValues(x, fc, r):
    14                                               """
    15                                               Calculate the exact Shapley Values for an individual x
    16                                               in a game based on a reference r and the reward function fc.
    17                                               """
    18                                           
    19                                               # Get general information
    20         1         96.0     96.0      0.3      feature_names = list(x.index)
    21         1          2.0      2.0      0.0      d = len(feature_names) # dimension
    22         1          3.0      3.0      0.0      set_fe

#### Monte Carlo

In [9]:
# Time the Monte Carlo estimator with 1000 iterations on the same x, r and fc.
# NOTE(review): each call prints a progress bar (see the output below), so the
# reported time includes the cost of rendering it.
%timeit MonteCarloShapley(x=x, fc=fc, r=r, n_iter=1000)

100%|██████████| 1000/1000 [00:00<00:00, 6429.33it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7748.00it/s]
100%|██████████| 1000/1000 [00:00<00:00, 6768.96it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7067.44it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7783.24it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7349.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7368.27it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7681.10it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7327.77it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7656.76it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7357.83it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7148.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7864.63it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7931.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7577.22it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7838.99it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7461.01it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7391.29it/s]
100%|██████████| 1000/1000 [

141 ms ± 4.44 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





In [10]:
# Line-by-line profile of a single MonteCarloShapley call with 1000 iterations.
%lprun -f MonteCarloShapley MonteCarloShapley(x=x, fc=fc, r=r, n_iter=1000)

100%|██████████| 1000/1000 [00:00<00:00, 3161.73it/s]


Timer unit: 1e-06 s

Total time: 0.288432 s
File: /home/sgrah/Documents/shapkit_nbdev/profiling/shapkit_nbdev/monte_carlo_shapley.py
Function: MonteCarloShapley at line 11

Line #      Hits         Time  Per Hit   % Time  Line Contents
    11                                           def MonteCarloShapley(x, fc, r, n_iter, callback=None):
    12                                               """
    13                                               Estimate the Shapley Values using an optimized Monte Carlo version.
    14                                               """
    15                                               # Get general information
    16         1         95.0     95.0      0.0      f_r = fc(r.values)
    17         1        213.0    213.0      0.1      feature_names = list(x.index)
    18         1          3.0      3.0      0.0      d = len(feature_names) # dimension
    19                                           
    20                                              

#### Projected Stochastic Gradient Shapley 

In [11]:
# Build the SGD estimator for d features; C is set to the largest reward in y.
# NOTE(review): the exact role of C is defined inside SGDshapley — confirm there.
sgd_est = SGDshapley(d, C=y.max())
# Time 1000 SGD iterations with step=.1 and the "sqrt" step type.
# NOTE(review): each call prints a progress bar (see the output below), so the
# reported time includes the cost of rendering it.
%timeit sgd_est.sgd(x=x, fc=fc, r=r, n_iter=1000, step=.1, step_type="sqrt")

100%|██████████| 1000/1000 [00:00<00:00, 7476.14it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8252.18it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8603.38it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8788.36it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8955.39it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8960.06it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8037.02it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8466.70it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8495.96it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8222.11it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7867.32it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7552.47it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9033.98it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7858.30it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8849.25it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8322.16it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8449.27it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7802.67it/s]
100%|██████████| 1000/1000 [

122 ms ± 4.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)





In [15]:
# Line-by-line profile of a single sgd_est.sgd call, using the same arguments
# as the timing cell above (1000 iterations, step 0.1, "sqrt" step type).
%lprun -f sgd_est.sgd sgd_est.sgd(x=x, fc=fc, r=r, n_iter=1000, step=0.1, step_type="sqrt")

100%|██████████| 1000/1000 [00:00<00:00, 4538.17it/s]


Timer unit: 1e-06 s

Total time: 0.196311 s
File: /home/sgrah/Documents/shapkit_nbdev/profiling/shapkit_nbdev/sgd_shapley.py
Function: sgd at line 111

Line #      Hits         Time  Per Hit   % Time  Line Contents
   111                                               def sgd(self, x, fc, r, n_iter=100, step=.1, step_type="sqrt",
   112                                                       callback=None, Φ_0=False):
   113                                                   """
   114                                                   Stochastic gradient descent algorithm
   115                                                   The game is defined for an element x, a reference r and function fc
   116                                           
   117                                                   """
   118                                           
   119                                                   # Get general information
   120         1         73.0     73.0      0.0         