In [116]:
import numpy as np
import math
from   more_itertools import powerset 
import pandas as pd
from   ucimlrepo import fetch_ucirepo
from   sklearn.model_selection import train_test_split
from   sklearn.linear_model import LinearRegression

In [45]:
# Load the data
wine_quality = fetch_ucirepo(id=186)
wine_subset  = wine_quality['data']['original'][wine_quality['data']['original']['color'] == 'white']
X = np.array(wine_subset[['fixed_acidity', 'volatile_acidity', 'citric_acid','residual_sugar', 'chlorides', 'free_sulfur_dioxide', 'total_sulfur_dioxide', 'density', 'pH', 'sulphates', 'alcohol']])
y = np.array(wine_subset['quality'])

In [46]:
N = set([i for i in range(X.shape[1])])

In [47]:
# Standardise and split the data
X, y = X - X.mean(axis=0), y - y.mean(axis=0)
X, y = X / X.std(axis=0),  y / y.std(axis=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

$$X^TXw = X^Ty$$ 
$$w=(X^TX)^{-1}X^Ty$$

In [48]:
w = np.matmul(np.matmul(np.linalg.inv(np.matmul(X_train.T, X_train)), X_train.T), y_train)

$$R^2  = \frac{1}{N}w^TX^TXw$$
$$R^2_i = \frac{1}{N}w_i (X^TXw)$$

In [49]:
R2  = np.matmul(w.T,np.matmul(np.matmul(X_train.T, X_train), w))/len(y_train)
R2i = np.matmul(np.diag(w),np.matmul(np.matmul(X_train.T, X_train), w))/len(y_train)

$$\hat{y} = Xw$$
$$MSE = \frac{1}{N}\sum(y-\hat{y})^2$$

In [50]:
yh  = np.matmul(X_test, w)
MSE = np.matmul((y_test - yh).T, (y_test - yh)) / len(y_test)

In [11]:
model = LinearRegression()
model.fit(X_train, y_train)

In [12]:
model.coef_

array([ 0.04374206, -0.21793303, -0.00837693,  0.40798936, -0.00065312,
        0.09831014, -0.01161995, -0.41965475,  0.1024188 ,  0.08364131,
        0.31821423])

In [13]:
w

array([ 0.04369787, -0.21795555, -0.00834029,  0.40798158, -0.00065197,
        0.09832442, -0.01157518, -0.41972001,  0.10238261,  0.08364212,
        0.31819767])

In [123]:
def value(xi, S, sample_size):
    s_idx    = np.random.randint(low=0, high=len(X_train), size=sample_size)
    X_sample = X_train[s_idx]   
    X_sample[:,list(S)] = X_test[xi,list(S)]
    return np.mean(np.matmul(X_sample, w))

def marginal(xi, j, S, sample_size):
    return value(xi, S.union(j), sample_size) - value(xi, S, sample_size)

def gamma(N,S):
    return math.factorial(len(S)) * math.factorial(len(N) - len(S) - 1) / math.factorial(len(N))

def phi(xi, j, N, sample_size):
    players = N - j
    return np.sum([gamma(N, S) * marginal(xi, j, set(S), sample_size) for S in powerset(players)])
        
    
    
   

In [126]:
idx = 3
phi_T = 0
for player in N:
    phi_i = phi(idx,{player}, N, 10000)
    phi_T += phi_i
    print(player, phi_i)
print(phi_T)

0 0.022309639375568825
1 -0.000527676565703333
2 -0.002723196431725903
3 -0.39219502746118107
4 -0.0016273158255013314
5 -0.023887325073251135
6 -0.0034026274832109605
7 0.32290031938227215
8 -0.005887370623122
9 -0.013474040874846452
10 0.04994756903800976
-0.04856705254269149
