#### Subsequence Function

In [1]:
def subseq(seq):
    n = len(seq)
    for i in range(1, 2**n):
        b = format(i, f'0{n}b')
        s = []
        for j in range(len(b)):
            if int(b[-j-1]) == 1:
                s.append(seq[j])
        s.sort()
        yield s

### V Function

In [2]:
def v_function(A, C_values):
    """
    Computes worth of one coalition of channels
    :param A: a coalition of channels
    :param C_values: dict with conversion # of each set of channels
    :return: worth of A
    """
    worth_of_A = 0
    for subset in subseq(A.split(',')):
        subset.sort()
        subset = ','.join(subset)
        if subset in C_values.keys():
            worth_of_A += C_values[subset]
    return worth_of_A

### Import dataset

In [1]:
import pandas as pd

df = pd.read_csv('shapley_source.csv')
C_values = df.set_index('copy_list').to_dict()['conversions']
channels = sorted([c for c in C_values.keys() if ',' not in c])
print(sum(C_values.values()))

146552


### Generate V Values

In [2]:
def get_v_values(channels, C_values):
    import os.path
    import pickle
    filename = 'v_values.pickle'
    if os.path.isfile(filename):
        with open(filename, 'rb') as file:
            return pickle.load(file)
    v_values = {}
    count = 0
    for A in subseq(channels):
        count += 1
        A = ','.join(A)
        print(f'{count}: {A}\r', end='')
        v_values[A] = v_function(A, C_values)
    print(f'\n{v_values}')
    with open(filename, 'wb') as file:
        pickle.dump(v_values, file)
    return v_values
    
v_values = get_v_values(channels, C_values)
print(v_values)

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [5]:
print([k for k in v_values.keys() if ',' not in k])
channels = sorted([k for k in v_values.keys() if ',' not in k])

['bpecn', 'btc', 'bterf', 'bvosi', 'cmp', 'dx', 'eca', 'ei', 'exc', 'fn', 'mb', 'mc', 'mr', 'pe', 'ss', 'tc', 'tca', 'te', 'tp', 'vl']


In [6]:
def shapley(channels, v_values):
    from collections import defaultdict
    from math import factorial
    n = len(channels)
    res = defaultdict(float)
    count = 0
    for channel in channels:
        count += 1
        print(f'channel {count} of {n}')
        for A in v_values.keys():
            A_arr = A.split(',')
            if channel not in A_arr:
                cardinal_A = len(A_arr)
                A_with_channel = A_arr
                A_with_channel.append(channel)
                A_with_channel = ','.join(sorted(A_with_channel))
                res[channel] += (v_values[A_with_channel] - v_values[A])*(factorial(cardinal_A)*factorial(n-cardinal_A-1)/factorial(n))
        res[channel] += v_values[channel] / n
    return res

s = shapley(channels, v_values)

channel 1 of 20
channel 2 of 20
channel 3 of 20
channel 4 of 20
channel 5 of 20
channel 6 of 20
channel 7 of 20
channel 8 of 20
channel 9 of 20
channel 10 of 20
channel 11 of 20
channel 12 of 20
channel 13 of 20
channel 14 of 20
channel 15 of 20
channel 16 of 20
channel 17 of 20
channel 18 of 20
channel 19 of 20
channel 20 of 20


In [8]:
#for c in channels:
#    assert(s[c] >= C_values[c])
from pprint import pprint
pprint(s)
print(sum(s.values()))

defaultdict(<class 'float'>,
            {'bpecn': 642.8887560968714,
             'btc': 996.3663170162721,
             'bterf': 836.0386326091536,
             'bvosi': 553.4737266818416,
             'cmp': 12580.822571871666,
             'dx': 1626.7384369715728,
             'eca': 3164.0737155817937,
             'ei': 1662.2450720031416,
             'exc': 1642.614458172583,
             'fn': 2206.0371715702454,
             'mb': 2200.4096712927,
             'mc': 1539.3435568516495,
             'mr': 2047.7674912504988,
             'pe': 1050.4687871768874,
             'ss': 1159.5126405457077,
             'tc': 17676.35932441781,
             'tca': 7830.0099206323885,
             'te': 1302.044322752442,
             'tp': 846.0466509797654,
             'vl': 896.7387755218949})
62459.99999999688


### Plot Result Histogram

In [None]:
import matplotlib.pyplot as plt

fig = plt.figure(figsize=(30, 10))
ax = fig.add_subplot(111)
ax.bar(s.keys(), s.values(), color='r')
plt.title('Conversions by Shapley Value')
plt.xlabel('Copy')
plt.ylabel('Conversions')
plt.savefig('conversions.png', format='png')
plt.show()