In [None]:
# import pyreadr
from glob import glob
import json
import pandas as pd
from itertools import product
import numpy as np
from scipy.special import ndtri, ndtr
from scipy.optimize import least_squares

In [None]:
# Read cems.json from the working directory into `data`.
with open("cems.json") as cems_file:
    data = json.load(cems_file)

In [None]:
# Build the pairwise-preference table and drop every row that contains the
# literal string 'NA' in any column.
preferences = data['preferences']
df = pd.DataFrame(preferences)
# DataFrame.any(axis=1) is the vectorised equivalent of applying np.any to
# each row. .copy() makes the filtered frame independent of the unfiltered
# one, so adding columns to it later is not an assignment into a slice.
df = df[~(df == 'NA').any(axis=1)].copy()

In [None]:
df.school1.unique()

In [None]:
df.school2.unique()

In [None]:
# Key every comparison by its (school1, school2) tuple. zip builds the tuples
# straight from the two columns instead of running a Python-level apply over
# every row, and also yields an empty column for an empty frame.
df["pair"] = list(zip(df['school1'], df['school2']))

gb = df.groupby("pair")

# Per ordered pair: total wins of the first school, ties, wins of the second.
tbl1 = gb[['win1', 'tied', 'win2']].sum()
tbl1

In [None]:
# Credit half of every tie to each side and keep only the two win columns.
data = tbl1[['win1', 'win2']].add(tbl1['tied'] / 2, axis=0)
data

In [None]:
# Raw (unsmoothed) share of comparisons won by the first school of each pair,
# stored as a plain dict keyed by the entries of data.index.
# A pair with win1 + win2 == 0 produces NaN here.

p = dict((data.win1 / (data.win1 + data.win2)))
p

In [None]:
def make_countmatrix(data, index_map=None):
    """Count how often each ordered (first, second) pair occurs in ``data.index``.

    Parameters
    ----------
    data : object with an ``index`` attribute
        Iterating ``data.index`` must yield 2-tuples ``(s1, s2)`` whose members
        are keys of the index mapping.
    index_map : mapping, optional
        Maps each label to its row/column position. Defaults to the
        notebook-level ``school2idx`` so existing one-argument calls behave
        exactly as before.

    Returns
    -------
    numpy.ndarray
        Square float matrix ``A`` in which ``A[i, j]`` is the number of index
        entries whose first label maps to ``i`` and whose second maps to ``j``.
    """
    if index_map is None:
        # Fall back to the mapping the notebook keeps as a global.
        index_map = school2idx
    size = len(index_map)
    A = np.zeros((size, size))
    for s1, s2 in data.index:
        A[index_map[s1], index_map[s2]] += 1
    return A

def sort_schools(A, axis=0, index_map=None):
    """Return the labels ordered by ascending total of ``A`` along ``axis``.

    Parameters
    ----------
    A : numpy.ndarray
        Square matrix whose rows/columns are positioned by the index mapping.
    axis : int, default 0
        Axis passed to ``A.sum``; 0 ranks by column totals, 1 by row totals.
    index_map : mapping, optional
        Maps each label to its position in ``A``. Defaults to the
        notebook-level ``school2idx``.

    Returns
    -------
    list
        Labels sorted from the smallest to the largest total.
    """
    if index_map is None:
        index_map = school2idx
    # Invert the mapping once. Looking labels up by their index value (rather
    # than by position in the key order) stays correct even if the mapping was
    # not inserted in index order, and avoids rebuilding a list per element.
    label_of = {idx: label for label, idx in index_map.items()}
    schools_sorted = [label_of[idx] for idx in np.argsort(A.sum(axis=axis))]
    return schools_sorted

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def logit(y):
    """Log-odds of ``y``, computed as ``-log((1 - y) / y)`` element-wise."""
    inverse_odds = (1 - y) / y
    return -np.log(inverse_odds)

def compute_p(data):
    """Smoothed win share of the first item: ``(win1 + 1) / (win1 + win2 + 2)``.

    ``data`` must expose ``win1`` and ``win2`` attributes that support
    element-wise arithmetic (for example the columns of a DataFrame).
    """
    first_wins = data.win1
    comparisons = first_wins + data.win2
    return (first_wins + 1) / (comparisons + 2)

def compute_mu(data, F=ndtri):
    """Score every school from its direct comparison with the reference school.

    The reference is the last entry of the notebook-level ``schools_sorted``.
    For each pair ``(s1, s2)`` in ``compute_p(data)`` whose second member is
    the reference, the score of ``s1`` is ``F`` applied to that pair's
    probability. Pairs with any other second member are ignored.

    Returns a dict mapping ``s1`` to its score.
    """
    return {
        s1: F(prob)
        for (s1, s2), prob in compute_p(data).items()
        if s2 == schools_sorted[-1]
    }

def to_np(d):
    """Pack ``d``'s values into an array, following ``schools_sorted`` order.

    The last school of the notebook-level ``schools_sorted`` is left out.
    """
    values = []
    for school in schools_sorted[:-1]:
        values.append(d[school])
    return np.array(values)

In [None]:
# Give every school that appears in any pair an integer index.
# The names are sorted first so the mapping is reproducible: the iteration
# order of a set of strings changes between interpreter runs (hash
# randomisation), which would reshuffle tie-breaks in sort_schools and with
# them the choice of reference school.
school2idx = {
    school: idx
    for idx, school in enumerate(sorted({s for pair in data.index for s in pair}))
}

In [None]:
# Count matrix of the observed (school1, school2) pairs, then the schools
# ordered by ascending column total of that matrix. The last entry of
# schools_sorted is the reference school used by compute_mu, to_np and cost.
A = make_countmatrix(data)
schools_sorted = sort_schools(A, 0)
rev_schools_sorted = list(reversed(schools_sorted))

In [None]:
schools_sorted

In [None]:
def cost(mu):
    """Squared error between each observed probability and the probit model.

    ``mu[i]`` is taken as the score of ``schools_sorted[i]``; the score of the
    last school in ``schools_sorted`` is then forced to 0. For every pair
    ``(s1, s2)`` in the notebook-level ``p`` the model predicts
    ``ndtr(score[s1] - score[s2])``.

    Returns an array holding one squared error per entry of ``p``.

    NOTE(review): the entries are already squared. When this is handed to
    scipy.optimize.least_squares the optimiser squares them again - confirm
    that is intended.
    """
    scores = {schools_sorted[i]: value for i, value in enumerate(mu)}
    scores[schools_sorted[-1]] = 0

    squared_errors = []
    for (s1, s2), observed in p.items():
        predicted = ndtr(scores[s1] - scores[s2])
        squared_errors.append((observed - predicted) ** 2)
    return np.array(squared_errors)

In [None]:
# Sanity check: total cost at two constant score vectors.
# cost() overwrites the entry for schools_sorted[-1] with 0, so the last of
# these N values never influences the result.
N = len(schools_sorted)
np.sum(cost(np.zeros(N))), np.sum(cost(np.ones(N)))

In [None]:
compute_mu(data)
# p

In [None]:
np.sum(cost(to_np(compute_mu(data))))

In [None]:
# Chain probit estimates outward from the reference school, whose score is 0.
# Every pair (s1, city) contributes one estimate for s1:
#     p_ij = F(mu_i - mu_j)   =>   mu_i = F^-1(p_ij) + mu_j
# where mu_j is the most recent estimate collected for `city`.
mu = {school: [] for school in school2idx}
mu[schools_sorted[-1]] = [0]

for city in rev_schools_sorted:
    for (s1, s2), value in p.items():
        if s2 != city:
            continue
        mu[s1].append(ndtri(value) + mu[city][-1])

In [None]:
mu

In [None]:
# Collapse the list of estimates collected for each school into its mean.
# np.mean of an empty list is NaN, so a school that received no estimate
# appears as NaN in mu_1.
mu_1 = {l: np.mean(m) for l, m in mu.items()}
mu_1

In [None]:
np.sum(cost(to_np(mu_1)))

In [None]:
# Fit one score per non-reference school, starting from all zeros. cost()
# pins the score of schools_sorted[-1] to 0, so the start vector holds
# len(schools_sorted) - 1 free parameters and none that cost ignores.
# The previous warm start from to_np(mu_1) ran a full optimisation whose
# result was overwritten on the very next line; it is kept only for reference:
# optimized_mu = least_squares(cost, to_np(mu_1)).x
optimized_mu = least_squares(cost, np.zeros(len(schools_sorted) - 1)).x

In [None]:
# Fitted score per school. The reference school (schools_sorted[-1]) is not
# listed because cost() fixes its score at 0.
{city: optimized_mu[i] for i, city in enumerate(schools_sorted[:-1])}

In [None]:
import pandas as pd
20 * 60 / 5 / 3

In [None]:
import matplotlib.pyplot as plt
import json

data_dir = '../data/processed/'
with open(data_dir + 'stimulus_wr.json') as fh:
    stimulus_wr = json.load(fh)

# compute_mu, to_np, cost, make_countmatrix and sort_schools read the
# notebook-level names p / school2idx / schools_sorted, so the loop has to
# rebind those globals (and `data`) for every dataset it fits.
for vp in [0, 1, 2, 4]:
    for g in ['low', 'medium', 'high']:
        name = data_dir + f'exp_processed_vp-{vp}_g-{g}.pkl'
        # NOTE(security): unpickling can execute arbitrary code; only load
        # files produced by this project's own processing step.
        data = pd.read_pickle(name)

        # Smoothed win probabilities, without pairs whose value is NaN.
        # (An unsmoothed estimate used to be computed here and was overwritten
        # immediately; `~` on the isnan result is replaced by a plain `not`.)
        p = {pair: prob for pair, prob in compute_p(data).items() if not np.isnan(prob)}

        # Sorted labels keep the index assignment, and with it the choice of
        # reference item, identical from run to run.
        school2idx = {
            label: idx
            for idx, label in enumerate(sorted({s for pair in data.index for s in pair}))
        }
        A = make_countmatrix(data)
        schools_sorted = sort_schools(A, 0)

        # Size the start vector from THIS dataset. The notebook-level N was
        # computed for the CEMS schools and breaks cost() whenever a dataset
        # contains a different number of items.
        optimized_mu = least_squares(cost, np.zeros(len(schools_sorted) - 1)).x
        score = {city: optimized_mu[i] for i, city in enumerate(schools_sorted[:-1])}
        score[schools_sorted[-1]] = 0

        # One marker per item: stimulus_wr value on x, fitted score on y.
        plt.title(name)
        for key, value in score.items():
            plt.plot(stimulus_wr[key], value, 'x', label=key)
        plt.legend()
        plt.show()

In [None]:
import jax
# jax.numpy gets its own alias. Importing it as `np` rebinds the name that
# every function defined earlier in this notebook uses for NumPy, so re-running
# any earlier cell would silently switch it to JAX arrays.
import jax.numpy as jnp

In [None]:
# least_squares(cost, to_np(mu_1), jac=jax.jacfwd(jax.grad(cost)))

In [None]:
[(v - ndtr(mu_1[s1] - mu_1[s2]))**2 for (s1, s2), v in p.items()]

In [None]:
# cost(np.array(mu_1.values()))
cost(np.array(list(mu_1.values())))
np.array(list(mu_1.values()))

In [None]:
mu_1

In [None]:
school2idx

In [None]:
mu = compute_mu(data, ndtri)
mu

In [None]:
logit(0.635314)
0.33 / 0.34
0.99 / 1.064

In [None]:
mu = pd.Series(mu)

In [None]:
mu / sum(mu)

In [None]:
# Spot check of the model p_ij = F(mu_i - mu_j): only the value of the last
# line is displayed, i.e. the predicted probability for London against Paris.
# NOTE(review): requires `mu` to contain both 'London' and 'Paris' - confirm
# that `mu` was computed from the CEMS data at this point.
ndtr(0.982 - 0.561)
ndtr(mu['London'] - mu['Paris'])