In [1]:
import numpy as np
import math

In [2]:
# problem parameters
probs1 = np.asarray([0.2, 0.4, 0.4])
probs2 = np.asarray([0.1, 0.9, 0])
probs3 = np.asarray([0.6, 0.1, 0.3])
arm_dists = np.asarray([probs1, probs2, probs3])

b_vals = np.asarray([0.1, 10.])
u = np.asarray([10, 0, 4])
umax = max(u)

In [3]:
# define
def select_arm(num_arms):
    return np.random.randint(num_arms)

def select_teacher(num_teachers):
    return np.random.randint(num_teachers)

def select_query(num_items):
    i0, i1 = np.random.choice(num_items, size=2, replace=False)
    if i0 < i1:
        return i0, i1   
    return i1, i0

def try_arm():
    return bool(np.random.choice([False, True]))

In [4]:
# dynamics of problem
def pull_arm(c, arm_dists):
    return np.random.choice(3, size=1, p=arm_dists[c])
    
def query_teacher(b, q, b_vals, u):
    p = math.exp(b_vals[b]*u[q[1]])/(math.exp(b_vals[b]*u[q[1]])+math.exp(b_vals[b]*u[q[0]]))
    return np.random.binomial(1, p)        

In [22]:
# calculations
def calc_deltas(P_hat, b_vals, num_items, teacher):
    t = teacher
    deltas = np.zeros((num_items, num_items))
    for i in range(num_items):
        for j in range(i+1, num_items):
            deltas[j][i] = calc_delta(P_hat[t][i][j], b_vals[t])
            deltas[i][j] = -deltas[j][i]

    return deltas

def calc_delta(p, b):
    return (-1/b)*math.log((1/p)-1)

def calc_scaled_vals1(deltas, rmax): 
    num_items = np.shape(deltas)[0]
    rnge = np.amax(deltas)
    result = np.where(np.isclose(deltas,rnge))
    min_i = result[0][0]
    max_i = result[1][0]

    true_vals = np.zeros(num_items)
    for i in range(num_items):
        val = deltas[max_i][i]
        true_vals[i] = -val*(rmax/rnge)
    return true_vals

def calc_scaled_vals2(deltas, rmax, rmin): 
    num_items = np.shape(deltas)[0]
    rnge = np.amax(deltas)
    result = np.where(np.isclose(deltas,rnge))
    min_i = result[0][0]
    max_i = result[1][0]

    true_vals = np.zeros(num_items)
    for i in range(num_items):
        val = deltas[max_i][i]
        print(val/np.amin(deltas))
        true_vals[i] = rmin+(val/np.amin(deltas))*(rmax-rmin)
    return true_vals

In [36]:
# given
num_arms = 3
num_teachers = 2
num_items = 3

# init
count_arm_pulls = np.zeros((num_arms))
count_arm_items = np.zeros((num_arms, num_items))
count_teach_query_pulls = np.zeros((num_teachers, num_items, num_items))
teach_query_prefs = np.zeros((num_teachers, num_items, num_items))

# infer
for _ in range(1000): #->infty
    if try_arm():
        c = select_arm(num_arms)
        i = pull_arm(c, arm_dists)
        count_arm_pulls[c] += 1
        count_arm_items[c][i] += 1
    else:
        b = select_teacher(num_teachers)
        q = select_query(num_items)
        p = query_teacher(b, q, b_vals, u)
        count_teach_query_pulls[b][q[0]][q[1]] += 1
        teach_query_prefs[b][q[0]][q[1]] += p
        
# calculate
D_hat = [[count_arm_items[c][i]/count_arm_pulls[c] for i in range(num_items)] for c in range(num_arms)]
P_hat = teach_query_prefs/count_teach_query_pulls
deltas = calc_deltas(P_hat, b_vals, num_items, 0)
U_hat1 = calc_scaled_vals1(deltas, umax)
U_hat2 = calc_scaled_vals2(deltas, umax-2, -2)

1.0
-0.0
0.7661859021829994


  P_hat = teach_query_prefs/count_teach_query_pulls


In [37]:
# estimated arm distributions 
# (num_arms, num_items)
print("D*:")
print(arm_dists)

print("\nD est:")
print(D_hat)

print("\nU*:")
print(u)

print("\nU est 1:")
print(U_hat1)

print("\nU est 2:")
print(U_hat2)

D*:
[[0.2 0.4 0.4]
 [0.1 0.9 0. ]
 [0.6 0.1 0.3]]

D est:
[[0.23837209302325582, 0.3953488372093023, 0.36627906976744184], [0.10285714285714286, 0.8971428571428571, 0.0], [0.6580645161290323, 0.05161290322580645, 0.2903225806451613]]

U*:
[10  0  4]

U est 1:
[10.         -0.          7.66185902]

U est 2:
[ 8.         -2.          5.66185902]


In [35]:
# intermediate values
t = 0
i = 1
j = 2
P = P_hat[t][i][j]
print("teacher", t, "preferred item", j, "to item", i, round(P,3)*100, "percent of the time")

# deltas[i][j] = U(i)-U(j)
print("\ndeltas:")
print(deltas)

teacher 0 preferred item 2 to item 1 57.49999999999999 percent of the time

deltas:
[[  0.          10.57790294   8.75468737]
 [-10.57790294   0.          -3.01105093]
 [ -8.75468737   3.01105093   0.        ]]


In [9]:
def generate_deltas(r):
    num_items = len(r)
    deltas = [[r[i]-r[j] for j in range(num_items)] for i in range(num_items)]
    return deltas

# [[0, u0-u1, u0-u2], [u1-u0, 0, u1-u2], [u2-u0, u2-u1, 0]]
deltas1 = generate_deltas([10, 0, 4])
deltas2 = generate_deltas([-1, 1, 1, 1, 15])

print(deltas1)
print(calc_scaled_vals(deltas1, umax))

# instransitive
deltas3 = [[0, 5, 6], [-5, 0, -3], [-6, 3, 0]]
print(deltas3)
print(calc_scaled_vals(deltas3, umax))

[[0, 10, 6], [-10, 0, -4], [-6, 4, 0]]
[10.  0.  4.]
[[0, 5, 6], [-5, 0, -3], [-6, 3, 0]]
[10. -5.  0.]
