In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from model_utils import read_bo_policy
from policies import FixedPlanPolicy, LiederPolicy, RandomTreePolicy
from evaluation import *
from pprint import pprint
import ast

In [2]:
def make_env(depth, reward, cost, ground_truth=False):
    """Returns a MouselabEnv with given depth and cost. 
    If `ground_truth` is True, the reward observed at a given node will be
    constant across runs on this env. This reduces variance of the return."""
    env = MouselabEnv(depth, reward=reward, cost=cost)
    if ground_truth:
        env.ground_truth = np.array([0, *reward.sample(len(env.tree) - 1)])
    return env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    # Note, ground_truth can be an int in which case it acts as a random seed.
    if ground_truth is not None:
        np.random.seed(ground_truth)
        return [make_env(depth, reward, cost, True) for _ in range(n)]
    else:
        return [make_env(depth, reward, cost, False)] * n

In [3]:
dist = Normal(0,10)
reward = dist.to_discrete(6)
costs = [0.1,1.25,4.00]
stimuli_envs ={str(cost):make_envs([4,1,2], reward, cost, n=16, ground_truth=1) for cost in costs}
stimuli_envs

{'0.1': [<mouselab.MouselabEnv at 0x2b3f31587160>,
  <mouselab.MouselabEnv at 0x2b3f315877b8>,
  <mouselab.MouselabEnv at 0x2b3f31587b00>,
  <mouselab.MouselabEnv at 0x2b3f31587e48>,
  <mouselab.MouselabEnv at 0x2b3f3158a048>,
  <mouselab.MouselabEnv at 0x2b3f3158a2b0>,
  <mouselab.MouselabEnv at 0x2b3f3158a5f8>,
  <mouselab.MouselabEnv at 0x2b3f3158a940>,
  <mouselab.MouselabEnv at 0x2b3f3158ac88>,
  <mouselab.MouselabEnv at 0x2b3f3158afd0>,
  <mouselab.MouselabEnv at 0x2b3f31591358>,
  <mouselab.MouselabEnv at 0x2b3f315916a0>,
  <mouselab.MouselabEnv at 0x2b3f315919e8>,
  <mouselab.MouselabEnv at 0x2b3f31591d30>,
  <mouselab.MouselabEnv at 0x2b3f315960b8>,
  <mouselab.MouselabEnv at 0x2b3f31596400>],
 '1.25': [<mouselab.MouselabEnv at 0x2b3f31596780>,
  <mouselab.MouselabEnv at 0x2b3f31596ac8>,
  <mouselab.MouselabEnv at 0x2b3f31596e10>,
  <mouselab.MouselabEnv at 0x2b3f31599198>,
  <mouselab.MouselabEnv at 0x2b3f315994e0>,
  <mouselab.MouselabEnv at 0x2b3f31599828>,
  <mouselab.Mous

In [4]:
optimal_reward = np.zeros((16,3))
worst_reward = np.zeros((16,3))
mean_reward = np.zeros((16,3))

lc_pol_reward = np.zeros((16,3))
clicks = np.zeros((16,3))

# samples2 = np.zeros((5000,16,3))

for cost_id in range(3):
    
    policy = read_bo_policy(costs[cost_id])
    envs =  stimuli_envs[str(costs[cost_id])]
    for env in envs:
        env.sample_term_reward = True
    ev = evaluate(policy, envs)
    
    lc_pol_reward[:,cost_id] = ev['util']
    clicks[:,cost_id] = ev['observations']
    
#     for i in range(5000):
#         random_policy = RandomTreePolicy()
#         samples2[i,:,cost_id] = evaluate(random_policy, envs)['util']
        
    for env_id in range(16):
        env = envs[env_id]
        optimal_reward[env_id,cost_id] = env.true_Q(0)
        worst_reward[env_id,cost_id] = env.worst_Q(0)
        mean_reward[env_id,cost_id] = env.mean_Q(0)

In [71]:
# rand_reward_samples2 = np.zeros((10000,16,3))
# for sample in range(10000):
#     for cost_id in range(3):
#         envs =  stimuli_envs[str(costs[cost_id])]
#         for env_id in range(16):
#             env = envs[env_id]
#             rand_reward_samples2[sample,env_id,cost_id] = env.rand_Q(0)

In [5]:
optimal_reward

array([[  4.,   4.,   4.],
       [ 36.,  36.,  36.],
       [ 36.,  36.,  36.],
       [ -4.,  -4.,  -4.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 20.,  20.,  20.],
       [ 12.,  12.,  12.],
       [ 28.,  28.,  28.],
       [ 12.,  12.,  12.],
       [ 36.,  36.,  36.],
       [ 44.,  44.,  44.],
       [ 28.,  28.,  28.],
       [ 36.,  36.,  36.]])

In [6]:
worst_reward

array([[-36., -36., -36.],
       [-12., -12., -12.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-12., -12., -12.],
       [ -4.,  -4.,  -4.],
       [ -4.,  -4.,  -4.],
       [-60., -60., -60.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-44., -44., -44.],
       [-20., -20., -20.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-20., -20., -20.],
       [-12., -12., -12.]])

In [7]:
mean_reward

array([[-13., -13., -13.],
       [  5.,   5.,   5.],
       [ 12.,  12.,  12.],
       [-18., -18., -18.],
       [  2.,   2.,   2.],
       [ 10.,  10.,  10.],
       [  9.,   9.,   9.],
       [ -1.,  -1.,  -1.],
       [ -5.,  -5.,  -5.],
       [-11., -11., -11.],
       [ -5.,  -5.,  -5.],
       [ -2.,  -2.,  -2.],
       [  9.,   9.,   9.],
       [ -4.,  -4.,  -4.],
       [  9.,   9.,   9.],
       [ 10.,  10.,  10.]])

In [8]:
lc_pol_reward

array([[  2.6 ,  -9.75, -12.  ],
       [ 35.5 ,  26.  ,  20.  ],
       [ 35.1 ,  24.25,  24.  ],
       [ -5.5 , -19.  , -12.  ],
       [ 27.3 ,  24.25,  20.  ],
       [ 27.  ,  10.  ,  16.  ],
       [ 27.  ,  16.75,  -8.  ],
       [ 26.7 ,  10.  ,   0.  ],
       [ 19.2 ,  13.75, -12.  ],
       [ 11.  ,   4.5 , -36.  ],
       [ 27.1 ,  18.  ,  -4.  ],
       [ 10.7 ,   0.75, -16.  ],
       [ 35.4 ,  16.25,  16.  ],
       [ 43.6 ,  40.25,  16.  ],
       [ 27.2 ,  15.5 ,  12.  ],
       [ 34.8 ,  24.75, -20.  ]])

In [9]:
lc_pol_reward - mean_reward

array([[ 15.6 ,   3.25,   1.  ],
       [ 30.5 ,  21.  ,  15.  ],
       [ 23.1 ,  12.25,  12.  ],
       [ 12.5 ,  -1.  ,   6.  ],
       [ 25.3 ,  22.25,  18.  ],
       [ 17.  ,   0.  ,   6.  ],
       [ 18.  ,   7.75, -17.  ],
       [ 27.7 ,  11.  ,   1.  ],
       [ 24.2 ,  18.75,  -7.  ],
       [ 22.  ,  15.5 , -25.  ],
       [ 32.1 ,  23.  ,   1.  ],
       [ 12.7 ,   2.75, -14.  ],
       [ 26.4 ,   7.25,   7.  ],
       [ 47.6 ,  44.25,  20.  ],
       [ 18.2 ,   6.5 ,   3.  ],
       [ 24.8 ,  14.75, -30.  ]])

In [10]:
rel = (lc_pol_reward-mean_reward)/(lc_pol_reward-mean_reward)
rel

  if __name__ == '__main__':


array([[  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,  nan,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]])

In [11]:
clicks

array([[ 14.,  11.,   2.],
       [  5.,   8.,   2.],
       [  9.,   3.,   1.],
       [ 15.,  12.,   2.],
       [  7.,   3.,   2.],
       [ 10.,   8.,   1.],
       [ 10.,   9.,   1.],
       [ 13.,   8.,   1.],
       [  8.,   5.,   2.],
       [ 10.,   6.,   2.],
       [  9.,   8.,   2.],
       [ 13.,   9.,   1.],
       [  6.,   3.,   1.],
       [  4.,   3.,   1.],
       [  8.,  10.,   2.],
       [ 12.,   9.,   2.]])

In [12]:
np.mean(clicks[:,2])

1.5625

In [13]:
np.savetxt('../experiments/data/stimuli/exp1/optimal1A.3.csv', lc_pol_reward, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/worst1A.3.csv', mean_reward, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/rel_score_pi_star1A.3.csv', rel, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/nr_observations_pi_star1A.3.csv', clicks, delimiter=",")

# Pilot PRs

In [46]:
costs = [0.10,1.25,4.00] 
stimuli_envs ={str(cost):make_envs([4,1,2], reward, cost, n=16, ground_truth=1) for cost in costs}
stimuli_envs

{'0.1': [<mouselab.MouselabEnv at 0x2b0b0718c4a8>,
  <mouselab.MouselabEnv at 0x2b0b0718ceb8>,
  <mouselab.MouselabEnv at 0x2b0b0718c9e8>,
  <mouselab.MouselabEnv at 0x2b0b07119f98>,
  <mouselab.MouselabEnv at 0x2b0b071194a8>,
  <mouselab.MouselabEnv at 0x2b0b07119198>,
  <mouselab.MouselabEnv at 0x2b0b07119278>,
  <mouselab.MouselabEnv at 0x2b0b0713e9e8>,
  <mouselab.MouselabEnv at 0x2b0b0713e4a8>,
  <mouselab.MouselabEnv at 0x2b0b07150198>,
  <mouselab.MouselabEnv at 0x2b0b07150dd8>,
  <mouselab.MouselabEnv at 0x2b0b07160828>,
  <mouselab.MouselabEnv at 0x2b0b07150d68>,
  <mouselab.MouselabEnv at 0x2b0b07160f28>,
  <mouselab.MouselabEnv at 0x2b0b071b8828>,
  <mouselab.MouselabEnv at 0x2b0b071d0e48>],
 '1.25': [<mouselab.MouselabEnv at 0x2b0b071da828>,
  <mouselab.MouselabEnv at 0x2b0b071da2e8>,
  <mouselab.MouselabEnv at 0x2b0b071ec2e8>,
  <mouselab.MouselabEnv at 0x2b0b071ec908>,
  <mouselab.MouselabEnv at 0x2b0b071f4588>,
  <mouselab.MouselabEnv at 0x2b0b0065a0f0>,
  <mouselab.Mous

In [54]:
trials = pd.read_csv('../experiments/data/human/0.995/trials.csv')
trials = trials.loc[trials['info_cost'].isin(costs)]
trials

Unnamed: 0.1,Unnamed: 0,pid,info_cost,PR_type,message,trial_index,trial_id,delays,score,n_click,clicks,click_times,path,action_times
0,0,0,0.10,none,none,1,9.0,[None],-4.20,2,"[9, 12]","[5197, 9537]","[1, 10, 12]","[10761, 12209, 14729]"
1,1,0,0.10,none,none,2,13.0,[None],43.40,6,"[9, 5, 13, 1, 4, 2]","[5147, 7471, 9090, 9913, 11653, 13063]","[1, 2, 4]","[14832, 15872, 16935]"
2,2,0,0.10,none,none,3,6.0,[None],19.60,4,"[1, 2, 3, 4]","[2932, 4680, 6373, 7436]","[1, 2, 4]","[9988, 10692, 11444]"
3,3,0,0.10,none,none,4,5.0,[None],19.30,7,"[9, 1, 2, 3, 13, 14, 16]","[1693, 4647, 6062, 6640, 8281, 11421, 12077]","[1, 2, 3]","[13335, 14046, 14911]"
4,4,0,0.10,none,none,5,15.0,[None],35.40,6,"[1, 9, 5, 6, 10, 12]","[2420, 3950, 6460, 7726, 8576, 9467]","[1, 10, 12]","[10837, 11700, 12412]"
5,5,0,0.10,none,none,6,12.0,[None],35.70,3,"[9, 11, 10]","[3482, 5404, 6060]","[1, 10, 11]","[7941, 8708, 9420]"
6,6,0,0.10,none,none,7,4.0,[None],27.00,10,"[13, 14, 1, 9, 5, 6, 8, 7, 16, 15]","[1953, 3427, 5559, 6681, 7308, 8269, 9423, 101...","[1, 6, 7]","[17443, 18138, 18914]"
7,7,0,0.10,none,none,8,0.0,[None],3.10,9,"[9, 1, 5, 13, 6, 10, 14, 15, 16]","[3680, 5083, 5706, 6382, 8544, 9794, 10634, 12...","[1, 14, 16]","[18118, 18989, 19862]"
8,8,0,0.10,none,none,9,3.0,[None],-5.10,11,"[9, 1, 5, 13, 11, 12, 10, 2, 6, 7, 14]","[1439, 2809, 4147, 5095, 6261, 7195, 9087, 990...","[1, 10, 12]","[21205, 22061, 22701]"
9,9,0,0.10,none,none,10,2.0,[None],27.50,5,"[13, 1, 2, 3, 4]","[3669, 5244, 6365, 6959, 7945]","[1, 2, 3]","[9290, 10041, 10673]"


In [48]:
trials['pr_sum'] = np.zeros(trials.shape[0])
trials['click_prs'] = [[]]*trials.shape[0]

In [51]:
with open('data/q_weights.json') as data_file:    
    q_weights = json.load(data_file)
pprint(q_weights)

{'0.10': [4.645176951827218,
          0.04783056305614096,
          0.044789706164195975,
          0.9338160228125272,
          0.9994239339903965],
 '1.25': [1.7807939607281613,
          0.45246829707936637,
          0.27572911981296844,
          0.39014475114599445,
          0.9905875327727384],
 '4.00': [1.0066519566667151,
          0.9887916151065947,
          -0.00820497960640427,
          0.006948313322695628,
          1.0003951332873617]}


In [52]:
for index, row in trials.iterrows():
    if index % 20 == 0: print(index)
    cost = row['info_cost']
    s_cost = str(cost)
    trial_id = row['trial_index']
    
#     stimuli_envs = make_envs([4,1,2], reward, cost, n=16, ground_truth=1)
#     menv = stimuli_envs[trial_id-1]
    menv = stimuli_envs[s_cost][trial_id-1]
       
    if s_cost == '0.1': s_cost = '0.10'
    elif s_cost == '4.0': s_cost = '4.00'
    q_weight = q_weights[s_cost]
    
    clicks = ast.literal_eval(row['clicks'])
    clicks.append(17)
    pr_sum = 0
    click_prs = []
    for a in clicks:
        f = menv.action_features(a)
        pr = np.inner(q_weight,f) - np.max([np.inner(q_weight,menv.action_features(a2)) for a2 in menv.actions(menv._state)])
        menv.step(a)
        pr_sum += pr
        click_prs.append(pr)
    trials.set_value(index,'pr_sum',pr_sum)
    trials.set_value(index,'click_prs',click_prs)
    menv.reset()

0
40
60
120
280
360
380
400
460
480
580
600
620
640
660
680
740
760
780
800
820
880
960
1000
1020
1100
1140
1160
1200
1240
1280
1300
1340
1400
1420
1440
1460
1480
1520
1540
1560
1600
1640
1660
1740
1780
1800
1900
1980


In [41]:
csv_ready = trials[['pid','info_cost','trial_index','pr_sum','click_prs']]

In [42]:
csv_ready

Unnamed: 0,pid,info_cost,trial_index,pr_sum,click_prs
0,0,0.10,1,-19.473024,"[0.0, -0.324304015787, -19.1487199638]"
1,0,0.10,2,-14.298082,"[0.0, -0.210504570876, -0.382266880015, -0.382..."
2,0,0.10,3,-1.158616,"[0.0, -0.131482925701, 0.0, 0.0, -1.02713289086]"
3,0,0.10,4,-12.732936,"[0.0, -0.024981893567, -0.213568828633, -0.448..."
4,0,0.10,5,-7.742030,"[0.0, 0.0, 0.0, -0.103683559651, -0.3740098722..."
5,0,0.10,6,-12.339585,"[0.0, 0.0, -0.142487954368, -12.1970973059]"
6,0,0.10,7,-14.684785,"[0.0, -0.0187194016522, -0.335429268645, -0.33..."
7,0,0.10,8,-2.808495,"[0.0, 0.0, -0.210504570876, -0.232311392331, -..."
8,0,0.10,9,-0.836415,"[0.0, -0.210504570876, -0.210504570876, -0.232..."
9,0,0.10,10,-16.712976,"[0.0, 0.0, -0.593528882527, -0.632584865513, -..."


In [43]:
res = trials.set_index(['pid','info_cost','trial_index'])['click_prs'].apply(pd.Series).stack()
res = res.reset_index()
res.columns = ['pid','info_cost','trial_index','click_num','click_pr']
res

Unnamed: 0,pid,info_cost,trial_index,click_num,click_pr
0,0,0.1,1,0,0.000000
1,0,0.1,1,1,-0.324304
2,0,0.1,1,2,-19.148720
3,0,0.1,2,0,0.000000
4,0,0.1,2,1,-0.210505
5,0,0.1,2,2,-0.382267
6,0,0.1,2,3,-0.382267
7,0,0.1,2,4,-0.516120
8,0,0.1,2,5,-0.521028
9,0,0.1,2,6,-12.285895


In [44]:
csv_ready.to_csv('../experiments/data/human/0.995/pilot_prs.csv')
res.to_csv('../experiments/data/human/0.995/pilot_click_prs.csv')