In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from model_utils import read_bo_policy
from policies import FixedPlanPolicy, LiederPolicy, RandomTreePolicy
from evaluation import *
from pprint import pprint
import ast

In [7]:
def make_env(depth, reward, cost, ground_truth=False):
    """Build a single MouselabEnv from `depth`, `reward` and `cost`.

    When `ground_truth` is truthy, the env's `ground_truth` attribute is
    overwritten with a fixed array: 0 in the first slot, followed by
    `len(env.tree) - 1` values drawn from `reward.sample`. Per the original
    author, this keeps the reward observed at each node constant across runs
    on this env, which reduces the variance of the return.
    """
    env = MouselabEnv(depth, reward=reward, cost=cost)
    if not ground_truth:
        return env

    # One sampled reward for every entry of the tree except the first.
    n_sampled = len(env.tree) - 1
    env.ground_truth = np.array([0, *reward.sample(n_sampled)])
    return env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    """Return a list of `n` environments built with `make_env`.

    `ground_truth` doubles as a random seed: when it is not None it is passed
    to `np.random.seed` and each of the `n` environments is built separately
    with a fixed ground truth. When it is None, a single environment without
    a fixed ground truth is built and repeated `n` times.
    """
    if ground_truth is None:
        # NOTE(review): all n entries reference the same env object, so
        # mutating one entry mutates every entry.
        shared_env = make_env(depth, reward, cost, False)
        return [shared_env] * n

    np.random.seed(ground_truth)
    return [make_env(depth, reward, cost, True) for _ in range(n)]

In [8]:
# Reward distribution: Normal(0, 10) discretised via to_discrete(6).
dist = Normal(0, 10)
reward = dist.to_discrete(6)

# One list of 16 environments per information cost, keyed by str(cost).
# make_envs re-seeds numpy with ground_truth=1 on every call, so each cost
# starts its sampling from the same seed.
costs = [0.1, 1.25, 4.00]
stimuli_envs = dict(
    (str(cost), make_envs([4, 1, 2], reward, cost, n=16, ground_truth=1))
    for cost in costs
)
stimuli_envs

{'0.1': [<mouselab.MouselabEnv at 0x7fe5e2c579b0>,
  <mouselab.MouselabEnv at 0x7fe5e2c57b70>,
  <mouselab.MouselabEnv at 0x7fe5df782cc0>,
  <mouselab.MouselabEnv at 0x7fe5df782390>,
  <mouselab.MouselabEnv at 0x7fe5df782e10>,
  <mouselab.MouselabEnv at 0x7fe5df78c198>,
  <mouselab.MouselabEnv at 0x7fe5df78c550>,
  <mouselab.MouselabEnv at 0x7fe5df78c8d0>,
  <mouselab.MouselabEnv at 0x7fe5df78cc18>,
  <mouselab.MouselabEnv at 0x7fe5df78cf60>,
  <mouselab.MouselabEnv at 0x7fe5df78a0b8>,
  <mouselab.MouselabEnv at 0x7fe5df78a400>,
  <mouselab.MouselabEnv at 0x7fe5df78a748>,
  <mouselab.MouselabEnv at 0x7fe5df78aa90>,
  <mouselab.MouselabEnv at 0x7fe5df78add8>,
  <mouselab.MouselabEnv at 0x7fe5df795160>],
 '1.25': [<mouselab.MouselabEnv at 0x7fe5df7954e0>,
  <mouselab.MouselabEnv at 0x7fe5df795828>,
  <mouselab.MouselabEnv at 0x7fe5df795b70>,
  <mouselab.MouselabEnv at 0x7fe5df795eb8>,
  <mouselab.MouselabEnv at 0x7fe5df71e240>,
  <mouselab.MouselabEnv at 0x7fe5df71e588>,
  <mouselab.Mous

In [9]:
# Evaluate the policy returned by read_bo_policy on every stimulus environment,
# once per cost, and collect per-environment reference values.
# Every result table has one row per environment and one column per cost.
# The shape is derived from the data instead of repeating the literals 16 and 3,
# so it cannot drift from the `n` / `costs` used to build stimuli_envs.
n_costs = len(costs)
n_envs = len(stimuli_envs[str(costs[0])])
result_shape = (n_envs, n_costs)

optimal_reward = np.zeros(result_shape)
worst_reward = np.zeros(result_shape)
mean_reward = np.zeros(result_shape)

lc_pol_reward = np.zeros(result_shape)
clicks = np.zeros(result_shape)

# Disabled: random-policy baseline.
# samples2 = np.zeros((5000,16,3))

for cost_id, cost in enumerate(costs):

    policy = read_bo_policy(cost)
    envs = stimuli_envs[str(cost)]
    assert len(envs) == n_envs, "every cost must have the same number of environments"

    # Mutates the shared env objects stored in stimuli_envs.
    for env in envs:
        env.sample_term_reward = True
    ev = evaluate(policy, envs)

    # evaluate() is expected to return one 'util' and one 'observations'
    # value per environment (they are assigned to a full column here).
    lc_pol_reward[:, cost_id] = ev['util']
    clicks[:, cost_id] = ev['observations']

#     for i in range(5000):
#         random_policy = RandomTreePolicy()
#         samples2[i,:,cost_id] = evaluate(random_policy, envs)['util']

    for env_id, env in enumerate(envs):
        optimal_reward[env_id, cost_id] = env.true_Q(0)
        worst_reward[env_id, cost_id] = env.worst_Q(0)
        mean_reward[env_id, cost_id] = env.mean_Q(0)

In [10]:
# rand_reward_samples2 = np.zeros((10000,16,3))
# for sample in range(10000):
#     for cost_id in range(3):
#         envs =  stimuli_envs[str(costs[cost_id])]
#         for env_id in range(16):
#             env = envs[env_id]
#             rand_reward_samples2[sample,env_id,cost_id] = env.rand_Q(0)

In [11]:
optimal_reward

array([[  4.,   4.,   4.],
       [ 36.,  36.,  36.],
       [ 36.,  36.,  36.],
       [ -4.,  -4.,  -4.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 20.,  20.,  20.],
       [ 12.,  12.,  12.],
       [ 28.,  28.,  28.],
       [ 12.,  12.,  12.],
       [ 36.,  36.,  36.],
       [ 44.,  44.,  44.],
       [ 28.,  28.,  28.],
       [ 36.,  36.,  36.]])

In [12]:
worst_reward

array([[-36., -36., -36.],
       [-12., -12., -12.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-12., -12., -12.],
       [ -4.,  -4.,  -4.],
       [ -4.,  -4.,  -4.],
       [-60., -60., -60.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-44., -44., -44.],
       [-20., -20., -20.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-20., -20., -20.],
       [-12., -12., -12.]])

In [13]:
mean_reward

array([[-13., -13., -13.],
       [  5.,   5.,   5.],
       [ 12.,  12.,  12.],
       [-18., -18., -18.],
       [  2.,   2.,   2.],
       [ 10.,  10.,  10.],
       [  9.,   9.,   9.],
       [ -1.,  -1.,  -1.],
       [ -5.,  -5.,  -5.],
       [-11., -11., -11.],
       [ -5.,  -5.,  -5.],
       [ -2.,  -2.,  -2.],
       [  9.,   9.,   9.],
       [ -4.,  -4.,  -4.],
       [  9.,   9.,   9.],
       [ 10.,  10.,  10.]])

In [14]:
lc_pol_reward

array([[  2.6 ,  -9.75,  -4.  ],
       [ 35.5 ,  26.  ,  28.  ],
       [ 35.1 ,  24.25,  24.  ],
       [ -5.5 , -19.  , -12.  ],
       [ 27.3 ,  24.25,  20.  ],
       [ 27.  ,  10.  ,  16.  ],
       [ 27.  ,  16.75,  -8.  ],
       [ 26.7 ,  10.  ,   0.  ],
       [ 19.2 ,  13.75, -28.  ],
       [ 11.  ,   4.5 ,  -4.  ],
       [ 27.1 ,  18.  ,  -4.  ],
       [ 10.7 ,   0.75, -16.  ],
       [ 35.4 ,  16.25,  16.  ],
       [ 43.6 ,  40.25,  16.  ],
       [ 27.2 ,  18.  ,  20.  ],
       [ 34.8 ,  24.75,  -4.  ]])

In [15]:
lc_pol_reward - mean_reward

array([[ 15.6 ,   3.25,   9.  ],
       [ 30.5 ,  21.  ,  23.  ],
       [ 23.1 ,  12.25,  12.  ],
       [ 12.5 ,  -1.  ,   6.  ],
       [ 25.3 ,  22.25,  18.  ],
       [ 17.  ,   0.  ,   6.  ],
       [ 18.  ,   7.75, -17.  ],
       [ 27.7 ,  11.  ,   1.  ],
       [ 24.2 ,  18.75, -23.  ],
       [ 22.  ,  15.5 ,   7.  ],
       [ 32.1 ,  23.  ,   1.  ],
       [ 12.7 ,   2.75, -14.  ],
       [ 26.4 ,   7.25,   7.  ],
       [ 47.6 ,  44.25,  20.  ],
       [ 18.2 ,   9.  ,  11.  ],
       [ 24.8 ,  14.75, -14.  ]])

In [16]:
# NOTE(review): numerator and denominator are the same expression, so `rel` is
# 1.0 wherever lc_pol_reward != mean_reward and nan (0/0, with a numpy
# RuntimeWarning) wherever they are equal. If a score normalised against the
# optimum was intended, the denominator would presumably be
# (optimal_reward - mean_reward) -- TODO confirm the intent before changing,
# because this array is exported to rel_score_pi_star1A.3.csv.
rel = (lc_pol_reward-mean_reward)/(lc_pol_reward-mean_reward)
rel

  """Entry point for launching an IPython kernel.


array([[  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,  nan,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.],
       [  1.,   1.,   1.]])

In [17]:
clicks

array([[ 14.,  11.,   2.],
       [  5.,   8.,   2.],
       [  9.,   3.,   1.],
       [ 15.,  12.,   2.],
       [  7.,   3.,   2.],
       [ 10.,   8.,   1.],
       [ 10.,   9.,   1.],
       [ 13.,   8.,   1.],
       [  8.,   5.,   2.],
       [ 10.,   6.,   2.],
       [  9.,   8.,   2.],
       [ 13.,   9.,   1.],
       [  6.,   3.,   1.],
       [  4.,   3.,   1.],
       [  8.,   8.,   2.],
       [ 12.,   9.,   2.]])

In [18]:
np.mean(clicks[:,2])

1.5625

In [19]:
# Export the result tables (rows: environments, columns: costs) as comma-separated files.
# NOTE(review): 'optimal1A.3.csv' and 'score_pi_star1A.3.csv' both receive
# lc_pol_reward, and 'worst1A.3.csv' receives mean_reward; the optimal_reward
# and worst_reward arrays are not written by any of these calls.
# Confirm that this file-to-array pairing is intended.
np.savetxt('../experiments/data/stimuli/exp1/optimal1A.3.csv', lc_pol_reward, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/score_pi_star1A.3.csv', lc_pol_reward, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/worst1A.3.csv', mean_reward, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/rel_score_pi_star1A.3.csv', rel, delimiter=",")
np.savetxt('../experiments/data/stimuli/exp1/nr_observations_pi_star1A.3.csv', clicks, delimiter=",")

# Pilot PRs

In [29]:
# Rebuild the stimulus environments from scratch (same seed, same costs), so
# the env objects used below are new instances rather than the ones created
# by the earlier cell.
costs = [0.10, 1.25, 4.00]
env_kwargs = dict(n=16, ground_truth=1)
stimuli_envs = {
    str(cost): make_envs([4, 1, 2], reward, cost, **env_kwargs)
    for cost in costs
}
stimuli_envs

{'0.1': [<mouselab.MouselabEnv at 0x7fe5d3e47ba8>,
  <mouselab.MouselabEnv at 0x7fe5d3e47f98>,
  <mouselab.MouselabEnv at 0x7fe5d3e475c0>,
  <mouselab.MouselabEnv at 0x7fe5d3e47828>,
  <mouselab.MouselabEnv at 0x7fe5d3e47a90>,
  <mouselab.MouselabEnv at 0x7fe5d3e42dd8>,
  <mouselab.MouselabEnv at 0x7fe5d3e429e8>,
  <mouselab.MouselabEnv at 0x7fe5d3e42860>,
  <mouselab.MouselabEnv at 0x7fe5d3e42cc0>,
  <mouselab.MouselabEnv at 0x7fe5d3e47d68>,
  <mouselab.MouselabEnv at 0x7fe5d3e42198>,
  <mouselab.MouselabEnv at 0x7fe5d3e424e0>,
  <mouselab.MouselabEnv at 0x7fe5d3eb88d0>,
  <mouselab.MouselabEnv at 0x7fe5d3eb8be0>,
  <mouselab.MouselabEnv at 0x7fe5d3eb8f28>,
  <mouselab.MouselabEnv at 0x7fe5d3eb8518>],
 '1.25': [<mouselab.MouselabEnv at 0x7fe5d3efada0>,
  <mouselab.MouselabEnv at 0x7fe5d3e412e8>,
  <mouselab.MouselabEnv at 0x7fe5d3e41588>,
  <mouselab.MouselabEnv at 0x7fe5d3e41f98>,
  <mouselab.MouselabEnv at 0x7fe5d3e41940>,
  <mouselab.MouselabEnv at 0x7fe5d3e41d68>,
  <mouselab.Mous

In [30]:
# Load the human trial data and keep only trials whose info_cost is one of `costs`.
trials = pd.read_csv('../experiments/data/human/1.8/trials.csv')
# .copy() makes the filtered frame an independent object: later cells add and
# overwrite columns on `trials`, which on a filtered selection can trigger
# SettingWithCopyWarning and leaves it unclear which frame is modified.
trials = trials.loc[trials['info_cost'].isin(costs)].copy()
trials

Unnamed: 0.1,Unnamed: 0,pid,info_cost,PR_type,message,trial_index,trial_id,delays,score,n_click,clicks,click_times,path,action_times
0,0,0,1.25,none,none,1,8969.0,[15],10.75,1,[1],[2869],"[1, 1, 2, 4]","[11219, 28273, 34591]"
1,1,0,1.25,none,none,2,9359.0,[15],0.25,3,"[5, 9, 1]","[1998, 4708, 11014]","[1, 9, 10, 12]","[13147, 32672, 35775]"
2,2,0,1.25,none,none,3,9876.0,[15],13.75,5,"[9, 12, 11, 15, 16]","[1647, 2669, 3494, 9504, 12246]","[1, 9, 10, 11]","[14037, 37723, 38677]"
3,3,0,1.25,none,none,4,6615.0,[15],-1.00,4,"[8, 16, 15, 13]","[1763, 6739, 7771, 8444]","[1, 13, 14, 15]","[10560, 26729, 28393]"
4,4,0,1.25,none,none,5,5157.0,[15],20.00,0,[],[],"[1, 5, 6, 8]","[3405, 23072, 66331]"
5,5,0,1.25,none,none,6,7410.0,[15],16.25,3,"[9, 1, 5]","[5647, 6733, 7580]","[1, 5, 6, 7]","[8692, 25672, 27321]"
6,6,0,1.25,none,none,7,6264.0,[15],8.25,3,"[1, 3, 4]","[1547, 2215, 2997]","[1, 13, 14, 15]","[5194, 21946, 22928]"
7,7,0,1.25,none,none,8,7618.0,[15],-6.50,2,"[9, 5]","[1690, 3103]","[1, 5, 6, 7]","[4244, 20830, 21732]"
8,8,0,1.25,none,none,9,8818.0,[15],10.75,1,[1],[1046],"[1, 1, 2, 3]","[2377, 18945, 19760]"
9,9,0,1.25,none,none,10,7003.0,[15],-15.75,3,"[1, 9, 5]","[1445, 2708, 3284]","[1, 1, 2, 4]","[4442, 21106, 21867]"


In [31]:
# Placeholder columns that the PR replay loop fills in row by row.
trials['pr_sum'] = np.zeros(trials.shape[0])
# Build a separate empty list per row. `[[]] * n` would store the *same* list
# object in every row, so appending to one row's list would change all rows.
trials['click_prs'] = [[] for _ in range(trials.shape[0])]

In [32]:
# Load the weight vectors used to score actions. In the recorded output the
# top-level keys are cost strings with two decimals ('0.10', '1.25', '4.00'),
# each mapping to a list of five numbers.
q_weights_path = '../experiments/exp1/static/json/q_weights.json'
with open(q_weights_path) as data_file:
    q_weights = json.load(data_file)
pprint(q_weights)

{'0.10': [2.053410166049148,
          0.035432105933805545,
          0.027055750247385255,
          0.9432457551263028,
          0.9979765410879625],
 '1.25': [1.7132112787775398,
          0.46426212124574967,
          0.23605538377611573,
          0.4252363266867176,
          0.993457670362997],
 '4.00': [1.0204049741183023,
          0.9892671828885087,
          -0.013161214101093,
          0.01785957275841159,
          1.0004646109245603]}


In [None]:
# Replay each recorded trial on its stimulus environment and score every
# action: pr = (q_weight . features of the action taken)
#            - max over available actions of (q_weight . features).
# The best available action therefore scores 0 and all others score below 0.
# Per-action values are stored in 'click_prs' and their sum in 'pr_sum'.
for index, row in trials.iterrows():
    if index % 20 == 0: print(index)
    cost = row['info_cost']
    trial_id = int(row['trial_index'])

    # stimuli_envs is keyed by str(cost) ('0.1', '1.25', '4.0'); trial_index is 1-based.
    menv = stimuli_envs[str(cost)][trial_id-1]

    # q_weights is keyed by the cost with two decimals ('0.10', '1.25', '4.00').
    q_weight = q_weights['{:.2f}'.format(cost)]

    # Named trial_clicks (not `clicks`) so the loop does not rebind the
    # `clicks` array of observation counts that an earlier cell builds and saves.
    trial_clicks = ast.literal_eval(row['clicks'])
    # Action 17 is appended after the recorded clicks -- presumably the
    # terminate action of the [4,1,2] environment; TODO confirm.
    trial_clicks.append(17)
    pr_sum = 0
    click_prs = []
    try:
        for a in trial_clicks:
            q_taken = np.inner(q_weight, menv.action_features(a))
            q_best = np.max([np.inner(q_weight, menv.action_features(a2))
                             for a2 in menv.actions(menv._state)])
            pr = q_taken - q_best
            menv.step(a)
            pr_sum += pr
            click_prs.append(pr)
    finally:
        # Always reset the env, even if the replay raises, so the next trial
        # that uses this env does not start from a half-played state.
        menv.reset()

    # DataFrame.set_value is deprecated and removed in newer pandas; .at is the
    # label-based scalar setter. The list is stored as one cell because
    # 'click_prs' is an object column.
    trials.at[index, 'pr_sum'] = pr_sum
    trials.at[index, 'click_prs'] = click_prs

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560


In [None]:
# Per-trial summary: identifying columns plus the summed and per-click PRs.
pr_columns = ['pid','info_cost','trial_index','pr_sum','click_prs']
csv_ready = trials[pr_columns]

In [42]:
csv_ready

Unnamed: 0,pid,info_cost,trial_index,pr_sum,click_prs
0,0,0.10,1,-19.473024,"[0.0, -0.324304015787, -19.1487199638]"
1,0,0.10,2,-14.298082,"[0.0, -0.210504570876, -0.382266880015, -0.382..."
2,0,0.10,3,-1.158616,"[0.0, -0.131482925701, 0.0, 0.0, -1.02713289086]"
3,0,0.10,4,-12.732936,"[0.0, -0.024981893567, -0.213568828633, -0.448..."
4,0,0.10,5,-7.742030,"[0.0, 0.0, 0.0, -0.103683559651, -0.3740098722..."
5,0,0.10,6,-12.339585,"[0.0, 0.0, -0.142487954368, -12.1970973059]"
6,0,0.10,7,-14.684785,"[0.0, -0.0187194016522, -0.335429268645, -0.33..."
7,0,0.10,8,-2.808495,"[0.0, 0.0, -0.210504570876, -0.232311392331, -..."
8,0,0.10,9,-0.836415,"[0.0, -0.210504570876, -0.210504570876, -0.232..."
9,0,0.10,10,-16.712976,"[0.0, 0.0, -0.593528882527, -0.632584865513, -..."


In [43]:
# Long format: one row per (pid, info_cost, trial_index, click position).
# apply(pd.Series) spreads each click_prs list across columns, and stack()
# turns those columns back into rows, dropping the padding NaNs of shorter lists.
res = (
    trials
    .set_index(['pid','info_cost','trial_index'])['click_prs']
    .apply(pd.Series)
    .stack()
    .reset_index()
)
res.columns = ['pid','info_cost','trial_index','click_num','click_pr']
res

Unnamed: 0,pid,info_cost,trial_index,click_num,click_pr
0,0,0.1,1,0,0.000000
1,0,0.1,1,1,-0.324304
2,0,0.1,1,2,-19.148720
3,0,0.1,2,0,0.000000
4,0,0.1,2,1,-0.210505
5,0,0.1,2,2,-0.382267
6,0,0.1,2,3,-0.382267
7,0,0.1,2,4,-0.516120
8,0,0.1,2,5,-0.521028
9,0,0.1,2,6,-12.285895


In [44]:
# Write the per-trial PR summary and the long-format per-click PRs.
# NOTE(review): the trials were read from '../experiments/data/human/1.8/' but
# these files are written under '../experiments/data/human/0.995/' -- confirm
# that this is the intended target directory.
csv_ready.to_csv('../experiments/data/human/0.995/pilot_prs.csv')
res.to_csv('../experiments/data/human/0.995/pilot_click_prs.csv')

# Object-Level PRs

In [26]:
import json

# Load the trial list; the recorded output shows each entry with the keys
# 'qs', 'trial_id' and 'stateRewards'.
increasing_trials_path = '../backward_planning/experiment1/experiment/static/json/mcrl_trials/increasing.json'
with open(increasing_trials_path) as data_file:
    data = json.load(data_file)

In [27]:
data[0]

{'qs': {'0 _ _ _ _ _ _ _ _ _ _ _ _': {'1': 39.45,
   '10': 39.95,
   '11': 39.97,
   '12': 39.97,
   '13': 0.0,
   '2': 39.95,
   '3': 39.97,
   '4': 39.97,
   '5': 39.45,
   '6': 39.95,
   '7': 39.97,
   '8': 39.97,
   '9': 39.45},
  '0 _ _ _ _ _ _ _ _ _ _ _ -24': {'1': 37.52,
   '10': 37.7,
   '11': 38.09,
   '13': 0.0,
   '2': 37.96,
   '3': 38.09,
   '4': 38.09,
   '5': 37.52,
   '6': 37.96,
   '7': 38.09,
   '8': 38.09,
   '9': 37.39},
  '0 _ _ _ _ _ _ _ _ _ _ -48 _': {'1': 37.47,
   '10': 37.64,
   '12': 38.05,
   '13': 0.0,
   '2': 37.92,
   '3': 38.05,
   '4': 38.05,
   '5': 37.47,
   '6': 37.92,
   '7': 38.05,
   '8': 38.05,
   '9': 37.34},
  '0 _ _ _ _ _ _ _ _ _ _ -48 -24': {'1': 34.37,
   '10': 34.09,
   '13': 0.0,
   '2': 34.75,
   '3': 35.03,
   '4': 35.03,
   '5': 34.37,
   '6': 34.75,
   '7': 35.03,
   '8': 35.03,
   '9': 34.06},
  '0 _ _ _ _ _ _ _ _ _ -4 _ _': {'1': 38.52,
   '11': 38.65,
   '12': 38.65,
   '13': 0.0,
   '2': 38.96,
   '3': 39.19,
   '4': 39.19,
   '5':

In [28]:
data[0].keys()

dict_keys(['qs', 'trial_id', 'stateRewards'])

In [29]:
# trials_dct = {}
inacts = [1,5,9]
directs = ["up","right","left"]
for d in data: 
    env = MouselabEnv.new_symmetric([3,1,2], None, ground_truth=d['stateRewards'], cost=0.01)
    qs0 = env.true_Q(0)

#     dct = {}
    prs = [env.true_Q(inacts[i])-qs0 for i in range(3)] 
    d['prs']={directs[i]: prs[i] for i in range(3)}
    d['opt_act'] = directs[np.argmax(prs)]
    
#     trials_dct[d['trial_id']] = dct

In [30]:
data[0]

{'opt_act': 'up',
 'prs': {'left': -62.0, 'right': -66.0, 'up': 0.0},
 'qs': {'0 _ _ _ _ _ _ _ _ _ _ _ _': {'1': 39.45,
   '10': 39.95,
   '11': 39.97,
   '12': 39.97,
   '13': 0.0,
   '2': 39.95,
   '3': 39.97,
   '4': 39.97,
   '5': 39.45,
   '6': 39.95,
   '7': 39.97,
   '8': 39.97,
   '9': 39.45},
  '0 _ _ _ _ _ _ _ _ _ _ _ -24': {'1': 37.52,
   '10': 37.7,
   '11': 38.09,
   '13': 0.0,
   '2': 37.96,
   '3': 38.09,
   '4': 38.09,
   '5': 37.52,
   '6': 37.96,
   '7': 38.09,
   '8': 38.09,
   '9': 37.39},
  '0 _ _ _ _ _ _ _ _ _ _ -48 _': {'1': 37.47,
   '10': 37.64,
   '12': 38.05,
   '13': 0.0,
   '2': 37.92,
   '3': 38.05,
   '4': 38.05,
   '5': 37.47,
   '6': 37.92,
   '7': 38.05,
   '8': 38.05,
   '9': 37.34},
  '0 _ _ _ _ _ _ _ _ _ _ -48 -24': {'1': 34.37,
   '10': 34.09,
   '13': 0.0,
   '2': 34.75,
   '3': 35.03,
   '4': 35.03,
   '5': 34.37,
   '6': 34.75,
   '7': 35.03,
   '8': 35.03,
   '9': 34.06},
  '0 _ _ _ _ _ _ _ _ _ -4 _ _': {'1': 38.52,
   '11': 38.65,
   '12': 38.

In [31]:
# Save the trial list, now carrying the added 'prs' and 'opt_act' entries,
# as a new JSON file next to the one it was loaded from.
with open('../backward_planning/experiment1/experiment/static/json/mcrl_trials/increasing_prs.json', 'w') as fout:
    json.dump(data, fout)

In [32]:
# Build a lookup trial_id -> {'prs': {direction: value}, 'opt_act': direction}.
# Each value is true_Q of the node paired with that direction minus true_Q(0);
# 'opt_act' is the direction with the largest value (first one on ties).
trials_dct = {}
inacts = [1,5,9]
directs = ["up","right","left"]
for d in data:
    env = MouselabEnv.new_symmetric([3,1,2], None, ground_truth=d['stateRewards'], cost=0.01)
    qs0 = env.true_Q(0)

    prs = [env.true_Q(start) - qs0 for start in inacts]
    dct = {
        'prs': {direction: pr for direction, pr in zip(directs, prs)},
        'opt_act': directs[np.argmax(prs)],
    }

    trials_dct[d['trial_id']] = dct

In [33]:
# Save the trial_id -> {'prs', 'opt_act'} lookup as JSON.
with open('../backward_planning/experiment1/experiment/static/json/object_prs.json', 'w') as fout:
    json.dump(trials_dct, fout)