In [23]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from model_utils import read_bo_policy
from policies import FixedPlanPolicy, LiederPolicy, RandomTreePolicy
from evaluation import *
from pprint import pprint
import ast

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [24]:
# Build the stimulus environments: one list of 16 envs per click cost, keyed by
# str(cost) (i.e. '0.25', '1.0', '4.0').
# `reward` is `dist.to_discrete(6)` -- presumably a 6-point discretisation of
# Normal(0, 10); confirm against the `distributions` module.
# NOTE(review): every call passes ground_truth=1. If the make_envs in scope matches
# the one defined further down in this notebook, that reseeds NumPy's RNG with 1
# on each call, so all costs get the same ground-truth rewards.
dist = Normal(0, 10)
reward = dist.to_discrete(6)
costs = [0.25, 1.00, 4.00]
stimuli_envs = dict(
    (str(cost), make_envs([4, 1, 2], reward, cost, n=16, ground_truth=1))
    for cost in costs
)
stimuli_envs

{'0.25': [<mouselab.MouselabEnv at 0x2b0af6b7cba8>,
  <mouselab.MouselabEnv at 0x2b0af6b7ca58>,
  <mouselab.MouselabEnv at 0x2b0afff14dd8>,
  <mouselab.MouselabEnv at 0x2b0af6b74d30>,
  <mouselab.MouselabEnv at 0x2b0af6b740b8>,
  <mouselab.MouselabEnv at 0x2b0af6b74320>,
  <mouselab.MouselabEnv at 0x2b0af6b74208>,
  <mouselab.MouselabEnv at 0x2b0af6b74828>,
  <mouselab.MouselabEnv at 0x2b0af6b749e8>,
  <mouselab.MouselabEnv at 0x2b0afff28208>,
  <mouselab.MouselabEnv at 0x2b0afff28550>,
  <mouselab.MouselabEnv at 0x2b0afff28898>,
  <mouselab.MouselabEnv at 0x2b0afff28a58>,
  <mouselab.MouselabEnv at 0x2b0afff28cf8>,
  <mouselab.MouselabEnv at 0x2b0afff1d080>,
  <mouselab.MouselabEnv at 0x2b0afff1d3c8>],
 '1.0': [<mouselab.MouselabEnv at 0x2b0afff1d748>,
  <mouselab.MouselabEnv at 0x2b0afff1da90>,
  <mouselab.MouselabEnv at 0x2b0afff1ddd8>,
  <mouselab.MouselabEnv at 0x2b0afff2c160>,
  <mouselab.MouselabEnv at 0x2b0afff2c4a8>,
  <mouselab.MouselabEnv at 0x2b0afff2c7f0>,
  <mouselab.Mous

In [253]:
# Evaluate the stored BO policy on every stimulus environment and record
# per-environment reference values. All result arrays are indexed
# [environment, cost]; their shape is derived from `costs` / `stimuli_envs`
# instead of being hard-coded, so it cannot drift from the env construction.
n_costs = len(costs)
n_envs_per_cost = len(stimuli_envs[str(costs[0])])
result_shape = (n_envs_per_cost, n_costs)

optimal_reward = np.zeros(result_shape)
worst_reward = np.zeros(result_shape)
mean_reward = np.zeros(result_shape)

lc_pol_reward = np.zeros(result_shape)
clicks = np.zeros(result_shape)

for cost_id, cost in enumerate(costs):
    policy = read_bo_policy(cost)
    # NOTE: `envs` and `env` stay bound after this cell; later cells read `envs`.
    envs = stimuli_envs[str(cost)]
    for env in envs:
        # Mutates the shared env objects held in stimuli_envs.
        env.sample_term_reward = True
    ev = evaluate(policy, envs)

    # 'util' and 'observations' are per-environment results from evaluate().
    lc_pol_reward[:, cost_id] = ev['util']
    clicks[:, cost_id] = ev['observations']

    for env_id, env in enumerate(envs):
        # true_Q / worst_Q / mean_Q are called with argument 0 -- presumably the
        # initial state or root action; TODO confirm against MouselabEnv.
        optimal_reward[env_id, cost_id] = env.true_Q(0)
        worst_reward[env_id, cost_id] = env.worst_Q(0)
        mean_reward[env_id, cost_id] = env.mean_Q(0)

In [199]:
# Draw 10000 values of env.rand_Q(0) for each of the 16 environments under each
# of the 3 costs. Result is indexed [sample, environment, cost].
# Iteration order (sample -> cost -> environment) is kept so that any random
# number consumption inside rand_Q happens in the same sequence.
rand_shape = (10000, 16, 3)
rand_reward_samples2 = np.zeros(rand_shape)
n_draws, n_rand_envs, n_rand_costs = rand_shape
for sample in range(n_draws):
    for cost_id in range(n_rand_costs):
        envs = stimuli_envs[str(costs[cost_id])]
        for env_id in range(n_rand_envs):
            env = envs[env_id]
            rand_reward_samples2[sample, env_id, cost_id] = env.rand_Q(0)

In [254]:
# Display env.true_Q(0) per environment (rows) and cost (columns).
optimal_reward

array([[  4.,   4.,   4.],
       [ 36.,  36.,  36.],
       [ 36.,  36.,  36.],
       [ -4.,  -4.,  -4.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 20.,  20.,  20.],
       [ 12.,  12.,  12.],
       [ 28.,  28.,  28.],
       [ 12.,  12.,  12.],
       [ 36.,  36.,  36.],
       [ 44.,  44.,  44.],
       [ 28.,  28.,  28.],
       [ 36.,  36.,  36.]])

In [255]:
# Display env.worst_Q(0) per environment (rows) and cost (columns).
worst_reward

array([[-36., -36., -36.],
       [-12., -12., -12.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-12., -12., -12.],
       [ -4.,  -4.,  -4.],
       [ -4.,  -4.,  -4.],
       [-60., -60., -60.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-44., -44., -44.],
       [-20., -20., -20.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-20., -20., -20.],
       [-12., -12., -12.]])

In [256]:
# Display the evaluated policy's 'util' per environment (rows) and cost (columns).
lc_pol_reward

array([[  0.5 ,  -6.  , -12.  ],
       [ 34.75,  31.  ,  20.  ],
       [ 33.75,  25.  ,  24.  ],
       [ -8.  , -14.  , -12.  ],
       [ 26.  ,  24.  ,  20.  ],
       [ 25.5 ,  15.  ,  16.  ],
       [ 24.75,  19.  ,  -8.  ],
       [ 24.75,  12.  ,   0.  ],
       [ 17.75,  12.  , -12.  ],
       [  9.5 ,   2.  , -36.  ],
       [ 25.75,  20.  ,  -4.  ],
       [  8.5 ,   3.  , -16.  ],
       [ 34.25,  30.  ,  16.  ],
       [ 43.  ,  40.  ,  16.  ],
       [ 26.  ,  23.  ,  12.  ],
       [ 33.  ,  27.  , -20.  ]])

In [211]:
# Keep a second name for the current samples array. This is an alias, not a
# copy: in-place changes to `all_samples` are visible through `old_samples` too.
# NOTE(review): `all_samples` is not defined in any cell shown here (hidden kernel state).
old_samples = all_samples

In [257]:
# Report the shape of the sample array, then average over axis 0 (the sample
# axis) to get one mean per (environment, cost).
all_samples_shape = np.shape(all_samples)
print(all_samples_shape)
base = np.mean(all_samples, axis=0)
base

(10000, 16, 3)


array([[ -5.652, -12.   , -35.96 ],
       [ 21.184,  14.7  ,  -9.267],
       [ 26.378,  20.207,  -3.902],
       [-14.105, -19.911, -43.824],
       [ 16.68 ,  10.912, -13.416],
       [ 21.841,  15.982,  -7.966],
       [ 14.802,   9.031, -14.937],
       [ 18.707,  12.727, -11.105],
       [  6.944,   0.849, -23.149],
       [ -1.634,  -7.917, -31.574],
       [ 13.815,   7.597, -16.462],
       [  3.217,  -2.97 , -26.935],
       [ 24.145,  17.952,  -6.109],
       [ 30.644,  24.647,   0.562],
       [ 18.166,  12.041, -11.97 ],
       [ 23.149,  17.208,  -6.922]])

In [258]:
# Half-width of the 95% normal-approximation confidence interval of the mean:
# 1.96 * std / sqrt(n). n is read from the array instead of being hard-coded,
# so the result stays correct if the number of samples changes.
1.96*np.std(all_samples,0)/np.sqrt(np.shape(all_samples)[0])

array([[ 0.201,  0.176,  0.299],
       [ 0.335,  0.301,  0.305],
       [ 0.185,  0.177,  0.341],
       [ 0.21 ,  0.186,  0.32 ],
       [ 0.284,  0.248,  0.308],
       [ 0.139,  0.13 ,  0.336],
       [ 0.215,  0.179,  0.289],
       [ 0.141,  0.118,  0.311],
       [ 0.229,  0.197,  0.294],
       [ 0.296,  0.265,  0.297],
       [ 0.281,  0.248,  0.294],
       [ 0.166,  0.138,  0.301],
       [ 0.228,  0.212,  0.344],
       [ 0.288,  0.254,  0.321],
       [ 0.163,  0.142,  0.316],
       [ 0.251,  0.223,  0.305]])

In [259]:
# Append the newly drawn batch to the pool of random samples (along axis 0, the
# sample axis) and average over samples.
# NOTE(review): this cell is not idempotent -- each re-run appends
# `rand_reward_samples2` again. The initial `rand_reward_samples` is not defined
# in any cell shown here (hidden kernel state).
rand_reward_samples = np.concatenate([rand_reward_samples, rand_reward_samples2], axis=0)
rand_reward = rand_reward_samples.mean(axis=0)
rand_reward

array([[-13.12 , -13.164, -12.94 ],
       [  5.037,   4.982,   5.059],
       [ 11.88 ,  11.841,  12.137],
       [-17.931, -17.875, -18.037],
       [  2.001,   1.957,   2.061],
       [  9.916,  10.038,  10.134],
       [  8.936,   9.   ,   8.932],
       [ -0.875,  -0.549,  -0.886],
       [ -4.952,  -5.002,  -4.958],
       [-10.957, -11.012, -11.04 ],
       [ -5.013,  -5.014,  -4.894],
       [ -2.007,  -2.044,  -1.976],
       [  8.842,   8.887,   8.84 ],
       [ -3.804,  -4.018,  -4.143],
       [  8.991,   9.022,   8.952],
       [ 10.196,  10.072,   9.834]])

In [260]:
# Half-width of the 95% normal-approximation confidence interval of the mean:
# 1.96 * std / sqrt(n). n is read from the array because the pool grows every
# time the concatenation cell is run, so a hard-coded count can silently be wrong.
1.96*np.std(rand_reward_samples,0)/np.sqrt(np.shape(rand_reward_samples)[0])

array([[ 0.196,  0.196,  0.194],
       [ 0.244,  0.244,  0.245],
       [ 0.247,  0.248,  0.247],
       [ 0.164,  0.164,  0.164],
       [ 0.189,  0.19 ,  0.19 ],
       [ 0.182,  0.181,  0.182],
       [ 0.157,  0.155,  0.156],
       [ 0.426,  0.422,  0.425],
       [ 0.203,  0.203,  0.203],
       [ 0.233,  0.232,  0.231],
       [ 0.308,  0.306,  0.304],
       [ 0.144,  0.145,  0.144],
       [ 0.254,  0.253,  0.253],
       [ 0.354,  0.354,  0.355],
       [ 0.2  ,  0.199,  0.201],
       [ 0.221,  0.219,  0.22 ]])

In [261]:
# Display env.mean_Q(0) per environment (rows) and cost (columns).
mean_reward

array([[-13., -13., -13.],
       [  5.,   5.,   5.],
       [ 12.,  12.,  12.],
       [-18., -18., -18.],
       [  2.,   2.,   2.],
       [ 10.,  10.,  10.],
       [  9.,   9.,   9.],
       [ -1.,  -1.,  -1.],
       [ -5.,  -5.,  -5.],
       [-11., -11., -11.],
       [ -5.,  -5.,  -5.],
       [ -2.,  -2.,  -2.],
       [  9.,   9.,   9.],
       [ -4.,  -4.,  -4.],
       [  9.,   9.,   9.],
       [ 10.,  10.,  10.]])

In [262]:
# Inspect the fixed node rewards of the environment at index 6.
# NOTE(review): `envs` is whatever list the most recently executed loop left
# bound (hidden state), so which cost this refers to depends on execution order.
envs[6].ground_truth

array([  0.,  12.,   4., -20.,   4.,  -4.,   4.,  12.,  -4.,  12.,   4., -20.,  12.,   4.,  20., -12., -12.])

In [263]:
# Element-wise difference between the evaluated policy's 'util' and env.mean_Q(0).
lc_pol_reward - mean_reward

array([[ 13.5 ,   7.  ,   1.  ],
       [ 29.75,  26.  ,  15.  ],
       [ 21.75,  13.  ,  12.  ],
       [ 10.  ,   4.  ,   6.  ],
       [ 24.  ,  22.  ,  18.  ],
       [ 15.5 ,   5.  ,   6.  ],
       [ 15.75,  10.  , -17.  ],
       [ 25.75,  13.  ,   1.  ],
       [ 22.75,  17.  ,  -7.  ],
       [ 20.5 ,  13.  , -25.  ],
       [ 30.75,  25.  ,   1.  ],
       [ 10.5 ,   5.  , -14.  ],
       [ 25.25,  21.  ,   7.  ],
       [ 47.  ,  44.  ,  20.  ],
       [ 17.  ,  14.  ,   3.  ],
       [ 23.  ,  17.  , -30.  ]])

In [264]:
# Relative score, written out so the structure is visible: the numerator and the
# denominator are the same quantity, so `rel` is 1 wherever the policy differs
# from the mean baseline and NaN (0/0) where it does not.
# NOTE(review): confirm this is intended (policy scored against itself) rather
# than a normalisation by a different reference such as `optimal_reward`.
gain_over_mean = lc_pol_reward - mean_reward
rel = gain_over_mean / gain_over_mean
rel

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [265]:
# Display the 'observations' count returned by evaluate(), per environment (rows) and cost (columns).
clicks

array([[ 14.,  10.,   2.],
       [  5.,   5.,   2.],
       [  9.,   3.,   1.],
       [ 16.,  10.,   2.],
       [  8.,   4.,   2.],
       [ 10.,   5.,   1.],
       [ 13.,   9.,   1.],
       [ 13.,   8.,   1.],
       [  9.,   8.,   2.],
       [ 10.,  10.,   2.],
       [  9.,   8.,   2.],
       [ 14.,   9.,   1.],
       [  7.,   6.,   1.],
       [  4.,   4.,   1.],
       [  8.,   5.,   2.],
       [ 12.,   9.,   2.]])

In [266]:
# Average of column 2 of `clicks`, i.e. the column filled for the third entry of `costs`.
np.mean(clicks[:,2])

1.5625

In [267]:
# Write the per-stimulus tables as comma-separated files.
# NOTE(review): the file named "optimal" receives `lc_pol_reward` (not
# `optimal_reward`) and the file named "worst" receives `mean_reward` (not
# `worst_reward`) -- confirm this mapping is intentional.
for csv_path, table in [
    ('../experiments/data/stimuli/exp1/optimal1A.2.csv', lc_pol_reward),
    ('../experiments/data/stimuli/exp1/worst1A.2.csv', mean_reward),
    ('../experiments/data/stimuli/exp1/rel_score_pi_star1A.2.csv', rel),
    ('../experiments/data/stimuli/exp1/nr_observations_pi_star1A.2.csv', clicks),
]:
    np.savetxt(csv_path, table, delimiter=",")

In [251]:
# Display the evaluated policy's 'util' table again (output kept from an earlier execution).
lc_pol_reward

array([[  0.5 ,  -6.  , -12.  ],
       [ 34.75,  31.  ,  20.  ],
       [ 33.75,  25.  ,  24.  ],
       [ -8.  , -14.  , -12.  ],
       [ 26.  ,  24.  ,  20.  ],
       [ 25.5 ,  15.  ,  16.  ],
       [ 24.75,  19.  ,  -8.  ],
       [ 24.75,  12.  ,   0.  ],
       [ 17.75,  12.  , -12.  ],
       [  9.5 ,   2.  , -36.  ],
       [ 25.75,  20.  ,  -4.  ],
       [  8.5 ,   3.  , -16.  ],
       [ 34.25,  30.  ,  16.  ],
       [ 43.  ,  40.  ,  16.  ],
       [ 26.  ,  23.  ,  12.  ],
       [ 33.  ,  27.  , -20.  ]])

In [194]:
# Display the 'observations' table again (output kept from an earlier execution, so values differ from above).
clicks

array([[ 14.,  12.,   4.],
       [  5.,   8.,   3.],
       [  9.,   9.,   2.],
       [ 16.,   8.,   4.],
       [  8.,   6.,   3.],
       [ 10.,   9.,   2.],
       [ 13.,   9.,   2.],
       [ 13.,  13.,   2.],
       [  9.,   9.,   3.],
       [ 10.,  11.,   3.],
       [  8.,   8.,   4.],
       [ 14.,  13.,   2.],
       [  7.,   5.,   2.],
       [  4.,   3.,   2.],
       [  8.,  10.,   3.],
       [ 12.,  10.,   3.]])

# Pilot PRs

In [27]:
# Rebuild the stimulus environments for the pilot costs: one list of 16 envs per
# cost, keyed by str(cost) (i.e. '0.1', '1.25', '4.0').
# NOTE: this overwrites the `costs` and `stimuli_envs` defined earlier in the
# notebook, and relies on `reward` from that earlier cell.
costs = [0.10, 1.25, 4.00]
stimuli_envs = dict(
    (str(cost), make_envs([4, 1, 2], reward, cost, n=16, ground_truth=1))
    for cost in costs
)
stimuli_envs

{'0.1': [<mouselab.MouselabEnv at 0x2b0afff46d68>,
  <mouselab.MouselabEnv at 0x2b0afff46f98>,
  <mouselab.MouselabEnv at 0x2b0afff4b320>,
  <mouselab.MouselabEnv at 0x2b0afff4b668>,
  <mouselab.MouselabEnv at 0x2b0afff4b9b0>,
  <mouselab.MouselabEnv at 0x2b0afff4bcf8>,
  <mouselab.MouselabEnv at 0x2b0af6b7c400>,
  <mouselab.MouselabEnv at 0x2b0af6b7c320>,
  <mouselab.MouselabEnv at 0x2b0af6b52f98>,
  <mouselab.MouselabEnv at 0x2b0af6b52c50>,
  <mouselab.MouselabEnv at 0x2b0afff52208>,
  <mouselab.MouselabEnv at 0x2b0afff52550>,
  <mouselab.MouselabEnv at 0x2b0afff52898>,
  <mouselab.MouselabEnv at 0x2b0afff52be0>,
  <mouselab.MouselabEnv at 0x2b0afff52f28>,
  <mouselab.MouselabEnv at 0x2b0afff572b0>],
 '1.25': [<mouselab.MouselabEnv at 0x2b0afff57630>,
  <mouselab.MouselabEnv at 0x2b0afff57978>,
  <mouselab.MouselabEnv at 0x2b0afff57cc0>,
  <mouselab.MouselabEnv at 0x2b0afff5a048>,
  <mouselab.MouselabEnv at 0x2b0afff5a390>,
  <mouselab.MouselabEnv at 0x2b0afff5a6d8>,
  <mouselab.Mous

In [36]:
# Load the human trial data and keep only trials whose click cost is one of `costs`.
# .copy() makes the filtered frame independent of the frame it was sliced from,
# so adding columns to it later is unambiguous and does not raise
# SettingWithCopyWarning.
trials = pd.read_csv('../experiments/data/human/0.995/trials.csv')
trials = trials.loc[trials['info_cost'].isin(costs)].copy()

In [37]:
# Pre-create the result columns: a float `pr_sum` and an object column
# `click_prs` holding one list per trial.
# Each row gets its OWN empty list; `[[]] * n` would put the same list object in
# every row, so an in-place append on one row would show up in all of them.
trials['pr_sum'] = np.zeros(trials.shape[0])
trials['click_prs'] = [[] for _ in range(trials.shape[0])]

In [38]:
def make_env(depth, reward, cost, ground_truth=False):
    """Build a single MouselabEnv for the given depth, reward and click cost.

    When `ground_truth` is truthy, the env's `ground_truth` attribute is set to
    an array with 0 for the first node followed by one value sampled from
    `reward` for each remaining node in `env.tree`, so the rewards observed on
    this env stay the same across runs (which reduces the variance of the
    return).
    """
    env = MouselabEnv(depth, reward=reward, cost=cost)
    if not ground_truth:
        return env
    n_sampled_nodes = len(env.tree) - 1
    env.ground_truth = np.array([0, *reward.sample(n_sampled_nodes)])
    return env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    """Return a list of `n` environments built with `make_env`.

    If `ground_truth` is not None it is used as the seed for NumPy's global
    RNG (so it can be an int), and `n` separate envs with fixed ground-truth
    rewards are created.

    If `ground_truth` is None, a single env without fixed rewards is created
    and the returned list holds `n` references to that same object, so state
    changes made through one list entry are visible through all of them.
    """
    if ground_truth is None:
        shared_env = make_env(depth, reward, cost, False)
        return [shared_env] * n

    np.random.seed(ground_truth)
    seeded_envs = []
    for _ in range(n):
        seeded_envs.append(make_env(depth, reward, cost, True))
    return seeded_envs

In [39]:
# Load the per-cost weight vectors from JSON and show them.
# The file maps two-decimal cost strings ('0.10', '1.25', '4.00') to lists of
# five floats, as the printed output below shows.
with open('data/q_weights.json') as weights_file:
    q_weights = json.loads(weights_file.read())
pprint(q_weights)

{'0.10': [4.645176951827218,
          0.04783056305614096,
          0.044789706164195975,
          0.9338160228125272,
          0.9994239339903965],
 '1.25': [1.7807939607281613,
          0.45246829707936637,
          0.27572911981296844,
          0.39014475114599445,
          0.9905875327727384],
 '4.00': [1.0066519566667151,
          0.9887916151065947,
          -0.00820497960640427,
          0.006948313322695628,
          1.0003951332873617]}


In [None]:
# Score every human trial click by click.
# For each action the participant took, `pr` is the linear value of that action
# (inner product of the cost-specific weights with the env's action features)
# minus the best such value over all actions available in the current state, so
# pr <= 0 and pr == 0 when the taken action was the top-valued one.
# Results are collected in lists and written to `trials` once after the loop
# (DataFrame.set_value no longer exists in current pandas).
pr_sums = []
click_pr_lists = []
for index, row in trials.iterrows():
    if index % 20 == 0: print(index)
    cost = row['info_cost']
    # trial_index is 1-based in the data; int() guards against a float dtype.
    trial_id = int(row['trial_index'])

    # stimuli_envs is keyed by str(cost) ('0.1', '1.25', '4.0') ...
    menv = stimuli_envs[str(float(cost))][trial_id - 1]
    # ... whereas q_weights uses two-decimal keys ('0.10', '1.25', '4.00').
    q_weight = q_weights['{:.2f}'.format(cost)]

    # The 'clicks' column stores a Python list literal as text.
    # Named trial_clicks so the `clicks` array from the stimulus section is not clobbered.
    trial_clicks = ast.literal_eval(row['clicks'])
    # 17 is appended as the final action -- presumably the terminate action of
    # the 17-node tree; TODO confirm against MouselabEnv.
    trial_clicks.append(17)

    pr_sum = 0
    click_prs = []
    try:
        for a in trial_clicks:
            # Both values are computed in the state BEFORE the action is applied.
            q_taken = np.inner(q_weight, menv.action_features(a))
            q_best = np.max([np.inner(q_weight, menv.action_features(a2))
                             for a2 in menv.actions(menv._state)])
            pr = q_taken - q_best
            menv.step(a)
            pr_sum += pr
            click_prs.append(pr)
    finally:
        # The env objects are shared across participants; always restore the
        # initial state, even if a trial fails part-way through.
        menv.reset()

    pr_sums.append(pr_sum)
    click_pr_lists.append(click_prs)

# Explicit Series keep the row alignment and force an object column for the lists.
trials['pr_sum'] = pd.Series(pr_sums, index=trials.index)
trials['click_prs'] = pd.Series(click_pr_lists, index=trials.index, dtype=object)

0
40
60
120
280
360
380
400
460
480
580
600
620
640
660
680
740
760


In [None]:
# Keep only the identifying columns and the two computed score columns for export.
export_columns = ['pid','info_cost','trial_index','pr_sum','click_prs']
csv_ready = trials[export_columns]

In [None]:
# Display the per-trial export table.
csv_ready

In [39]:
# Reshape the per-trial lists in `click_prs` into long format: one row per
# (pid, info_cost, trial_index, click position).
trial_keys = ['pid','info_cost','trial_index']
prs_by_trial = trials.set_index(trial_keys)['click_prs']
# apply(pd.Series) spreads each list over columns 0, 1, 2, ...; stack() then
# turns those columns into an extra index level holding the click position.
prs_wide = prs_by_trial.apply(pd.Series)
res = prs_wide.stack().reset_index()
res.columns = ['pid','info_cost','trial_index','click_num','click_pr']
res

Unnamed: 0,pid,info_cost,trial_index,click_num,click_pr
0,0,1.0,1,0,0.000000
1,0,1.0,1,1,-3.489671
2,0,1.0,1,2,-4.388081
3,0,1.0,1,3,-4.388081
4,0,1.0,1,4,0.000000
5,0,1.0,1,5,-1.839783
6,0,1.0,1,6,-1.502107
7,0,1.0,1,7,-1.360207
8,0,1.0,1,8,0.000000
9,0,1.0,1,9,-1.253846


In [50]:
# Write the per-trial and per-click tables to CSV.
# NOTE(review): the trials were read from .../human/0.995/ but these files go to
# .../human/0.991/ -- confirm the target directory.
for frame, csv_path in (
    (csv_ready, '../experiments/data/human/0.991/pilot_prs.csv'),
    (res, '../experiments/data/human/0.991/pilot_click_prs.csv'),
):
    frame.to_csv(csv_path)