In [233]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from model_utils import read_bo_policy
from policies import FixedPlanPolicy, LiederPolicy, RandomTreePolicy
from evaluation import *
from pprint import pprint
import ast

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [38]:
trials = pd.read_csv('../experiments/data/human/0.991/trials.csv')

In [3]:
trials

Unnamed: 0.1,Unnamed: 0,pid,info_cost,PR_type,message,trial_index,trial_i,delays,score,n_click,clicks,click_times,path,action_times
0,0,0,1.00,none,none,1,,[3],12.00,16,"[5, 6, 7, 8, 9, 10, 12, 11, 1, 2, 3, 4, 13, 14...","[2692, 3813, 4808, 7601, 13713, 15045, 15845, ...","[1, 5, 6, 7]","[71892, 77817, 81863]"
1,1,0,1.00,none,none,2,,[3],26.00,2,"[5, 9]","[3801, 5825]","[1, 9, 10, 12]","[7534, 21008, 22827]"
2,2,0,1.00,none,none,3,,[3],26.00,2,"[5, 6]","[2081, 3815]","[1, 5, 6, 8]","[5649, 14086, 16738]"
3,3,0,1.00,none,none,4,,[3],-8.00,4,"[5, 13, 9, 1]","[1609, 2946, 4339, 5745]","[1, 5, 6, 7]","[8788, 15820, 17077]"
4,4,0,1.00,none,none,5,,[3],19.00,1,[5],[3112],"[1, 5, 6, 7]","[5477, 9516, 14710]"
5,5,0,1.00,none,none,6,,[3],4.00,0,[],[],"[1, 5, 6, 8]","[3077, 7200, 10754]"
6,6,0,1.00,none,none,7,,[3],28.00,0,[],[],"[1, 9, 10, 12]","[2198, 6520, 8274]"
7,7,0,1.00,none,none,8,,[3],-4.00,0,[],[],"[1, 1, 2, 4]","[3118, 6943, 8352]"
8,8,0,1.00,none,none,9,,[3],4.00,0,[],[],"[1, 5, 6, 7]","[2954, 6727, 8846]"
9,9,0,1.00,none,none,10,,[3],-8.00,4,"[5, 9, 13, 1]","[7675, 8558, 9695, 10726]","[1, 9, 10, 12]","[12048, 25717, 27326]"


In [196]:
def make_env(depth, reward, cost, ground_truth=False):
    """Build a single MouselabEnv from `depth`, `reward` and `cost`.

    When `ground_truth` is truthy, the env's `ground_truth` attribute is
    replaced by a fixed array: 0 in the first position followed by
    `len(env.tree) - 1` values drawn with `reward.sample`. The reward
    observed at a given node is then constant across runs on this env,
    which reduces the variance of the return.
    """
    env = MouselabEnv(depth, reward=reward, cost=cost)
    if not ground_truth:
        return env
    # First entry is pinned to 0; every remaining entry gets a sampled value.
    n_sampled = len(env.tree) - 1
    node_values = reward.sample(n_sampled)
    env.ground_truth = np.array([0, *node_values])
    return env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    """Return a list of `n` independent environments built by `make_env`.

    Parameters
    ----------
    depth, reward, cost
        Forwarded unchanged to `make_env`.
    n : int
        Number of environments to create.
    ground_truth : int or None
        If not None, it is passed to `np.random.seed` (so an int acts as a
        random seed) and every environment is created with a fixed ground
        truth. If None, environments are created without a fixed ground truth.

    Returns
    -------
    list
        `n` distinct environment objects.
    """
    fixed = ground_truth is not None
    if fixed:
        np.random.seed(ground_truth)
    # Build each env separately. `[env] * n` would yield n references to ONE
    # mutable env, so stepping or configuring any element would affect all.
    return [make_env(depth, reward, cost, fixed) for _ in range(n)]

In [4]:
# Load the weight vectors from JSON (an object keyed by cost string, each
# value a list of numbers) and echo them for inspection.
with open('../experiments/exp1/static/json/q_weights.json') as weights_file:
    q_weights = json.load(weights_file)
pprint(q_weights)

{'0.25': [2.3252393134113225,
          0.09975155227317743,
          0.08032182729437333,
          0.8450937421220647,
          0.997435322775085],
 '1.00': [1.9484115264310722,
          0.4288647055137153,
          0.269370854918958,
          0.44258532182537325,
          0.9912367388407017],
 '4.00': [1.02455453397985,
          0.9723698649146737,
          0.0048616156760657794,
          0.01790135732732076,
          1.000241178138463]}


In [14]:
# Per-trial total, filled in later; starts at 0.0 for every row.
trials['pr_sum'] = np.zeros(len(trials))
# One fresh list per row. `[[]] * n` would store the SAME list object in every
# row, so an in-place append on one row would show up in all of them.
trials['click_prs'] = [[] for _ in range(len(trials))]

In [240]:
# Discretised reward distribution shared by all stimulus environments.
dist = Normal(0,10)
reward = dist.to_discrete(6)
costs = [0.25,1.00,4.00]
# One list of 16 environments per cost. Keys are str(cost) of a float, i.e.
# '0.25', '1.0', '4.0' (not '1.00' / '4.00').
# ground_truth=1 makes make_envs reseed np.random with 1 for every cost;
# presumably each cost therefore gets the same sampled ground truths -- TODO confirm.
stimuli_envs = dict(
    (str(cost), make_envs([4,1,2], reward, cost, n=16, ground_truth=1))
    for cost in costs
)
stimuli_envs

{'0.25': [<mouselab.MouselabEnv at 0x2b931fda10b8>,
  <mouselab.MouselabEnv at 0x2b9321624518>,
  <mouselab.MouselabEnv at 0x2b931fb0fe10>,
  <mouselab.MouselabEnv at 0x2b931fb0fba8>,
  <mouselab.MouselabEnv at 0x2b931fb0f0f0>,
  <mouselab.MouselabEnv at 0x2b931fb0f668>,
  <mouselab.MouselabEnv at 0x2b931fb0f860>,
  <mouselab.MouselabEnv at 0x2b931f01ec88>,
  <mouselab.MouselabEnv at 0x2b9335b3d160>,
  <mouselab.MouselabEnv at 0x2b9335a67dd8>,
  <mouselab.MouselabEnv at 0x2b9335a67cf8>,
  <mouselab.MouselabEnv at 0x2b934a01dcf8>,
  <mouselab.MouselabEnv at 0x2b934a01d0b8>,
  <mouselab.MouselabEnv at 0x2b934a01d9b0>,
  <mouselab.MouselabEnv at 0x2b934a01d208>,
  <mouselab.MouselabEnv at 0x2b934a01d5c0>],
 '1.0': [<mouselab.MouselabEnv at 0x2b934bc7c5c0>,
  <mouselab.MouselabEnv at 0x2b934bc7c470>,
  <mouselab.MouselabEnv at 0x2b9349ff7358>,
  <mouselab.MouselabEnv at 0x2b9349ff7208>,
  <mouselab.MouselabEnv at 0x2b9349ff70f0>,
  <mouselab.MouselabEnv at 0x2b93246cfa58>,
  <mouselab.Mous

In [241]:
# Summary tables with one row per stimulus environment and one column per
# entry of `costs`. The shape is derived from the data instead of being
# hard-coded, so changing `costs` or the number of environments cannot
# silently desynchronise these arrays.
n_envs = len(stimuli_envs[str(costs[0])])
n_costs = len(costs)
table_shape = (n_envs, n_costs)

optimal_reward = np.zeros(table_shape)
worst_reward = np.zeros(table_shape)
mean_reward = np.zeros(table_shape)

lc_pol_reward = np.zeros(table_shape)
clicks = np.zeros(table_shape)

# Disabled random-policy baseline, kept for reference; presumably the origin
# of the `all_samples` array used by later cells -- TODO confirm.
# samples2 = np.zeros((5000,16,3))

for cost_id, cost in enumerate(costs):

    policy = read_bo_policy(cost)
    envs = stimuli_envs[str(cost)]
    for env in envs:
        env.sample_term_reward = True
    ev = evaluate(policy, envs)

    # `ev['util']` and `ev['observations']` each fill one column, i.e. they
    # hold one value per environment.
    lc_pol_reward[:, cost_id] = ev['util']
    clicks[:, cost_id] = ev['observations']

#     for i in range(5000):
#         random_policy = RandomTreePolicy()
#         samples2[i,:,cost_id] = evaluate(random_policy, envs)['util']

    # Reference values queried from each env for action/state argument 0.
    for env_id, env in enumerate(envs):
        optimal_reward[env_id, cost_id] = env.true_Q(0)
        worst_reward[env_id, cost_id] = env.worst_Q(0)
        mean_reward[env_id, cost_id] = env.mean_Q(0)

In [199]:
# Draw repeated `rand_Q(0)` values for every (environment, cost) pair.
# Dimensions are derived from `costs` / `stimuli_envs` rather than hard-coded.
n_samples = 10000
n_costs = len(costs)
n_envs = len(stimuli_envs[str(costs[0])])
rand_reward_samples2 = np.zeros((n_samples, n_envs, n_costs))
# Loop order (sample -> cost -> env) is kept as in the original so that any
# random draws inside rand_Q happen in the same sequence.
for sample in range(n_samples):
    for cost_id in range(n_costs):
        envs = stimuli_envs[str(costs[cost_id])]
        for env_id in range(n_envs):
            env = envs[env_id]
            rand_reward_samples2[sample, env_id, cost_id] = env.rand_Q(0)

In [242]:
optimal_reward

array([[  4.,   4.,   4.],
       [ 36.,  36.,  36.],
       [ 36.,  36.,  36.],
       [ -4.,  -4.,  -4.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 20.,  20.,  20.],
       [ 12.,  12.,  12.],
       [ 28.,  28.,  28.],
       [ 12.,  12.,  12.],
       [ 36.,  36.,  36.],
       [ 44.,  44.,  44.],
       [ 28.,  28.,  28.],
       [ 36.,  36.,  36.]])

In [243]:
worst_reward

array([[-36., -36., -36.],
       [-12., -12., -12.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-12., -12., -12.],
       [ -4.,  -4.,  -4.],
       [ -4.,  -4.,  -4.],
       [-60., -60., -60.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-44., -44., -44.],
       [-20., -20., -20.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-20., -20., -20.],
       [-12., -12., -12.]])

In [244]:
lc_pol_reward

array([[  0.5 ,  -6.  , -12.  ],
       [ 34.75,  31.  ,  20.  ],
       [ 33.75,  25.  ,  24.  ],
       [ -8.  , -14.  , -12.  ],
       [ 26.  ,  24.  ,  20.  ],
       [ 25.5 ,  15.  ,  16.  ],
       [ 24.75,  19.  ,  -8.  ],
       [ 24.75,  12.  ,   0.  ],
       [ 17.75,  12.  , -12.  ],
       [  9.5 ,   2.  , -36.  ],
       [ 25.75,  20.  ,  -4.  ],
       [  8.5 ,   3.  , -16.  ],
       [ 34.25,  30.  ,  16.  ],
       [ 43.  ,  40.  ,  16.  ],
       [ 26.  ,  23.  ,  12.  ],
       [ 33.  ,  27.  , -20.  ]])

In [211]:
# Keep a second reference to the current samples (this is an alias, not a copy).
# NOTE(review): `all_samples` is not assigned anywhere in the visible notebook;
# this cell depends on leftover kernel state and will fail on a fresh run.
old_samples = all_samples

In [212]:
print(np.shape(all_samples))
base = np.mean(all_samples,0)
base

(10000, 16, 3)


array([[ -5.652, -12.   , -35.96 ],
       [ 21.184,  14.7  ,  -9.267],
       [ 26.378,  20.207,  -3.902],
       [-14.105, -19.911, -43.824],
       [ 16.68 ,  10.912, -13.416],
       [ 21.841,  15.982,  -7.966],
       [ 14.802,   9.031, -14.937],
       [ 18.707,  12.727, -11.105],
       [  6.944,   0.849, -23.149],
       [ -1.634,  -7.917, -31.574],
       [ 13.815,   7.597, -16.462],
       [  3.217,  -2.97 , -26.935],
       [ 24.145,  17.952,  -6.109],
       [ 30.644,  24.647,   0.562],
       [ 18.166,  12.041, -11.97 ],
       [ 23.149,  17.208,  -6.922]])

In [213]:
# Half-width of a normal-approximation 95% interval for the mean over axis 0:
# 1.96 * std / sqrt(n). n is read from the array so it cannot go stale.
1.96*np.std(all_samples,0)/np.sqrt(np.shape(all_samples)[0])

array([[ 0.201,  0.176,  0.299],
       [ 0.335,  0.301,  0.305],
       [ 0.185,  0.177,  0.341],
       [ 0.21 ,  0.186,  0.32 ],
       [ 0.284,  0.248,  0.308],
       [ 0.139,  0.13 ,  0.336],
       [ 0.215,  0.179,  0.289],
       [ 0.141,  0.118,  0.311],
       [ 0.229,  0.197,  0.294],
       [ 0.296,  0.265,  0.297],
       [ 0.281,  0.248,  0.294],
       [ 0.166,  0.138,  0.301],
       [ 0.228,  0.212,  0.344],
       [ 0.288,  0.254,  0.321],
       [ 0.163,  0.142,  0.316],
       [ 0.251,  0.223,  0.305]])

In [214]:
# Append the newly drawn samples along the first axis and average over it.
# NOTE(review): this cell reassigns `rand_reward_samples` from itself, so every
# re-run appends another copy of `rand_reward_samples2`; the initial
# `rand_reward_samples` is not created anywhere in the visible notebook.
rand_reward_samples = np.concatenate([rand_reward_samples, rand_reward_samples2], axis=0)
rand_reward = rand_reward_samples.mean(axis=0)
rand_reward

array([[-13.102, -13.124, -12.942],
       [  5.038,   5.013,   5.053],
       [ 11.858,  11.826,  12.106],
       [-17.908, -17.91 , -18.027],
       [  2.011,   2.006,   2.064],
       [  9.944,  10.063,  10.119],
       [  8.977,   9.016,   8.956],
       [ -0.888,  -0.624,  -0.921],
       [ -4.936,  -4.971,  -4.943],
       [-10.938, -10.97 , -11.035],
       [ -5.053,  -5.023,  -4.917],
       [ -1.98 ,  -2.025,  -1.983],
       [  8.866,   8.905,   8.787],
       [ -3.801,  -3.962,  -4.013],
       [  8.974,   9.035,   8.942],
       [ 10.147,  10.011,   9.857]])

In [215]:
# Half-width of a normal-approximation 95% interval for the mean over axis 0.
# The sample count is read from the array: `rand_reward_samples` grows each
# time the concatenation cell is run, so a literal count would be wrong.
1.96*np.std(rand_reward_samples,0)/np.sqrt(np.shape(rand_reward_samples)[0])

array([[ 0.196,  0.196,  0.195],
       [ 0.245,  0.244,  0.245],
       [ 0.247,  0.248,  0.248],
       [ 0.163,  0.164,  0.164],
       [ 0.19 ,  0.19 ,  0.19 ],
       [ 0.182,  0.182,  0.182],
       [ 0.157,  0.156,  0.156],
       [ 0.426,  0.422,  0.425],
       [ 0.203,  0.203,  0.203],
       [ 0.233,  0.232,  0.231],
       [ 0.308,  0.306,  0.304],
       [ 0.144,  0.145,  0.144],
       [ 0.254,  0.253,  0.253],
       [ 0.355,  0.355,  0.355],
       [ 0.2  ,  0.199,  0.201],
       [ 0.221,  0.219,  0.22 ]])

In [245]:
mean_reward

array([[-13., -13., -13.],
       [  5.,   5.,   5.],
       [ 12.,  12.,  12.],
       [-18., -18., -18.],
       [  2.,   2.,   2.],
       [ 10.,  10.,  10.],
       [  9.,   9.,   9.],
       [ -1.,  -1.,  -1.],
       [ -5.,  -5.,  -5.],
       [-11., -11., -11.],
       [ -5.,  -5.,  -5.],
       [ -2.,  -2.,  -2.],
       [  9.,   9.,   9.],
       [ -4.,  -4.,  -4.],
       [  9.,   9.,   9.],
       [ 10.,  10.,  10.]])

In [227]:
envs[6].ground_truth

array([  0.,  12.,   4., -20.,   4.,  -4.,   4.,  12.,  -4.,  12.,   4., -20.,  12.,   4.,  20., -12., -12.])

In [246]:
lc_pol_reward - mean_reward

array([[ 13.5 ,   7.  ,   1.  ],
       [ 29.75,  26.  ,  15.  ],
       [ 21.75,  13.  ,  12.  ],
       [ 10.  ,   4.  ,   6.  ],
       [ 24.  ,  22.  ,  18.  ],
       [ 15.5 ,   5.  ,   6.  ],
       [ 15.75,  10.  , -17.  ],
       [ 25.75,  13.  ,   1.  ],
       [ 22.75,  17.  ,  -7.  ],
       [ 20.5 ,  13.  , -25.  ],
       [ 30.75,  25.  ,   1.  ],
       [ 10.5 ,   5.  , -14.  ],
       [ 25.25,  21.  ,   7.  ],
       [ 47.  ,  44.  ,  20.  ],
       [ 17.  ,  14.  ,   3.  ],
       [ 23.  ,  17.  , -30.  ]])

In [247]:
# NOTE(review): numerator and denominator are the same expression, so every
# entry is 1 (or NaN wherever lc_pol_reward == mean_reward); the displayed
# result is all ones. If a score relative to a different reference (e.g.
# `optimal_reward`) was intended, the denominator needs changing -- confirm
# with the author before relying on the exported file.
rel = (lc_pol_reward-mean_reward)/(lc_pol_reward-mean_reward)
rel

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [248]:
clicks

array([[ 14.,  10.,   2.],
       [  5.,   5.,   2.],
       [  9.,   3.,   1.],
       [ 16.,  10.,   2.],
       [  8.,   4.,   2.],
       [ 10.,   5.,   1.],
       [ 13.,   9.,   1.],
       [ 13.,   8.,   1.],
       [  9.,   8.,   2.],
       [ 10.,  10.,   2.],
       [  9.,   8.,   2.],
       [ 14.,   9.,   1.],
       [  7.,   6.,   1.],
       [  4.,   4.,   1.],
       [  8.,   5.,   2.],
       [ 12.,   9.,   2.]])

In [231]:
np.mean(clicks[:,2])

1.75

In [232]:
# Write each summary table to its own comma-separated file.
# NOTE(review): the file named "optimal" receives `lc_pol_reward` and the file
# named "worst" receives `mean_reward` -- confirm this mapping is intended.
exports = [
    ('../experiments/data/stimuli/exp1/optimal1A.2.csv', lc_pol_reward),
    ('../experiments/data/stimuli/exp1/worst1A.2.csv', mean_reward),
    ('../experiments/data/stimuli/exp1/rel_score_pi_star1A.2.csv', rel),
    ('../experiments/data/stimuli/exp1/nr_observations_pi_star1A.2.csv', clicks),
]
for out_path, table in exports:
    np.savetxt(out_path, table, delimiter=",")

In [191]:
lc_pol_reward

array([[  0.5 ,  -8.  , -12.  ],
       [ 34.75,  28.  ,  24.  ],
       [ 33.75,  27.  ,  20.  ],
       [ -8.  , -12.  , -20.  ],
       [ 26.  ,  22.  ,  16.  ],
       [ 25.5 ,  11.  ,  12.  ],
       [ 24.75,  19.  ,  12.  ],
       [ 24.75,  15.  ,  12.  ],
       [ 17.75,  11.  , -16.  ],
       [  9.5 ,   1.  ,  -8.  ],
       [ 26.  ,  20.  ,  12.  ],
       [  8.5 ,  -1.  , -12.  ],
       [ 34.25,  31.  ,  12.  ],
       [ 43.  ,  41.  ,  36.  ],
       [ 26.  ,  18.  ,  16.  ],
       [ 33.  ,  26.  ,  -8.  ]])

In [194]:
clicks

array([[ 14.,  12.,   4.],
       [  5.,   8.,   3.],
       [  9.,   9.,   2.],
       [ 16.,   8.,   4.],
       [  8.,   6.,   3.],
       [ 10.,   9.,   2.],
       [ 13.,   9.,   2.],
       [ 13.,  13.,   2.],
       [  9.,   9.,   3.],
       [ 10.,  11.,   3.],
       [  8.,   8.,   4.],
       [ 14.,  13.,   2.],
       [  7.,   5.,   2.],
       [  4.,   3.,   2.],
       [  8.,  10.,   3.],
       [ 12.,  10.,   3.]])

In [13]:
# For every recorded trial, replay the participant's actions on the matching
# stimulus environment and score each action as
#     pr = w . features(action) - max over available actions of w . features(a2)
# so the best-scoring available action gets 0 and all others are <= 0.
# Results are written back to `trials` as `click_prs` (per action) and
# `pr_sum` (their total).

# Action appended after the recorded clicks; presumably the env's
# "terminate" action for this 17-node tree -- TODO confirm.
FINAL_ACTION = 17

for index, row in trials.iterrows():
    if index % 20 == 0: print(index)
    cost = row['info_cost']
    trial_id = int(row['trial_index'])

    # `stimuli_envs` is keyed by str(float) ('0.25', '1.0', '4.0'); trial
    # indices in the data are 1-based.
    menv = stimuli_envs[str(cost)][trial_id - 1]
    # `q_weights` is keyed with two decimals ('0.25', '1.00', '4.00').
    q_weight = q_weights['{:.2f}'.format(cost)]

    # Use a trial-local name: the notebook also has a global `clicks` array
    # that is exported later and must not be overwritten here.
    trial_clicks = ast.literal_eval(row['clicks'])
    trial_clicks.append(FINAL_ACTION)

    click_prs = []
    try:
        for a in trial_clicks:
            f = menv.action_features(a)
            best_q = np.max([np.inner(q_weight, menv.action_features(a2))
                             for a2 in menv.actions(menv._state)])
            pr = np.inner(q_weight, f) - best_q
            menv.step(a)
            click_prs.append(pr)
    finally:
        # Always restore the shared env, even if a step fails, so the next
        # trial that reuses it starts from a clean state.
        menv.reset()

    # `.at` replaces DataFrame.set_value, which has been removed from pandas.
    # Storing a list in a cell requires the `click_prs` column to be object dtype.
    trials.at[index, 'pr_sum'] = sum(click_prs)
    trials.at[index, 'click_prs'] = click_prs

NameError: name 'trials' is not defined

In [48]:
csv_ready = trials[['pid','info_cost','trial_index','pr_sum','click_prs']]

In [49]:
csv_ready

Unnamed: 0,pid,info_cost,trial_index,pr_sum,click_prs
0,0,1.00,1,-21.801828,"[0.0, -3.48967110132, -4.38808088052, -4.38808..."
1,0,1.00,2,-10.724908,"[0.0, -1.78773376916, -8.9371740738]"
2,0,1.00,3,-5.819986,"[0.0, -0.394992325395, -5.42499349295]"
3,0,1.00,4,-6.452739,"[0.0, 0.0, 0.0, -0.432655729455, -6.02008290493]"
4,0,1.00,5,-6.910454,"[0.0, -6.91045391442]"
5,0,1.00,6,-12.222219,[-12.2222185374]
6,0,1.00,7,-12.222219,[-12.2222185374]
7,0,1.00,8,-12.222219,[-12.2222185374]
8,0,1.00,9,-12.222219,[-12.2222185374]
9,0,1.00,10,-10.693319,"[0.0, -0.432655729455, -1.1453229211, -1.14532..."


In [39]:
# Long-format table: one row per (trial, click) with that click's value.
id_cols = ['pid','info_cost','trial_index']
prs_by_trial = trials.set_index(id_cols)['click_prs']
# Each list is expanded into a row of columns; stacking then yields one entry
# per list element, with the innermost index level giving its position.
res = prs_by_trial.apply(pd.Series).stack().reset_index()
res.columns = ['pid','info_cost','trial_index','click_num','click_pr']
res

Unnamed: 0,pid,info_cost,trial_index,click_num,click_pr
0,0,1.0,1,0,0.000000
1,0,1.0,1,1,-3.489671
2,0,1.0,1,2,-4.388081
3,0,1.0,1,3,-4.388081
4,0,1.0,1,4,0.000000
5,0,1.0,1,5,-1.839783
6,0,1.0,1,6,-1.502107
7,0,1.0,1,7,-1.360207
8,0,1.0,1,8,0.000000
9,0,1.0,1,9,-1.253846


In [50]:
csv_ready.to_csv('../experiments/data/human/0.991/pilot_prs.csv')
res.to_csv('../experiments/data/human/0.991/pilot_click_prs.csv')