In [11]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from policies import FixedPlanPolicy, LiederPolicy
from evaluation import *
from pprint import pprint
import ast

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# Load the human trial data; the path is named so the input file is easy to spot and change.
trials_path = '../experiments/data/human/0.991/trials.csv'
trials = pd.read_csv(trials_path)

In [3]:
# Display the loaded trials table as a quick visual sanity check.
trials

Unnamed: 0.1,Unnamed: 0,pid,info_cost,PR_type,message,trial_index,trial_i,delays,score,n_click,clicks,click_times,path,action_times
0,0,0,1.00,none,none,1,,[3],12.00,16,"[5, 6, 7, 8, 9, 10, 12, 11, 1, 2, 3, 4, 13, 14...","[2692, 3813, 4808, 7601, 13713, 15045, 15845, ...","[1, 5, 6, 7]","[71892, 77817, 81863]"
1,1,0,1.00,none,none,2,,[3],26.00,2,"[5, 9]","[3801, 5825]","[1, 9, 10, 12]","[7534, 21008, 22827]"
2,2,0,1.00,none,none,3,,[3],26.00,2,"[5, 6]","[2081, 3815]","[1, 5, 6, 8]","[5649, 14086, 16738]"
3,3,0,1.00,none,none,4,,[3],-8.00,4,"[5, 13, 9, 1]","[1609, 2946, 4339, 5745]","[1, 5, 6, 7]","[8788, 15820, 17077]"
4,4,0,1.00,none,none,5,,[3],19.00,1,[5],[3112],"[1, 5, 6, 7]","[5477, 9516, 14710]"
5,5,0,1.00,none,none,6,,[3],4.00,0,[],[],"[1, 5, 6, 8]","[3077, 7200, 10754]"
6,6,0,1.00,none,none,7,,[3],28.00,0,[],[],"[1, 9, 10, 12]","[2198, 6520, 8274]"
7,7,0,1.00,none,none,8,,[3],-4.00,0,[],[],"[1, 1, 2, 4]","[3118, 6943, 8352]"
8,8,0,1.00,none,none,9,,[3],4.00,0,[],[],"[1, 5, 6, 7]","[2954, 6727, 8846]"
9,9,0,1.00,none,none,10,,[3],-8.00,4,"[5, 9, 13, 1]","[7675, 8558, 9695, 10726]","[1, 9, 10, 12]","[12048, 25717, 27326]"


In [4]:
def make_env(depth, reward, cost, ground_truth=False):
    """Build a single MouselabEnv from `depth`, `reward` and `cost`.

    When `ground_truth` is truthy, the env's `ground_truth` attribute is
    overwritten with a fixed array: 0 for the first node followed by one draw
    from `reward` for each remaining node in `env.tree`. Fixing the rewards
    makes them constant across runs on this env, which reduces the variance
    of the return.
    """
    env = MouselabEnv(depth, reward=reward, cost=cost)
    if not ground_truth:
        return env

    # One sampled reward per node except the first, which is pinned to 0.
    n_sampled = len(env.tree) - 1
    sampled_rewards = reward.sample(n_sampled)
    env.ground_truth = np.array([0, *sampled_rewards])
    return env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    """Return a list of `n` environments built with `make_env`.

    Parameters
    ----------
    depth, reward, cost
        Forwarded unchanged to `make_env`.
    n : int
        Number of list entries to return.
    ground_truth : None, False, or int
        None or False: no fixed ground truth. A single env is created and the
        returned list holds `n` references to that same object.
        Otherwise: the value is passed to `np.random.seed` and `n` separate
        envs are created, each with its own sampled ground truth.
    """
    # `False` is handled like None so the flag has the same meaning as
    # `make_env`'s `ground_truth=False`. Previously False passed the
    # `is not None` check and was silently used as a seed.
    if ground_truth is None or ground_truth is False:
        # NOTE: the list aliases ONE env object n times; mutating any entry
        # affects all of them. Kept as-is for backward compatibility.
        return [make_env(depth, reward, cost, False)] * n

    # Note, ground_truth can be an int in which case it acts as a random seed.
    np.random.seed(ground_truth)
    return [make_env(depth, reward, cost, True) for _ in range(n)]

In [5]:
# Read the Q-weight vectors from disk; the JSON maps a cost string to a list of weights.
q_weights_path = '../experiments/exp1/static/json/q_weights.json'
with open(q_weights_path) as weights_file:
    q_weights = json.load(weights_file)
pprint(q_weights)

{'0.25': [2.3252393134113225,
          0.09975155227317743,
          0.08032182729437333,
          0.8450937421220647,
          0.997435322775085],
 '1.00': [1.9484115264310722,
          0.4288647055137153,
          0.269370854918958,
          0.44258532182537325,
          0.9912367388407017],
 '4.00': [1.02455453397985,
          0.9723698649146737,
          0.0048616156760657794,
          0.01790135732732076,
          1.000241178138463]}


In [14]:
# Placeholder columns that are filled in per trial further down the notebook.
trials['pr_sum'] = np.zeros(len(trials))
# Give every row its own empty list: `[[]] * n` would store the SAME list object
# in every row, so an in-place append on one row would show up in all of them.
trials['click_prs'] = [[] for _ in range(len(trials))]

In [68]:
# Build 16 seeded environments per information cost.
# Every cost level uses the same seed (ground_truth=1).
# NOTE: the dict keys are str(cost), i.e. '0.25', '1.0', '4.0'. They do not match
# the two-decimal keys used in q_weights ('1.00', '4.00').
dist = Normal(0, 10)
reward = dist.to_discrete(6)
costs = [0.25, 1.00, 4.00]
stimuli_envs = {
    str(c): make_envs([4, 1, 2], reward, c, n=16, ground_truth=1)
    for c in costs
}
stimuli_envs

{'0.25': [<mouselab.MouselabEnv at 0x2aacda66b710>,
  <mouselab.MouselabEnv at 0x2aacda66bac8>,
  <mouselab.MouselabEnv at 0x2aacda66bd68>,
  <mouselab.MouselabEnv at 0x2aacda66bfd0>,
  <mouselab.MouselabEnv at 0x2aacda67d048>,
  <mouselab.MouselabEnv at 0x2aacda67d2b0>,
  <mouselab.MouselabEnv at 0x2aacda67d518>,
  <mouselab.MouselabEnv at 0x2aacda67d780>,
  <mouselab.MouselabEnv at 0x2aacda67d9e8>,
  <mouselab.MouselabEnv at 0x2aacda67cd30>,
  <mouselab.MouselabEnv at 0x2aacda66b390>,
  <mouselab.MouselabEnv at 0x2aacda6c2198>,
  <mouselab.MouselabEnv at 0x2aacda6c2400>,
  <mouselab.MouselabEnv at 0x2aacda6c2668>,
  <mouselab.MouselabEnv at 0x2aacda6c28d0>,
  <mouselab.MouselabEnv at 0x2aacda6c2b38>],
 '1.0': [<mouselab.MouselabEnv at 0x2aacda6c2e10>,
  <mouselab.MouselabEnv at 0x2aacda6c6198>,
  <mouselab.MouselabEnv at 0x2aacda6c64e0>,
  <mouselab.MouselabEnv at 0x2aacda6c6828>,
  <mouselab.MouselabEnv at 0x2aacda6c6b70>,
  <mouselab.MouselabEnv at 0x2aacda6c6eb8>,
  <mouselab.Mous

In [69]:
# Evaluate a LiederPolicy on every stimulus environment and record, per
# (environment, cost level), the policy's utility and observation count as
# reported by `evaluate`, plus the env's true_Q(0) and worst_Q(0).

# One weight vector per cost level, in the same order as `costs`.
thetas = [[2.55,0.47,0.18,0.35,1.0],[1.00,0.42,0.42,0.17,1.0],[5.74,0.32,0.00,0.68,1.0]]

# Result tables: rows are environments, columns are cost levels.
n_costs = len(costs)
n_envs = len(stimuli_envs[str(costs[0])])
optimal_reward = np.zeros((n_envs, n_costs))
worst_reward = np.zeros((n_envs, n_costs))
lc_pol_reward = np.zeros((n_envs, n_costs))
clicks = np.zeros((n_envs, n_costs))

for cost_id in range(n_costs):
    policy = LiederPolicy(thetas[cost_id])
    envs = stimuli_envs[str(costs[cost_id])]
    ev = evaluate(policy, envs)
    lc_pol_reward[:, cost_id] = ev['util']
    clicks[:, cost_id] = ev['observations']
    for env_id in range(n_envs):
        # Bind the env BEFORE resetting it. The original called env.reset()
        # first, which raised NameError on a fresh kernel or reset the
        # previous iteration's env, so this env was queried in whatever state
        # `evaluate` had left it.
        env = envs[env_id]
        env.reset()
        optimal_reward[env_id, cost_id] = env.true_Q(0)
        worst_reward[env_id, cost_id] = env.worst_Q(0)
        # Leave the env clean for later cells that reuse stimuli_envs.
        env.reset()

In [71]:
# Show env.true_Q(0) per environment (rows) and cost level (columns).
optimal_reward

array([[  4.,   4.,   4.],
       [ 36.,  36.,  36.],
       [ 36.,  36.,  36.],
       [ -4.,  -4.,  -4.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 28.,  28.,  28.],
       [ 20.,  20.,  20.],
       [ 12.,  12.,  12.],
       [ 28.,  28.,  28.],
       [ 12.,  12.,  12.],
       [ 36.,  36.,  36.],
       [ 44.,  44.,  44.],
       [ 28.,  28.,  28.],
       [ 36.,  36.,  36.]])

In [72]:
# Show env.worst_Q(0) per environment (rows) and cost level (columns).
worst_reward

array([[-36., -36., -36.],
       [-12., -12., -12.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-12., -12., -12.],
       [ -4.,  -4.,  -4.],
       [ -4.,  -4.,  -4.],
       [-60., -60., -60.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-44., -44., -44.],
       [-20., -20., -20.],
       [-20., -20., -20.],
       [-36., -36., -36.],
       [-20., -20., -20.],
       [-12., -12., -12.]])

In [73]:
# Show the policy's 'util' from `evaluate` per environment (rows) and cost level (columns).
lc_pol_reward

array([[  1.000e+00,  -8.000e+00,  -4.441e-16],
       [  3.400e+01,   2.800e+01,  -4.441e-16],
       [  3.375e+01,   2.700e+01,  -4.441e-16],
       [  5.750e+00,   1.000e+00,  -4.441e-16],
       [  2.575e+01,   2.000e+01,  -4.441e-16],
       [  2.575e+01,   1.100e+01,  -4.441e-16],
       [  2.575e+01,   1.900e+01,  -4.441e-16],
       [  2.475e+01,   1.100e+01,  -4.441e-16],
       [  1.750e+01,   1.300e+01,  -4.441e-16],
       [  9.250e+00,   1.000e+00,  -4.441e-16],
       [  2.600e+01,   2.000e+01,  -4.441e-16],
       [  8.750e+00,   2.000e+00,  -4.441e-16],
       [  3.400e+01,   2.700e+01,  -4.441e-16],
       [  3.100e+01,   2.900e+01,  -4.441e-16],
       [  2.625e+01,   1.800e+01,  -4.441e-16],
       [  3.350e+01,   7.000e+00,  -4.441e-16]])

In [74]:
# Rescale the policy's utility so that worst_reward maps to 0 and optimal_reward maps to 1.
rel = np.divide(
    np.subtract(lc_pol_reward, worst_reward),
    np.subtract(optimal_reward, worst_reward),
)
rel

array([[ 0.925,  0.7  ,  0.9  ],
       [ 0.958,  0.833,  0.25 ],
       [ 0.96 ,  0.839,  0.357],
       [ 1.305,  1.156,  1.125],
       [ 0.944,  0.8  ,  0.3  ],
       [ 0.93 ,  0.469,  0.125],
       [ 0.93 ,  0.719,  0.125],
       [ 0.963,  0.807,  0.682],
       [ 0.938,  0.825,  0.5  ],
       [ 0.943,  0.771,  0.75 ],
       [ 0.972,  0.889,  0.611],
       [ 0.898,  0.688,  0.625],
       [ 0.964,  0.839,  0.357],
       [ 0.838,  0.812,  0.45 ],
       [ 0.964,  0.792,  0.417],
       [ 0.948,  0.396,  0.25 ]])

In [75]:
# Show the policy's 'observations' from `evaluate` per environment (rows) and cost level (columns).
clicks

array([[ 12.,  12.,   0.],
       [  8.,   8.,   0.],
       [  9.,   9.,   0.],
       [  9.,   7.,   0.],
       [  9.,   4.,   0.],
       [  9.,   9.,   0.],
       [  9.,   9.,   0.],
       [ 13.,   9.,   0.],
       [ 10.,   7.,   0.],
       [ 11.,  11.,   0.],
       [  8.,   8.,   0.],
       [ 13.,  10.,   0.],
       [  8.,   5.,   0.],
       [  4.,   3.,   0.],
       [  7.,  10.,   0.],
       [ 10.,   9.,   0.]])

In [77]:
# Write each result table to its own comma-separated file.
stimuli_outputs = (
    ('../experiments/data/stimuli/exp1/optimal1A.2.csv', optimal_reward),
    ('../experiments/data/stimuli/exp1/worst1A.2.csv', worst_reward),
    ('../experiments/data/stimuli/exp1/rel_score_pi_star1A.2.csv', rel),
    ('../experiments/data/stimuli/exp1/nr_observations_pi_star1A.2.csv', clicks),
)
for out_path, table in stimuli_outputs:
    np.savetxt(out_path, table, delimiter=",")

In [13]:
# Score every recorded click sequence against the Q-weights for its cost level.
# For each action taken, pr = Q(action) - max over currently available actions of Q,
# where Q is the inner product of the weights with the env's action features.
# Per trial, the individual values go to 'click_prs' and their sum to 'pr_sum'.
# Requires the 'pr_sum' and 'click_prs' columns to exist already, with
# 'click_prs' of object dtype so a list can be stored in a single cell.
for index, row in trials.iterrows():
    if index % 20 == 0: print(index)
    cost = row['info_cost']
    trial_id = row['trial_index']

    # stimuli_envs is keyed by str(cost) (e.g. '1.0'); trial_index is 1-based.
    menv = stimuli_envs[str(cost)][trial_id-1]
    # q_weights is keyed by two-decimal strings ('0.25', '1.00', '4.00').
    s_cost = '{:.2f}'.format(cost)
    q_weight = q_weights[s_cost]

    # Envs are shared across trials. Reset first so an earlier run that was
    # interrupted mid-trial cannot leave stale state behind.
    menv.reset()

    # Named `trial_clicks` so it does not overwrite the `clicks` array that is
    # computed and saved elsewhere in this notebook.
    trial_clicks = ast.literal_eval(row['clicks'])
    trial_clicks.append(17)  # presumably the terminate action for these envs — TODO confirm
    pr_sum = 0
    click_prs = []
    for a in trial_clicks:
        q_chosen = np.inner(q_weight, menv.action_features(a))
        q_best = np.max([np.inner(q_weight, menv.action_features(a2))
                         for a2 in menv.actions(menv._state)])
        pr = q_chosen - q_best
        menv.step(a)
        pr_sum += pr
        click_prs.append(pr)

    # DataFrame.set_value is deprecated (removed in pandas 1.0); .at is its replacement.
    trials.at[index, 'pr_sum'] = pr_sum
    trials.at[index, 'click_prs'] = click_prs
    menv.reset()

0
20


KeyboardInterrupt: 

In [48]:
# Keep only the identifying columns and the computed PR columns for export.
export_columns = ['pid', 'info_cost', 'trial_index', 'pr_sum', 'click_prs']
csv_ready = trials[export_columns]

In [49]:
# Display the export table (identifiers plus pr_sum / click_prs) before writing it out.
csv_ready

Unnamed: 0,pid,info_cost,trial_index,pr_sum,click_prs
0,0,1.00,1,-21.801828,"[0.0, -3.48967110132, -4.38808088052, -4.38808..."
1,0,1.00,2,-10.724908,"[0.0, -1.78773376916, -8.9371740738]"
2,0,1.00,3,-5.819986,"[0.0, -0.394992325395, -5.42499349295]"
3,0,1.00,4,-6.452739,"[0.0, 0.0, 0.0, -0.432655729455, -6.02008290493]"
4,0,1.00,5,-6.910454,"[0.0, -6.91045391442]"
5,0,1.00,6,-12.222219,[-12.2222185374]
6,0,1.00,7,-12.222219,[-12.2222185374]
7,0,1.00,8,-12.222219,[-12.2222185374]
8,0,1.00,9,-12.222219,[-12.2222185374]
9,0,1.00,10,-10.693319,"[0.0, -0.432655729455, -1.1453229211, -1.14532..."


In [39]:
# Reshape to long format: one row per (pid, info_cost, trial_index, click position).
# Each 'click_prs' list is expanded into columns, then stacked back into rows.
res = (
    trials
    .set_index(['pid', 'info_cost', 'trial_index'])['click_prs']
    .apply(pd.Series)
    .stack()
    .reset_index()
)
res.columns = ['pid', 'info_cost', 'trial_index', 'click_num', 'click_pr']
res

Unnamed: 0,pid,info_cost,trial_index,click_num,click_pr
0,0,1.0,1,0,0.000000
1,0,1.0,1,1,-3.489671
2,0,1.0,1,2,-4.388081
3,0,1.0,1,3,-4.388081
4,0,1.0,1,4,0.000000
5,0,1.0,1,5,-1.839783
6,0,1.0,1,6,-1.502107
7,0,1.0,1,7,-1.360207
8,0,1.0,1,8,0.000000
9,0,1.0,1,9,-1.253846


In [50]:
# Write the per-trial table and the per-click long table to CSV.
pilot_outputs = (
    (csv_ready, '../experiments/data/human/0.991/pilot_prs.csv'),
    (res, '../experiments/data/human/0.991/pilot_click_prs.csv'),
)
for frame, out_path in pilot_outputs:
    frame.to_csv(out_path)