In [118]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
from agents import Agent
from mouselab import MouselabEnv
from distributions import Normal, Categorical
from policies import FixedPlanPolicy, LiederPolicy
from evaluation import *
from pprint import pprint
import ast

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [119]:
# Load the per-trial human data. Each row is one trial of one participant
# (`pid`); the columns used further down are `info_cost`, `trial_index` and
# `clicks` (a string holding a Python-style list of clicked node ids).
trials = pd.read_csv('../experiments/data/human/0.991/trials.csv')

In [120]:
# Preview only the first rows; rendering the whole frame bloats the notebook.
trials.head(10)

Unnamed: 0.1,Unnamed: 0,pid,info_cost,PR_type,message,trial_index,trial_i,delays,score,n_click,clicks,click_times,path,action_times
0,0,0,1.00,none,none,1,,[3],12.00,16,"[5, 6, 7, 8, 9, 10, 12, 11, 1, 2, 3, 4, 13, 14...","[2692, 3813, 4808, 7601, 13713, 15045, 15845, ...","[1, 5, 6, 7]","[71892, 77817, 81863]"
1,1,0,1.00,none,none,2,,[3],26.00,2,"[5, 9]","[3801, 5825]","[1, 9, 10, 12]","[7534, 21008, 22827]"
2,2,0,1.00,none,none,3,,[3],26.00,2,"[5, 6]","[2081, 3815]","[1, 5, 6, 8]","[5649, 14086, 16738]"
3,3,0,1.00,none,none,4,,[3],-8.00,4,"[5, 13, 9, 1]","[1609, 2946, 4339, 5745]","[1, 5, 6, 7]","[8788, 15820, 17077]"
4,4,0,1.00,none,none,5,,[3],19.00,1,[5],[3112],"[1, 5, 6, 7]","[5477, 9516, 14710]"
5,5,0,1.00,none,none,6,,[3],4.00,0,[],[],"[1, 5, 6, 8]","[3077, 7200, 10754]"
6,6,0,1.00,none,none,7,,[3],28.00,0,[],[],"[1, 9, 10, 12]","[2198, 6520, 8274]"
7,7,0,1.00,none,none,8,,[3],-4.00,0,[],[],"[1, 1, 2, 4]","[3118, 6943, 8352]"
8,8,0,1.00,none,none,9,,[3],4.00,0,[],[],"[1, 5, 6, 7]","[2954, 6727, 8846]"
9,9,0,1.00,none,none,10,,[3],-8.00,4,"[5, 9, 13, 1]","[7675, 8558, 9695, 10726]","[1, 9, 10, 12]","[12048, 25717, 27326]"


In [121]:
def make_env(depth, reward, cost, ground_truth=False):
    """Build one MouselabEnv, optionally with a fixed ground truth.

    Args:
        depth: forwarded unchanged as the first positional argument of
            MouselabEnv (callers in this notebook pass a list such as
            [4, 1, 2] -- presumably the branching per level; TODO confirm).
        reward: reward distribution; forwarded to MouselabEnv and, when
            `ground_truth` is truthy, sampled via `reward.sample(k)`.
        cost: forwarded to MouselabEnv as `cost`.
        ground_truth: if truthy, draw one reward per non-root node now and
            store it on the env so the rewards observed at each node stay
            constant across runs on this env (reduces variance of the return).

    Returns:
        The constructed MouselabEnv.
    """
    mouselab_env = MouselabEnv(depth, reward=reward, cost=cost)
    if not ground_truth:
        return mouselab_env

    # The first entry is pinned to 0; every other node gets a sampled reward.
    n_sampled_nodes = len(mouselab_env.tree) - 1
    sampled_rewards = reward.sample(n_sampled_nodes)
    mouselab_env.ground_truth = np.array([0, *sampled_rewards])
    return mouselab_env

def make_envs(depth, reward, cost, n=100, ground_truth=None):
    """Build a list of `n` independent MouselabEnv instances.

    Args:
        depth, reward, cost: forwarded unchanged to `make_env`.
        n: number of environments to create.
        ground_truth: None for envs without a fixed ground truth. Any other
            value is used as the seed for NumPy's global RNG, and every env
            is then built with a fixed ground truth. Note that `False` and
            `0` are *not* None, so they also seed the RNG and fix the
            ground truth.

    Returns:
        A list of `n` distinct env objects.

    Side effects:
        Re-seeds NumPy's global RNG when `ground_truth` is not None.
    """
    fix_ground_truth = ground_truth is not None
    if fix_ground_truth:
        np.random.seed(ground_truth)
    # Build each env separately. `[env] * n` would repeat a reference to one
    # shared object, so stepping or resetting any entry would affect them all.
    return [make_env(depth, reward, cost, fix_ground_truth) for _ in range(n)]

In [124]:
# Read the weight vectors from disk. The file maps an info-cost string with
# two decimals ('0.25', '1.00', '4.00') to a list of five floats.
with open('../experiments/exp1/static/json/q_weights.json') as data_file:
    q_weights = json.loads(data_file.read())
pprint(q_weights)

{'0.25': [2.257763937377627,
          0.10443792456696127,
          0.05249963763298343,
          0.8577953703345867,
          0.9978041373296709],
 '1.00': [2.978177180936807,
          0.3624859344787723,
          0.19189036343344185,
          0.5238392058288387,
          1.000876097233912],
 '4.00': [0.9827822354989232,
          1.005266776096542,
          -0.0016595283956967466,
          -0.0027069522504898046,
          1.0003765120051233]}


In [126]:
# Create the `pr_sum` column as float zeros, one per trial row; the loop
# further down overwrites each entry with the value computed for that trial.
trials['pr_sum'] = np.zeros(trials.shape[0])

In [133]:
# Reward distribution for the stimuli: Normal(0, 10) discretised with
# to_discrete(6) (presumably mean 0 / sd 10 and 6 support points -- confirm
# against the `distributions` module).
dist = Normal(0, 10)
reward = dist.to_discrete(6)
costs = [0.25, 1.00, 4.00]

# One list of 16 fixed-ground-truth envs per info cost, keyed by str(cost),
# i.e. '0.25', '1.0', '4.0'. make_envs re-seeds NumPy's global RNG with
# ground_truth=1 on every call, so each cost starts from the same seed.
stimuli_envs = dict(
    (str(cost), make_envs([4, 1, 2], reward, cost, n=16, ground_truth=1))
    for cost in costs
)
stimuli_envs

{'0.25': [<mouselab.MouselabEnv at 0x1256715c0>,
  <mouselab.MouselabEnv at 0x126bb62e8>,
  <mouselab.MouselabEnv at 0x126bca9e8>,
  <mouselab.MouselabEnv at 0x12568e860>,
  <mouselab.MouselabEnv at 0x1256717b8>,
  <mouselab.MouselabEnv at 0x126b0c7b8>,
  <mouselab.MouselabEnv at 0x125693470>,
  <mouselab.MouselabEnv at 0x12569ba90>,
  <mouselab.MouselabEnv at 0x126b58ac8>,
  <mouselab.MouselabEnv at 0x126bde320>,
  <mouselab.MouselabEnv at 0x126bdeb38>,
  <mouselab.MouselabEnv at 0x126be75f8>,
  <mouselab.MouselabEnv at 0x1256a8e10>,
  <mouselab.MouselabEnv at 0x114770080>,
  <mouselab.MouselabEnv at 0x126b2be80>,
  <mouselab.MouselabEnv at 0x1256fd2b0>],
 '1.0': [<mouselab.MouselabEnv at 0x1256fd550>,
  <mouselab.MouselabEnv at 0x1256fd1d0>,
  <mouselab.MouselabEnv at 0x1256fd828>,
  <mouselab.MouselabEnv at 0x1256fd668>,
  <mouselab.MouselabEnv at 0x1256fd978>,
  <mouselab.MouselabEnv at 0x126c0c400>,
  <mouselab.MouselabEnv at 0x126c0c518>,
  <mouselab.MouselabEnv at 0x126c0cb70>,


In [134]:
# For every human trial, replay the recorded clicks on the matching stimulus
# env and accumulate, per action, the gap between the weighted feature value
# of the action taken and the best weighted feature value available in the
# current state. The (non-positive) total is stored in trials['pr_sum'].
for index, row in trials.iterrows():
    if index % 20 == 0: print(index)  # coarse progress indicator
    cost = row['info_cost']
    trial_id = row['trial_index']

    # stimuli_envs is keyed by str(cost) ('0.25', '1.0', '4.0'); trial_index
    # is 1-based in the data, the env list is 0-based.
    menv = stimuli_envs[str(float(cost))][trial_id - 1]
    # q_weights is keyed with two decimals ('0.25', '1.00', '4.00').
    q_weight = q_weights['{:.2f}'.format(cost)]

    clicks = ast.literal_eval(row['clicks'])
    # Close every episode with action 17 -- presumably the terminate action
    # of the [4, 1, 2] tree (17 nodes, ids 0..16); TODO confirm.
    clicks.append(17)

    pr_sum = 0
    try:
        for a in clicks:
            q_taken = np.inner(q_weight, menv.action_features(a))
            q_best = np.max([np.inner(q_weight, menv.action_features(a2))
                             for a2 in menv.actions(menv._state)])
            menv.step(a)
            pr_sum += q_taken - q_best
    finally:
        # Envs are reused across rows; always leave them reset, even when a
        # row fails, so a re-run does not start from a half-played episode.
        menv.reset()

    # DataFrame.set_value is deprecated/removed; .at is the scalar setter.
    trials.at[index, 'pr_sum'] = pr_sum

0
20
40
60
80
100
120
140
160
180
200
220
240
260
280
300
320
340
360
380
400
420
440
460
480
500
520
540
560
580
600
620
640
660
680
700
720
740
760
780
800
820
840
860
880
900
920
940
960
980
1000
1020
1040
1060
1080
1100
1120
1140
1160
1180
1200
1220
1240
1260
1280
1300
1320
1340
1360
1380
1400
1420
1440
1460
1480
1500
1520


In [135]:
# Keep only the identifying columns plus the computed `pr_sum` for export.
csv_ready = trials[['pid','info_cost','trial_index','pr_sum']]

In [136]:
# Preview only the first rows rather than rendering the whole frame.
csv_ready.head(10)

Unnamed: 0,pid,info_cost,trial_index,pr_sum
0,0,1.00,1,-18.339020
1,0,1.00,2,-9.742874
2,0,1.00,3,-5.019801
3,0,1.00,4,-5.653332
4,0,1.00,5,-6.346801
5,0,1.00,6,-11.961760
6,0,1.00,7,-11.961760
7,0,1.00,8,-11.961760
8,0,1.00,9,-11.961760
9,0,1.00,10,-9.395966


In [132]:
# Write the per-trial pr_sum table to the notebook's working directory.
# NOTE(review): this cell's execution count (132) is lower than that of the
# cell defining csv_ready (135), so the file on disk may predate the values
# shown above -- re-run this cell after the loop to be sure.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed column; confirm downstream readers expect that.
csv_ready.to_csv('pilot_prs.csv')