In [43]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.stats
from scipy.stats import norm
import scipy.integrate as integrate

import gym
from gym import spaces

import random
import itertools as it
from joblib import Parallel, delayed
from toolz import memoize
from contracts import contract
from collections import namedtuple, defaultdict, deque, Counter

import warnings
warnings.filterwarnings("ignore", 
                        message="The objective has been evaluated at this point before.")

from agents import Agent
from oldmouselab import OldMouselabEnv
from policies import FixedPlanPolicy, LiederPolicy
from evaluation import *
from distributions import cmax, smax, sample, expectation, Normal, PointMass, SampleDist, Normal, Categorical

In [44]:
def hd_dist(attributes):
    dist = [1,]*attributes
    dist[0] = np.random.randint(85,97)
    for i in range(1,attributes-1):
        dist[i] += np.random.randint(0,100-np.sum(dist))
    dist[-1] += 100-np.sum(dist)
    dist = np.around(np.array(dist)/100,decimals=2)
    np.random.shuffle(dist)
    return dist

def ld_dist(attributes):
    """Draw a random low-dispersion weight vector with ``attributes`` entries.

    Each entry starts from an integer weight drawn uniformly from 10..39.
    The weights are normalised to whole percentage points that total exactly
    100, converted to two-decimal fractions and shuffled in place.

    Rounding every normalised weight independently can leave the total at
    0.99 or 1.01, so the points are allocated with the largest-remainder
    method instead: floor every share, then give the missing points to the
    entries with the largest fractional parts.

    Returns a 1-D float array of length ``attributes`` that sums to 1
    (an empty array when ``attributes`` is 0).
    """
    weights = np.array([np.random.randint(10, 40) for _ in range(attributes)])
    # Multiply before dividing so shares that are exact integers stay exact.
    shares = 100 * weights / weights.sum()
    points = np.floor(shares).astype(int)
    shortfall = 100 - points.sum()  # number of points lost to flooring
    if shortfall > 0:
        by_fraction = np.argsort(shares - points)[::-1]  # largest remainder first
        points[by_fraction[:shortfall]] += 1
    dist = np.around(points / 100, decimals=2)
    np.random.shuffle(dist)
    return dist

In [45]:
# Configuration shared by every test environment built in this cell.
gambles = 7
attributes = 4

# Reward distributions for the two stake levels, each written as
# Normal(midpoint, 0.3 * width) of a payoff interval.
# NOTE(review): presumably the arguments are (mean, sd) -- confirm against distributions.Normal.
high_stakes = Normal((9.99+0.01)/2, 0.3*(9.99-0.01))
low_stakes = Normal((0.25+0.01)/2, 0.3*(0.25-0.01))

# NOTE(review): all test environments below use the high-stakes reward, including
# the ones later evaluated with the low-stakes policy -- confirm this is intended.
reward = high_stakes
cost = .03

n_test = 100

# NOTE(review): no random seed is set, so the sampled environments differ between runs.
test_envs_hd = [OldMouselabEnv(gambles, hd_dist(attributes), reward, cost) for _ in range(n_test)]
test_envs_ld = [OldMouselabEnv(gambles, ld_dist(attributes), reward, cost) for _ in range(n_test)]
test_envs_all = test_envs_hd + test_envs_ld

# The original read `train_envs[0].term_action`, but no cell up to this point defines
# `train_envs`, so it raised NameError on a fresh kernel. Use a test environment instead.
term_action = test_envs_all[0].term_action

In [46]:
# One Agent instance is shared by all episodes; run_env re-registers the
# environment and the policy on it before every run.
agent = Agent()

def run_env(policy, env):
    """Run a single episode of ``policy`` on ``env`` with the shared ``agent``.

    Returns a dict holding the episode return ('util'), the list of actions
    ('actions'), the number of actions minus one ('observations') and the
    environment's ``ground_truth``.
    """
    agent.register(env)
    agent.register(policy)
    episode = agent.run_episode()
    util = episode['return']
    taken = episode['actions']
    return {
        'util': util,
        'actions': taken,
        'observations': len(taken) - 1,
        'ground_truth': env.ground_truth,
    }

def _flatten_chunks(chunks):
    """Concatenate scalars/sequences into one flat array (float64 for numeric input)."""
    # Seeding with an empty float array reproduces what repeated np.append on
    # np.array([]) yields, including the empty result when there are no chunks.
    return np.concatenate([np.array([])] + [np.ravel(chunk) for chunk in chunks])


def characterize_strategy(policy, envs):
    """Run ``policy`` once on every environment in ``envs`` and pool the traces.

    The original body ignored both parameters and used the undefined globals
    ``bo_pol``, ``test_envs`` and ``train_envs``; it now uses only its arguments.

    Parameters
    ----------
    policy : policy object accepted by ``run_env``.
    envs : sequence of environments; each must provide ``reset()`` and ``dist``.

    Returns
    -------
    dict of flat NumPy arrays:
        'returns'       -- episode return per environment ('util' from ``run_env``)
        'observations'  -- 'observations' value per environment from ``run_env``
        'operations'    -- the actions of all episodes, concatenated
        'nr_clicks'     -- ``len(actions) - 1`` per environment
        'probabilities' -- the ``dist`` vectors of all environments, concatenated
    """
    returns, observations, operations, nr_clicks, probabilities = [], [], [], [], []

    for env in envs:
        # Reset the environment that is about to be evaluated (the original reset
        # an unrelated list). NOTE(review): run_episode may reset as well -- confirm.
        env.reset()
        trace = run_env(policy, env)
        returns.append(trace['util'])
        observations.append(trace['observations'])
        operations.append(trace['actions'])
        nr_clicks.append(len(trace['actions']) - 1)
        probabilities.append(env.dist)

    # Collect in lists and concatenate once instead of np.append inside the loop,
    # which copies the whole array on every iteration.
    return {
        'returns': _flatten_chunks(returns),
        'observations': _flatten_chunks(observations),
        'operations': _flatten_chunks(operations),
        'nr_clicks': _flatten_chunks(nr_clicks),
        'probabilities': _flatten_chunks(probabilities),
    }

# High stakes, high dispersion

In [47]:
# Load the array stored in data/high_stakes_3cents.npy and wrap its entries
# (as a list) in a LiederPolicy; this is the policy used in the high-stakes sections.
bo_pol_theta = np.load('data/high_stakes_3cents.npy')
bo_pol_high_stakes = LiederPolicy(list(bo_pol_theta))

In [48]:
# Evaluate the high-stakes policy on the high-dispersion test environments.
hd_hs = characterize_strategy(bo_pol_high_stakes,test_envs_hd)

In [49]:
# Clicks per episode for the high-stakes policy in the high-dispersion environments.
avg_nr_clicks_hd_hs = np.mean(hd_hs['nr_clicks'])
min_nr_clicks_hd_hs = np.min(hd_hs['nr_clicks'])
max_nr_clicks_hd_hs = np.max(hd_hs['nr_clicks'])

# '.1f' always prints one decimal. The previous '3.2' spec meant two significant
# digits, so a mean of 10 or more would have been printed without its decimal.
print('In the high-dispersion environment the BO policy made between {} and {} clicks. The average number of clicks was {:.1f}'.format(min_nr_clicks_hd_hs,max_nr_clicks_hd_hs,avg_nr_clicks_hd_hs))

In the high-dispersion environment the BO policy made between 3.0 and 18.0 clicks. The average number of clicks was 8.9


# High stakes, low dispersion

In [None]:
# Reload the high-stakes policy. This repeats the load from the previous section
# and rebinds the same two names, so the cell can be run on its own.
bo_pol_theta = np.load('data/high_stakes_3cents.npy')
bo_pol_high_stakes = LiederPolicy(list(bo_pol_theta))

In [None]:
# Evaluate the high-stakes policy on the low-dispersion test environments.
ld_hs = characterize_strategy(bo_pol_high_stakes,test_envs_ld)

In [None]:
# Clicks per episode for the high-stakes policy in the low-dispersion environments.
# These statistics must come from ld_hs; the original read hd_hs (copy-paste slip)
# and therefore repeated the high-dispersion numbers.
avg_nr_clicks_ld_hs = np.mean(ld_hs['nr_clicks'])
min_nr_clicks_ld_hs = np.min(ld_hs['nr_clicks'])
max_nr_clicks_ld_hs = np.max(ld_hs['nr_clicks'])

# '.1f' always prints one decimal. The previous '4.2' spec meant two significant
# digits, so a mean of 10 or more would have been printed without its decimal.
print('In the low-dispersion environment the BO policy made between {} and {} clicks. The average number of clicks was {:.1f}'.format(min_nr_clicks_ld_hs,max_nr_clicks_ld_hs,avg_nr_clicks_ld_hs))

# Low stakes, high dispersion

In [None]:
# Load the array stored in data/low_stakes_3cents.npy and wrap its entries
# (as a list) in a LiederPolicy. Note that this rebinds bo_pol_theta, which
# held the high-stakes array before.
bo_pol_theta = np.load('data/low_stakes_3cents.npy')
bo_pol_low_stakes = LiederPolicy(list(bo_pol_theta))

In [None]:
# Evaluate the low-stakes policy on the high-dispersion test environments.
# NOTE(review): test_envs_hd is built in the setup cell with `reward = high_stakes`,
# so the low-stakes policy is run on high-stakes environments -- confirm this is intended.
hd_ls = characterize_strategy(bo_pol_low_stakes,test_envs_hd)

In [None]:
# Clicks per episode for the low-stakes policy in the high-dispersion environments.
avg_nr_clicks_hd_ls = np.mean(hd_ls['nr_clicks'])
min_nr_clicks_hd_ls = np.min(hd_ls['nr_clicks'])
max_nr_clicks_hd_ls = np.max(hd_ls['nr_clicks'])

# '.1f' always prints one decimal. The previous '4.2' spec meant two significant
# digits, so a mean of 10 or more would have been printed without its decimal.
print('In the high-dispersion environment the low-stakes policy made between {} and {} clicks. The average number of clicks was {:.1f}'.format(min_nr_clicks_hd_ls,max_nr_clicks_hd_ls,avg_nr_clicks_hd_ls))

# Low stakes, low dispersion

In [None]:
# Reload the low-stakes policy. This repeats the load from the previous section
# and rebinds the same two names, so the cell can be run on its own.
bo_pol_theta = np.load('data/low_stakes_3cents.npy')
bo_pol_low_stakes = LiederPolicy(list(bo_pol_theta))

In [None]:
# Evaluate the low-stakes policy on the low-dispersion test environments.
# NOTE(review): test_envs_ld is built in the setup cell with `reward = high_stakes`,
# so the low-stakes policy is run on high-stakes environments -- confirm this is intended.
ld_ls = characterize_strategy(bo_pol_low_stakes,test_envs_ld)

In [None]:
# Clicks per episode for the low-stakes policy in the low-dispersion environments.
avg_nr_clicks_ld_ls = np.mean(ld_ls['nr_clicks'])
min_nr_clicks_ld_ls = np.min(ld_ls['nr_clicks'])
max_nr_clicks_ld_ls = np.max(ld_ls['nr_clicks'])

# '.1f' always prints one decimal. The previous '4.2' spec meant two significant
# digits, so a mean of 10 or more would have been printed without its decimal.
print('In the low-dispersion environment the low-stakes policy made between {} and {} clicks. The average number of clicks was {:.1f}'.format(min_nr_clicks_ld_ls,max_nr_clicks_ld_ls,avg_nr_clicks_ld_ls))