In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import itertools as it
from collections import Counter, defaultdict, deque
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from pandas import DataFrame, Series
from policies import SoftmaxPolicy
import seaborn as sns
sns.set_style('white')
sns.set_context('notebook', font_scale=1.3)

from toolz.curried import get, curry
from agents import run_episode

In [2]:
import ipyparallel as ipp 
# Connect to the ipyparallel cluster of the 'default' profile (empty cluster_id).
rc = ipp.Client(profile='default', cluster_id='')
# Register ipyparallel as a joblib backend.
# NOTE(review): the Parallel(...) call further down does not pass
# backend='ipyparallel' -- confirm whether this backend is actually used.
ipp.register_joblib_backend()

# Define environment

In [3]:
from mouselab import MouselabEnv
from distributions import Categorical, Normal

def make_env(mu=0, sigma=4, branching=[3,1,2], cost=1.00, scaling_factors=[1, 1, 1], seed=None, **kwargs):
    """Build a symmetric MouselabEnv with depth-dependent reward spread.

    Nodes at depth d > 0 get a Categorical reward over the values
    mu + sigma * x * scaling_factors[d-1] for x in (-2, -1, 1, 2), passed
    through `.apply(round)`. Depth 0 (the root) has reward 0.

    If `seed` is not None, NumPy's global RNG is seeded first. Any extra
    keyword arguments are forwarded to `MouselabEnv.new_symmetric`.
    """
    if seed is not None:
        np.random.seed(seed)

    # Standardised outcomes shared by every depth; only the scale varies.
    offsets = np.array([-2, -1, 1, 2])

    def reward(depth):
        if not depth > 0:
            return 0.
        scale = scaling_factors[depth - 1]
        return Categorical(mu + sigma * offsets * scale).apply(round)

    return MouselabEnv.new_symmetric(branching, reward, cost=cost, **kwargs)

In [4]:
# Per-depth multipliers for the reward spread; make_env indexes them by depth-1.
SCALING = {
    'increasing': [1/2, 1, 6],
    'decreasing': [6, 1, 1/2]
}
# One environment per variance structure, created with ground_truth=False.
envs = {name: make_env(scaling_factors=sf, ground_truth=False)
        for name, sf in SCALING.items()}
# Module-level default environment; the preference functions below read this global.
env = envs['increasing']

# Alternative models

## Satisficing and pruning

In [5]:
from functools import wraps

def observed(state, node):
    """Return True when `state[node]` has no `sample` attribute.

    An entry that still exposes `sample` is treated as not yet observed.
    """
    entry = state[node]
    if hasattr(entry, 'sample'):
        return False
    return True

@curry
def satisfruning(preference, prune=None, satisfice=None, direction='forward', env=env):
    """Wrap a preference function with satisficing, pruning and ordering rules.

    Parameters
    ----------
    preference : callable(state, action) -> number
        Base preference that is consulted when no rule fires.
    prune : number or None
        If given, a node whose `env.node_quality(action, state).expectation()`
        is <= prune gets preference -1e20.
    satisfice : number or None
        If given, the terminate action gets 1e10 when
        `env.expected_term_reward(state) >= satisfice` and -1e10 otherwise.
    direction : 'forward', 'backward' or anything else
        'forward' forbids (-1e10) clicking a node whose parent is unobserved;
        'backward' forbids clicking a node that has children none of which
        is observed; any other value imposes no ordering constraint.
    env : environment
        Defaults to the module-level `env` as it was bound when this
        function was defined.

    Returns
    -------
    callable(state, action, prune=..., satisfice=..., direction=...)
        The wrapped preference; it carries the metadata of `preference`.
    """
    @wraps(preference)
    def new_pref(state, action, prune=prune, satisfice=satisfice, direction=direction):
        if action == env.term_action:
            if satisfice is not None:
                satisfied = env.expected_term_reward(state) >= satisfice
                return 1e10 if satisfied else -1e10
            # The terminate action is not a node of the tree, so the pruning
            # and direction rules (which look up its path / children) do not
            # apply to it; defer to the base preference instead.
            return preference(state, action)

        if prune is not None:
            if env.node_quality(action, state).expectation() <= prune:
                return -1e20

        if direction == 'forward':
            previous_node = env.path_to(action)[-2]  # [-1] is the clicked node
            if not observed(state, previous_node):
                return -1e10
        elif direction == 'backward':
            children = env.tree[action]
            if children and not any(observed(state, node) for node in children):
                return -1e10
        return preference(state, action)

    return new_pref

## Preferences

In [6]:
# @satisfruning(direction='forward')
def depth_first(state, action):
    """Score an action by its depth: the number of nodes on the path above it.

    `state` is unused; the path is looked up in the module-level `env`.
    """
    return len(env.path_to(action)[:-1])

In [7]:
# @satisfruning(direction='forward')
def breadth_first(state, action):
    """Prefer shallow nodes, but only those whose ancestors are all observed.

    Returns -1e10 for a node that has an unobserved ancestor, otherwise the
    negated number of ancestors (so nodes closer to the start score higher).
    """
    ancestors = env.path_to(action)[:-1]
    for node in ancestors:
        # A hidden ancestor means the node is not on the frontier.
        if not observed(state, node):
            return -1e10
    return -len(ancestors)

In [8]:
# @satisfruning(direction='forward')
def best_first(state, action):
    """Score an action by the mean of its node-quality distribution.

    `env.node_quality` takes (action, state) -- note the argument order --
    and returns a distribution; only its expectation is used.
    """
    return env.node_quality(action, state).expectation()

In [9]:
def progressive_deepening(state, action, last_click=None):
    """Preference for following one path to its end before starting another.

    Parameters
    ----------
    state : environment state, indexable by node.
    action : node being considered for the next click.
    last_click : node clicked immediately before, if known.

    Returns
    -------
    number
        -1e10 if any ancestor of `action` is unobserved. Otherwise, when
        `last_click` is given: 1e10 for the direct child of the last click,
        the depth of `last_click` for a depth-1 node right after a leaf was
        clicked, and the depth of `action` in all remaining cases. Without
        `last_click`, a leaf that shares its two-leaf group with an observed
        leaf scores 1 and everything else scores its depth.
    """
    previous_nodes = env.path_to(action)[:-1]
    if not all(observed(state, node) for node in previous_nodes):
        return -1e10
    depth = len(previous_nodes)

    if last_click:
        # version if last_click is available
        if last_click in env.leaves():
            if depth == 1:
                # the start of a new path = another leaf on the previous path
                return len(env.path_to(last_click)[:-1])
            return depth
        if last_click == previous_nodes[-1]:
            return 1e10
        return depth

    # the following only works for a 3-1-2 environment
    if action in env.leaves():
        leaf_group = None
        for subtree in env.tree:
            if (len(subtree) == 2) and (action in subtree):
                leaf_group = subtree
        # leaf_group stays None when no two-leaf group contains the action
        # (i.e. in other tree shapes); fall through to the depth in that case.
        for leaf in leaf_group or ():
            if observed(state, leaf):
                return 1  # set same value as the beginning of a new path
    return depth

# Model human data

## Load and preprocess

In [10]:
from analysis_utils import *
VERSION = 'c2.1'
exp_data = get_data(VERSION, '../experiment/data')

# Participants: keep only rows flagged as completed, indexed by pid
# (pid is also kept as a regular column).
pdf = exp_data['participants']
pdf = pdf.loc[pdf.completed].set_index('pid', drop=False)
if 'variance' in pdf:
    # Translate the numeric condition codes into the names used as SCALING keys.
    pdf.variance = pdf.variance.replace(2442, 'decreasing').replace(2424, 'increasing')
else:
    pdf['variance'] = 'constant'

print(f'{len(pdf)} participants')
complete = list(pdf.index)

def extract(q):
    """Return the click targets stored in query record `q` as a list of ints."""
    return list(map(int, q['click']['state']['target']))

# Trials: restrict to the participants retained above.
mdf = exp_data['mouselab-mdp'].set_index('pid', drop=False)
mdf = mdf.loc[complete]

mdf['clicks'] = mdf.queries.apply(extract)
mdf['n_clicks'] = mdf.clicks.apply(len)
# First element of `rt`, or 0 when there is none.
mdf['thinking'] = mdf['rt'].apply(get(0, default=0))
# Aligned on the pid index shared by mdf and pdf.
mdf['variance'] = pdf['variance']

# Test-block trials only; trial_index is shifted so that it starts at 0.
tdf = mdf.query('block == "test"').copy()
tdf.trial_index -= tdf.trial_index.min()
tdf.trial_index = tdf.trial_index.astype(int)
tdf.trial_id = tdf.trial_id.astype(int)

# Per-participant summaries.
# NOTE(review): dividing by 60000 presumably converts milliseconds to minutes,
# and the assignment relies on the survey-text index lining up with pid -- confirm.
pdf['total_time'] = exp_data['survey-text'].time_elapsed / 60000
pdf['n_clicks'] = tdf.groupby('pid').n_clicks.mean()
pdf['score'] = tdf.groupby('pid').score.mean()
pdf['thinking'] = mdf.groupby('pid').thinking.mean()

Error importing rpy2
69 participants


In [11]:
import json
def excluded_pids():
    """Return a boolean Series marking participants to exclude.

    A participant is excluded when more than one quiz answer differs from
    the modal answer within their variance group, or when they made no
    clicks at all in the "train_inspector" block. Reads the module-level
    `exp_data`, `complete`, `pdf` and `mdf`.
    """
    quiz = exp_data['survey-multi-choice'].query('pid == @complete').set_index('pid')
    answers = pd.DataFrame(list(quiz.responses), index=quiz.index)

    # The most common answer per variance group serves as the answer key.
    by_condition = answers.groupby(lambda pid: pdf.variance[pid])
    answer_key = by_condition.apply(lambda x: x.mode().iloc[0])

    # Line the key up with each participant and count mismatches.
    expected = answer_key.loc[pdf.variance].set_index(pdf.index)
    n_wrong = (expected != answers).sum(1)
    fail_quiz = n_wrong > 1

    inspector_trials = mdf.query('block == "train_inspector"')
    no_click = inspector_trials.groupby('pid').n_clicks.sum() == 0
    return fail_quiz | no_click

excluded = excluded_pids()
# Keep only trials of participants that are not excluded.
# NOTE: this rebinds `tdf` to a filtered version of itself, so re-running
# the cell filters the already-filtered frame again.
tdf = tdf.loc[~excluded]
print(f'excluding {excluded.sum()} out of {len(excluded)} partipicants')

excluding 16 out of 69 partipicants


In [12]:
def get_env(row):
    """Build the environment for one trial row.

    Sets the first entry of `row.state_rewards` to 0 in place, then creates
    an environment with the scaling factors of the row's variance condition
    and those rewards as ground truth.
    """
    rewards = row.state_rewards
    rewards[0] = 0
    factors = SCALING[row.variance]
    return make_env(scaling_factors=factors, ground_truth=rewards)
# One environment per test trial.
tdf['env'] = tdf.apply(get_env, axis=1)

def expand(df):
    """Yield one record per decision made in each trial of `df`.

    For every row the row's environment is reset and the recorded clicks
    are replayed, followed by the environment's terminate action. Each
    yielded dict holds the trial identifiers, the environment state
    *before* the action, and the action itself.
    """
    for _, trial in df.iterrows():
        trial_env = trial.env
        trial_env.reset()
        actions = list(trial.clicks)
        actions.append(trial_env.term_action)
        for act in actions:
            record = {'pid': trial.pid, 'trial_index': trial.trial_index, 'trial_id': trial.trial_id}
            record['state'] = trial_env._state
            record['action'] = act
            yield record
            # Advance only after the pre-action state has been handed out.
            trial_env.step(act)


# One row per (state, action) decision across all retained test trials,
# indexed by pid (pid is also kept as a column).
data = pd.DataFrame(expand(tdf)).set_index('pid', drop=False)
# Attach each participant's variance condition via the pid index.
data['variance'] = pdf.variance
print(f'{len(data)} observations)')
data.head()

6155 observations)


Unnamed: 0_level_0,action,pid,state,trial_id,trial_index,variance
pid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,5,0,"(0, ((-48, 0.25), (-24, 0.25), (24, 0.25), (48...",2045193736830000128,0,decreasing
0,9,0,"(0, ((-48, 0.25), (-24, 0.25), (24, 0.25), (48...",2045193736830000128,0,decreasing
0,6,0,"(0, ((-48, 0.25), (-24, 0.25), (24, 0.25), (48...",2045193736830000128,0,decreasing
0,10,0,"(0, ((-48, 0.25), (-24, 0.25), (24, 0.25), (48...",2045193736830000128,0,decreasing
0,13,0,"(0, ((-48, 0.25), (-24, 0.25), (24, 0.25), (48...",2045193736830000128,0,decreasing


## Likelihood model

In [13]:
class MouselabModel():
    """Computes likelihoods for a softmax policy on a grid of temp and p_error."""
    def __init__(self, env, preference, data):
        """Precompute the preferences for every observed state.

        Parameters
        ----------
        env : environment the policy is run in.
        preference : callable(state, action) -> number, wrapped in a
            SoftmaxPolicy.
        data : DataFrame with a `state` column and an `action` column; the
            actions are used as column indices into the preference matrix.
        """
        self.env = env
        self.policy = SoftmaxPolicy(preference)
        self.data = data
        # NOTE(review): presumably run once so that the policy is bound to
        # the environment before preferences are queried -- confirm.
        run_episode(self.policy, self.env)
        self.prefs = np.stack(self.data.state.apply(self.policy.preferences))
        self.prefs -= self.prefs.max(1).reshape(-1, 1)  # prevent float overflow

        idx = np.arange(len(data))
        # Series.as_matrix() was removed in pandas 1.0; .values returns the
        # same underlying array on old and new pandas alike.
        self.chosen = (idx, data.action.values)

    def likelihood(self, temp=1e-9, p_error=None):
        """Returns likelihood for all combinations of temp and p_error given.

        With `p_error` given, the return value has shape
        (len(self.data), len(temp), len(p_error)): the softmax probability of
        the chosen action mixed with `random_likelihood` at weight p_error.
        With `p_error=None` the shape is (len(self.data), len(temp)).
        """
        temp = np.atleast_1d(temp)
        ep = np.exp(np.einsum('ij,k -> ijk', self.prefs, 1/temp))
        probs = ep[self.chosen] / ep.sum(1)  # shape: (state, temp)
        if p_error is not None:
            p_error = np.atleast_1d(p_error)
            probs = (probs[..., np.newaxis] * (1-p_error) +
                     self.random_likelihood[..., np.newaxis] * p_error)
        return probs

    @property
    @memoize
    def random_likelihood(self):
        """Likelihood of the data under a constant (all-zero) preference.

        Evaluated at the default temperature, so the result has a single
        temperature column. Wrapped in `memoize` to avoid rebuilding the
        model on every access.
        """
        return MouselabModel(self.env, lambda *_: 0, self.data).likelihood()

## Parameters to search over

In [14]:
# All possible path values i.e all possible sums of
# rewards on the path.
from distributions import ZERO
# Sum of the initial reward distributions along the path to the first leaf
# (the root, at index 0 of the path, is skipped).
path_dist = sum((env.init[n] for n in env.path_to(env.leaves()[0])[1:]), ZERO)
path_values = list(path_dist.vals)

# Default search grids shared by all models.
defaults = {
    'p_error': np.linspace(0.01,0.25, 25),
    # Satisficing thresholds: the positive path values.
    'satisfice': [v for v in path_values if v > 0],
    # Pruning thresholds: the negative path values.
    'prune': [v for v in path_values if v < 0],
    'temp': np.logspace(-5,1, 50),
}
# Per-model settings; each entry starts from `defaults` and overrides keys.
models = {
#     'depth_first': {
#         **defaults,
#         'pref': depth_first,  
#         'temp': [1e-10]
#     },
#     'breadth_first': {
#         **defaults,
#         'pref': breadth_first,
#         'temp': [1e-10]
#    },
#     'progressive_deepening': {
#         **defaults,
#         'pref': progressive_deepening,
#         'temp': [1e-10]
#     },
    'best_first': {
        **defaults,
        'pref': best_first,
        'direction': ['forward', 'backward', None],
    },
    # Baseline: constant preference, p_error fixed at 1, no pruning/satisficing.
    'random': {
        **defaults,
        'pref': lambda s, a: 0,
        'prune': [None],
        'satisfice': [None],
        'p_error': [1],
        'temp': [1e-10]
    },
}

## Find MLE

In [29]:
def mle(model, **sf_prm):
    """Grid-search the maximum-likelihood temp and p_error for one model.

    Parameters
    ----------
    model : key into the module-level `models` dict.
    **sf_prm : keyword arguments for `satisfruning` (prune, satisfice,
        direction). The wrapper is applied only when `prune` or `satisfice`
        is truthy; passing `direction` alone leaves the raw preference in use.

    Returns
    -------
    dict with the best 'temp' and 'p_error', the number of observations 'N',
    the summed log-likelihood 'logp', the 'model' name and all of `sf_prm`.
    """
    spec = models[model]
    wrap = sf_prm.get('prune') or sf_prm.get('satisfice')
    pref = satisfruning(spec['pref'], **sf_prm) if wrap else spec['pref']

    fitted = MouselabModel(env, pref, data)
    temps, p_errors = spec['temp'], spec['p_error']
    likelihoods = fitted.likelihood(temp=temps, p_error=p_errors)

    # Sum the log-likelihood over observations, leaving a (temp, p_error) grid.
    total_logp = np.log(likelihoods).sum(0)
    best = total_logp.argmax()
    temp_idx, err_idx = np.unravel_index(best, total_logp.shape)

    summary = {'temp': temps[temp_idx], 'p_error': p_errors[err_idx], 'N': len(data),
               'logp': total_logp.flat[best], 'model': model}
    summary.update(sf_prm)
    return summary

from joblib import Parallel, delayed

# One delayed mle() call per (prune, satisfice) pair of the default grids,
# for the best_first model; only the 'forward' direction is swept here.
jobs = [delayed(mle)('best_first', prune=prune, satisfice=satisfice, direction=direction)
        for direction in ['forward']
        for prune in defaults['prune']
        for satisfice in defaults['satisfice']]

In [30]:
from tqdm import tqdm
# Run all jobs with 22 workers (n_jobs is the first positional argument).
# NOTE(review): tqdm wraps the job iterable, so the bar presumably tracks
# jobs being handed to joblib and not jobs finishing -- confirm.
results = Parallel(22)(tqdm(jobs))








  0%|          | 0/625 [00:00<?, ?it/s][A[A[A[A[A[A[A




[A[A[A[A[A






  0%|          | 1/625 [00:00<02:20,  4.43it/s][A[A[A[A[A[A[A





 21%|██        | 129/625 [00:28<01:48,  4.57it/s][A[A[A[A[A[A






  7%|▋         | 44/625 [00:03<00:41, 13.99it/s][A[A[A[A[A[A[A






 10%|█         | 64/625 [00:03<00:28, 19.68it/s][A[A[A[A[A[A[A






 11%|█         | 70/625 [00:05<00:46, 11.97it/s][A[A[A[A[A[A[A






 14%|█▍        | 86/625 [00:05<00:37, 14.42it/s][A[A[A[A[A[A[A






 15%|█▌        | 94/625 [00:08<00:48, 10.99it/s][A[A[A[A[A[A[A






 16%|█▋        | 103/625 [00:08<00:43, 11.89it/s][A[A[A[A[A[A[A






 18%|█▊        | 110/625 [00:11<00:51,  9.93it/s][A[A[A[A[A[A[A






 18%|█▊        | 115/625 [00:11<00:49, 10.24it/s][A[A[A[A[A[A[A






 20%|██        | 127/625 [00:11<00:44, 11.20it/s][A[A[A[A[A[A[A






 21%|██▏       | 134/625 [00:13<00:50,  9.75it/s][A[A[A[A[A[A[A

In [32]:
# `results` is a list of the dicts returned by mle(), so NumPy stores it as
# a pickled object array; loading it back requires np.load(..., allow_pickle=True).
np.save('data/results',results)

In [33]:
# Tabulate the fit results (one row per job) and persist them as a pickle.
pd.DataFrame(results).to_pickle('data/best_first.pkl')