In [None]:
import os
# Move the working directory one level up from where the kernel started.
# The starting directory is remembered the first time this cell runs, so
# re-running the cell always lands in the same place instead of climbing one
# more level on every execution.
if 'NOTEBOOK_START_DIR' not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, '..'))
print(os.getcwd())

In [None]:
import gym
from gym import spaces

import importlib
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches

import agents
import agents.users as users
import agents.assistants.assistants as assistants

# Reload the two project modules in place via the aliases bound by the imports
# above, so the kernel re-executes their current source without a restart.
importlib.reload(users)
importlib.reload(assistants)

In [None]:
# Simulation settings.
n_episode, max_n_step = 1, 100
n_targets, debug = 10, True

# The assistant is built first, then the user; both get the same target count.
assistant = assistants.Assistant(n_targets=n_targets)
user = users.User(n_targets=n_targets, debug=debug)

# Logs of user / assistant outputs, accumulated across all episodes.
u_os = []
a_os = []

for ep in range(n_episode):

    # The user's reset value is discarded; the assistant's reset value is what
    # the user receives on the first step of the episode.
    _ = user.reset()
    assistant_output = assistant.reset()

    for step in range(max_n_step):

        # User acts on the assistant's latest output. Of the returned 4-tuple
        # only the first (output) and third (done flag) elements are used.
        user_output, _, user_done, _ = user.step(assistant_output)
        if user_done:
            break

        # Assistant answers the user's output, same 4-tuple convention.
        assistant_output, _, assistant_done, _ = assistant.step(user_output)
        if assistant_done:
            break

        # Only steps on which neither side raised its done flag are logged.
        u_os += [user_output]
        a_os += [assistant_output]

In [None]:
# Plot the logged user outputs together with the first component of each
# logged assistant output.
fig, ax = plt.subplots()
ax.plot(u_os, label='user output')
# a_os stays empty when the very first simulation step already raised a done
# flag (the loop breaks before logging); indexing an empty array with [:, 0]
# would raise IndexError, so the assistant curve is skipped in that case.
if len(a_os) > 0:
    ax.plot(np.array(a_os)[:, 0], label='assistant output [0]')
ax.set_xlabel('logged step')
ax.set_ylabel('output')
ax.legend()

In [None]:
# define a user who gradually moves the cursor to a target of their choice

class FlexibleUser(object):
    """
    User with a flexible target characterised as a distribution
    N(mu_star, sigma_star) over cursor positions.

    Parameters
    ----------
    mu_star : array-like
        Centre of the target. `render_target` reads its first two components,
        so plotting requires at least two dimensions.
    sigma_star : float
        Spread of the target; the pull towards `mu_star` is divided by it.
    step_size : float, optional
        Gain applied to the pull towards the target (default 0.01).
    noise_scale : float, optional
        Standard deviation of the Gaussian noise added to every action
        (default 0.01). Set to 0 for a deterministic user.
    """

    def __init__(self, mu_star, sigma_star, step_size=0.01, noise_scale=0.01):
        super(FlexibleUser, self).__init__()

        self.step_size = step_size
        self.noise_scale = noise_scale
        self.mu_star = mu_star
        self.sigma_star = sigma_star

    def reset(self):
        """Start a new episode; returns None because the initial user action is ignored by the environment."""
        return None # user action a_0 ignored in environment

    def step(self, observable):
        """
        Return the user's cursor displacement for the current cursor position.

        Parameters
        ----------
        observable : array-like
            Position x of the cursor.

        Returns
        -------
        numpy.ndarray
            `step_size * (mu_star - observable) / sigma_star` plus Gaussian
            noise with standard deviation `noise_scale`.
        """
        diff = self.mu_star - observable
        # The pull is proportional to the displacement. An explicit
        # "multiply by distance, divide by distance" normalisation cancels out
        # mathematically but evaluates to 0/0 = NaN when the cursor sits
        # exactly on the target, so it is deliberately not performed here.
        drift = self.step_size * diff / self.sigma_star
        noise = self.noise_scale * np.random.normal(size=np.shape(observable))
        return drift + noise

    def render_target(self, ax, label=''):
        """
        Draw the target on a matplotlib axes.

        A cross marks `mu_star` (legend entry '<label> target') and three thin
        unfilled black circles mark radii of 1, 2 and 3 times `sigma_star`.
        """
        ax.scatter(self.mu_star[0], self.mu_star[1], marker='x', s=15**2, linewidth=2, label=f'{label} target', zorder=100)
        for k in range(3):
            ring = patches.Circle(xy=self.mu_star, radius=(k+1)*self.sigma_star, color='k', linewidth=0.5, fill=False)
            ax.add_patch(ring)
    
class CursorEnv(object):
    """
    Cursor living in the unit hypercube [0, 1]^d.

    Parameters
    ----------
    d : int, optional
        Number of cursor dimensions (default 2).

    Notes
    -----
    `self.x` (the cursor position) only exists after `reset()` has been
    called, so call `reset()` before the first `step()`.
    """

    def __init__(self, d=2):
        super(CursorEnv, self).__init__()
        # Honour the requested dimensionality instead of always storing 2.
        self.d = d

    def reset(self):
        """Place the cursor at its start position and return that position."""
        if self.d == 2:
            # Fixed start used by the 2-D demo.
            self.x = np.array([0,1])
        else:
            # No fixed start is defined for other dimensionalities; fall back
            # to a uniformly random position inside the unit hypercube.
            self.x = np.random.uniform(size=self.d)
        return self.x

    def step(self, observable):
        """
        Move the cursor by `observable` and return the new position.

        The new position is clamped component-wise to [0, 1]. A new array is
        created on every call, so previously returned positions are not
        modified.
        """
        # update cursor and saturate cast to [0, 1]
        self.x = np.clip(self.x + observable, 0, 1)
        return self.x
    

In [None]:
n_steps = 2

# Two FlexibleUser agents pull the same cursor towards different targets.
user = FlexibleUser(mu_star = np.array([0.75,0.75]), sigma_star=0.2)
ass = FlexibleUser(mu_star = np.array([0.25,0.25]), sigma_star=0.1)
env = CursorEnv()

user.reset()
ass.reset()
o_env = env.reset()
# os_env starts with the initial cursor position, so it always holds one more
# entry than os_user / os_ass.
os_user, os_ass, os_env = [], [], [o_env]

for step in range(n_steps):
    # interaction: both agents observe the same cursor position and their
    # displacements are summed before being applied to the environment
    o_user = user.step(o_env)
    o_ass = ass.step(o_env)
    o_joint = o_user + o_ass
    o_env = env.step(o_joint)

    # logging
    os_user.append(o_user)
    os_ass.append(o_ass)
    os_env.append(o_env)

os_env = np.array(os_env)

fig, ax = plt.subplots()
ax.plot(os_env[:,0], os_env[:,1], 'kx-', label='cursor')
ax.set_xlim([0,1])
ax.set_ylim([0,1])
# Equal aspect keeps the sigma circles round; on a non-square axes they would
# be drawn as ellipses and misrepresent the target spread.
ax.set_aspect('equal')
ax.set_xlabel('x')
ax.set_ylabel('y')
user.render_target(ax, 'user')
ass.render_target(ax, 'assistant')

# Overlay the individual actions taken on the last step, drawn from the cursor
# position they were applied to (the second-to-last logged position).
# With n_steps == 0 nothing was logged, so there is no last action to draw.
if os_user:
    a = os_env[-2]
    b = a + os_user[-1]
    ax.plot([a[0], b[0]], [a[1], b[1]], label='user_action')
    b = a + os_ass[-1]
    ax.plot([a[0], b[0]], [a[1], b[1]], label='assistant_action')
ax.legend()