In [1]:
import numpy as np


# Upper bounds for the four parameters (k, mu, al, ah).
# NOTE(review): none of the MAX_* values are referenced by the code visible in
# this notebook; presumably they map the agents' normalized 0-1 state back to
# real parameter ranges on the receiving side - confirm.
MAX_K = 500
MAX_MU = 1.0
MAX_AL = 2
MAX_AH = 2
N_STEPS = 10 # grid resolution per dimension: every action moves one coordinate by 1 / N_STEPS
N_DIMS = 4 # number of state dimensions (k, mu, al, ah); kept as a constant for generalizing this code later

In [2]:
def features1(k, mu, al, ah):
    """Build the bias + linear + pairwise-interaction feature vector.

    Layout of the returned 11-element array:
    ``[1, k, mu, al, ah, k*mu, k*al, k*ah, mu*al, mu*ah, al*ah]``.
    """
    params = (k, mu, al, ah)
    # Every unordered pair (i < j) in declaration order, which yields
    # k*mu, k*al, k*ah, mu*al, mu*ah, al*ah.
    interactions = [
        params[i] * params[j]
        for i in range(len(params))
        for j in range(i + 1, len(params))
    ]
    return np.array([1, *params, *interactions])

def features2(k, mu, al, ah):
    """Build the bias + linear + squared-term feature vector.

    Layout of the returned 9-element array:
    ``[1, k, mu, al, ah, k**2, mu**2, al**2, ah**2]``.
    Pairwise interaction terms are deliberately left out of this variant.
    """
    params = (k, mu, al, ah)
    squares = [p**2 for p in params]
    return np.array([1, *params, *squares])

# Feature map used by the crediters and by LinearSGDAgent below; rebind this
# name to switch feature sets (e.g. to features1).
get_features = features2

In [3]:
from collections import deque

class Crediter:
    """Fixed-length sliding window of recent feature vectors.

    ``credit()`` blends the stored vectors with a fixed weight profile so a
    single reward can be attributed to several recently visited states.
    The window is pre-filled with the feature vector of the all-zero state
    so that ``credit()`` always sees a full window.
    """

    def __init__(self, max_length = 8):
        """Create a window holding ``max_length`` feature vectors.

        NOTE(review): ``credit()`` uses a hard-coded 8-entry weight profile;
        with a different ``max_length`` ``zip`` silently truncates to the
        shorter of the two sequences.
        """
        self._history = deque(maxlen=max_length)
        self._a = 0.175
        self._b = 0.1
        self._c = 0.05
        zero_state_features = get_features(0, 0, 0, 0)
        for _ in range(max_length):
            self._history.append(zero_state_features)

    def add_index(self, feature_vec):
        """Push the newest feature vector into the window.

        The deque is bounded (``maxlen``), so appending to a full window
        evicts the oldest entry automatically. The previous implementation
        called ``pop()`` first, which removed the *newest* entry instead:
        the window never slid and only its last slot was ever replaced.
        """
        self._history.append(feature_vec)

    def credit(self):
        """Return the weighted sum of the stored feature vectors.

        Weights are paired with the history in stored order (oldest first):
        ``[c, b, a, a, a, a, b, c]``.
        """
        weights = np.array([
            self._c, self._b,
            self._a, self._a, self._a, self._a,
            self._b, self._c,
        ])
        return sum(w * v for w, v in zip(weights, self._history))

class Crediter2(Crediter):
    """Four-slot variant of ``Crediter`` with its own weight profile.

    ``add_index`` is inherited from ``Crediter``; the base ``__init__`` is
    intentionally not invoked, so only the attributes set here exist.
    NOTE(review): the four weights add up to 0.95 rather than 1 - confirm
    that this is intended.
    """

    def __init__(self):
        self._history = deque(maxlen=4)
        self._a, self._b, self._c, self._d = 0.5, 0.25, 0.15, 0.05
        # Pre-fill the window with the features of the all-zero state.
        zero_state_features = get_features(0, 0, 0, 0)
        self._history.extend(
            zero_state_features for _ in range(self._history.maxlen)
        )

    def credit(self):
        """Return the weighted sum of the stored vectors, in stored order."""
        weights = np.array([self._a, self._b, self._c, self._d])
        blended = 0
        for weight, features in zip(weights, self._history):
            blended = blended + weight * features
        return blended

class Crediter3(Crediter):
    """Two-slot variant of ``Crediter`` weighting its entries 0.75 / 0.25.

    ``add_index`` is inherited from ``Crediter``; the base ``__init__`` is
    intentionally not invoked.
    """

    def __init__(self):
        self._history = deque(maxlen=2)
        # Pre-fill the window with the features of the all-zero state.
        zero_state_features = get_features(0, 0, 0, 0)
        self._history.extend(
            zero_state_features for _ in range(self._history.maxlen)
        )

    def credit(self):
        """Return 0.75 * first stored vector + 0.25 * second stored vector."""
        weights = np.array([0.75, 0.25])
        blended = 0
        for weight, features in zip(weights, self._history):
            blended = blended + weight * features
        return blended

In [4]:
class RandomAgent:
    """Baseline agent that walks the unit hypercube one random step at a time.

    The state is a vector of ``N_DIMS`` coordinates; each action changes a
    single coordinate by ``+1/N_STEPS`` or ``-1/N_STEPS``.
    """

    def __init__(self):
        self.state = np.zeros(N_DIMS)
        unit_steps = np.eye(N_DIMS) / N_STEPS
        # Rows 0..N_DIMS-1 are the positive moves, the rest their negatives.
        self._actions = np.concatenate((unit_steps, -unit_steps))
        self._rng = np.random.default_rng()
        print(self.state)

    # Values should be normalized to 0-1 space for each
    def set_state(self, k, mu, al, ah):
        """Overwrite the current state with the given coordinates."""
        self.state = np.array([k, mu, al, ah])

    def select_action(self):
        """Draw random actions until one keeps every coordinate in [0, 1]."""
        while True:
            candidate = self._actions[self._rng.integers(len(self._actions))]
            landing = self.state + candidate
            if ((landing >= 0) & (landing <= 1)).all(0):
                return candidate

    def apply_action(self, action):
        """Move by ``action``; raise ``Exception`` if the result leaves [0, 1]."""
        target = self.state + action
        stays_inside = ((target >= 0) & (target <= 1)).all(0)
        if not stays_inside:
            raise Exception(f"Tried to transition to invalid state {target}.")
        self.state = target

In [5]:
class LinearSGDAgent:
    """Epsilon-greedy agent with a linear value estimate over ``get_features``.

    The state is a vector of ``N_DIMS`` coordinates expected to stay inside
    [0, 1]; each action changes one coordinate by ``+/- 1/N_STEPS``. Rewards
    update the weight vector with a stochastic-gradient step against the
    feature blend returned by the crediter.
    """

    def __init__(self):
        self.crediter = Crediter3()
        self.state = np.zeros(N_DIMS)
        self._weights = np.zeros(len(get_features(*self.state)))
        unit_steps = np.eye(N_DIMS) / N_STEPS
        # Rows 0..N_DIMS-1 are the positive moves, the rest their negatives.
        self._actions = np.concatenate((unit_steps, -unit_steps))
        self._rng = np.random.default_rng()
        self._epsilon = 0.1  # probability of taking a random (exploratory) action
        self._alpha = 0.002 # taken from scurto et al 2021

    def set_state(self, k, mu, al, ah):
        """Overwrite the state and record its features in the crediter.

        No bounds check is performed here; callers may place the agent
        outside [0, 1].
        """
        self.state = np.array([k, mu, al, ah])
        self.crediter.add_index(get_features(*self.state))

    def check_bounds(self, state):
        """Return True when every coordinate of ``state`` lies in [0, 1]."""
        return ((state >= 0) & (state <= 1)).all(0)

    def get_value(self, state):
        """Return the linear value estimate ``weights . features(state)``."""
        return np.dot(self._weights, get_features(*state))

    def _valid_actions(self):
        """Return the actions whose successor state stays inside [0, 1]."""
        return [
            action for action in self._actions
            if self.check_bounds(self.state + action)
        ]

    def select_action(self):
        """Return a greedy action, breaking near-ties uniformly at random.

        Raises:
            ValueError: if no action qualifies, i.e. every move leaves the
                bounds or every candidate value is NaN.
        """
        best_actions = []
        rejected = []
        # -np.inf instead of np.NINF: the alias was removed in NumPy 2.0.
        best_value = -np.inf
        for action in self._actions:
            next_state = self.state + action
            if not self.check_bounds(next_state):
                rejected.append(action)
                continue
            value = self.get_value(next_state)
            if np.isclose(best_value, value):
                best_actions.append(action)
            elif value > best_value:
                best_value = value
                best_actions = [action]
        if len(rejected) > 0:
            print(f"Invalid actions {rejected}")
        print(f"Maximum value of {best_value}")
        if not best_actions:
            # Previously this surfaced as an opaque error from rng.integers(0).
            raise ValueError(
                f"No selectable action from state {self.state}: every move is "
                "out of bounds or has a NaN value."
            )
        return best_actions[self._rng.integers(len(best_actions))]

    def select_epsilon_greedy_action(self):
        """With probability epsilon return a random valid action, else the greedy one.

        Raises:
            ValueError: if no action keeps the state inside [0, 1].
        """
        if self._rng.random() < self._epsilon:
            # Enumerate the valid moves first instead of rejection sampling:
            # the selection is still uniform over valid actions, but it can no
            # longer loop forever when the state was placed out of bounds.
            candidates = self._valid_actions()
            if not candidates:
                raise ValueError(
                    f"No valid action available from state {self.state}."
                )
            action = candidates[self._rng.integers(len(candidates))]
            print(f"Taking random action {action}")
            return action
        return self.select_action()

    def apply_action(self, action):
        """Move by ``action``; raise ``Exception`` if the result leaves [0, 1]."""
        next_state = self.state + action
        if self.check_bounds(next_state):
            self.set_state(*next_state)
        else:
            raise Exception(f"Tried to transition to an invalid state {next_state}.")

    def reward_and_bootstrap(self, reward):
        """Take one SGD step moving the predicted value toward ``reward``.

        The prediction is ``weights . credit_features`` where
        ``credit_features`` is the crediter's blend of recent feature
        vectors; no next-state value enters the target.
        """
        credit_features = self.crediter.credit()
        error = reward - np.dot(self._weights, credit_features)
        print(f"Error - {error}")
        self._weights = self._weights + self._alpha * error * credit_features
        

In [13]:
from pythonosc.dispatcher import Dispatcher
from pythonosc.osc_server import ThreadingOSCUDPServer
from pythonosc.udp_client import SimpleUDPClient
from threading import Thread

# Manual/autonomous flag intended to be updated by auto_switch_handler.
# NOTE(review): nothing visible in this notebook reads this flag.
manualMode = True
# Single shared agent driven by the OSC handlers defined below.
agent = LinearSGDAgent()

ip = "127.0.0.1" # localhost
port = 8080 # UDP port the OSC server in this cell listens on
destPort = 8081 # UDP port that agent state updates are sent to

# Outgoing OSC client used by step() to publish the agent's new state.
client = SimpleUDPClient(ip, destPort)

def default_handler(address, *args):
    """Fallback OSC handler: log the address and arguments of the message."""
    message = f"DEFAULT {address}: {args}"
    print(message)

def auto_switch_handler(address, state, *args):
    """OSC handler that stores the received mode flag in ``manualMode``.

    Args:
        address: OSC address the message arrived on.
        state: flag sent by the UI; stored as-is.
        *args: any further OSC arguments (ignored).

    NOTE(review): this handler is mapped to "/uistate/setAutonomous" but the
    value is logged and stored as "manual" - confirm the polarity with the
    sender.
    """
    # Without this declaration the assignment below only bound a local name
    # and the module-level flag never changed.
    global manualMode
    print(f"Is Manual {state}")
    manualMode = state

def manual_set(address, element, *args):
    """OSC handler that places the agent at a manually chosen state.

    The first four OSC arguments after ``element`` are forwarded to
    ``agent.set_state`` as (k, mu, al, ah); the resulting state is printed.
    """
    # Currently assume only one element - TODO revise later
    k, mu, al, ah = args[0], args[1], args[2], args[3]
    agent.set_state(k, mu, al, ah)
    print(agent.state)

def step(address, element):
    """OSC handler: take one epsilon-greedy step and publish the new state.

    Sends ``[0, k, mu, al, ah]`` to "/controller/agentSet" through the
    module-level ``client`` after the agent has moved.
    """
    # Captured only for the optional transition trace below.
    previous_state = agent.state
    chosen = agent.select_epsilon_greedy_action()
    print(f"Taking action {chosen}")
    agent.apply_action(chosen)
    #print(f"Transitioned from {previous_state} to {agent.state}")
    payload = [0]
    payload.extend(agent.state[i] for i in range(4))
    client.send_message("/controller/agentSet", payload)

def reward(address, element, reward):
    """OSC handler: feed the received reward value to the agent's update."""
    # Currently assuming only one element - TODO revise later
    # Captured only for the optional weight trace below.
    weights_before = agent._weights
    agent.reward_and_bootstrap(reward)
    # print(f"Weights updated from {weights_before} to {agent._weights}")
    
    

# Route incoming OSC addresses to their handlers; anything unmapped falls
# through to default_handler.
dispatcher = Dispatcher()
dispatcher.set_default_handler(default_handler)
for _osc_address, _osc_handler in (
    ("/uistate/setAutonomous", auto_switch_handler),
    ("/controller/manualSet", manual_set),
    ("/controller/step", step),
    ("/controller/reward", reward),
):
    dispatcher.map(_osc_address, _osc_handler)

# `ip` and `port` are already assigned near the top of this cell; the
# duplicate assignments that used to sit here were redundant.
with ThreadingOSCUDPServer((ip, port), dispatcher) as server:
    def quit_func(address, *args):
        """OSC handler for "/quit": stop the serve_forever loop.

        Only shutdown() is called here. The surrounding ``with`` block closes
        the server on exit, and calling server_close() from inside a handler
        thread would make the threading server try to join the very thread
        that is running this handler.
        """
        print("Quit!")
        server.shutdown()
    dispatcher.map("/quit", quit_func)
    thread = Thread(target=server.serve_forever)
    thread.start()
    try:
        # Block the cell until "/quit" arrives or the kernel is interrupted.
        thread.join()
    finally:
        # On a kernel interrupt the serve loop is still running; stop it
        # before the ``with`` block closes the socket underneath it. After a
        # normal "/quit" both calls return immediately.
        server.shutdown()
        thread.join()
print("And we're out!")

[0. 0. 0. 0.]
Is Manual False
Invalid actions [array([-0.1, -0. , -0. , -0. ]), array([-0. , -0.1, -0. , -0. ]), array([-0. , -0. , -0.1, -0. ]), array([-0. , -0. , -0. , -0.1])]
Maximum value of 0.0
Taking action [0.  0.1 0.  0. ]
Invalid actions [array([-0.1, -0. , -0. , -0. ]), array([-0. , -0. , -0.1, -0. ]), array([-0. , -0. , -0. , -0.1])]
Maximum value of 0.0
Taking action [0.  0.  0.  0.1]
Invalid actions [array([-0.1, -0. , -0. , -0. ]), array([-0. , -0. , -0.1, -0. ])]
Maximum value of 0.0
Taking action [0.  0.  0.  0.1]
Invalid actions [array([-0.1, -0. , -0. , -0. ]), array([-0. , -0. , -0.1, -0. ])]
Maximum value of 0.0
Taking action [-0.  -0.  -0.  -0.1]
Invalid actions [array([-0.1, -0. , -0. , -0. ]), array([-0. , -0. , -0.1, -0. ])]
Maximum value of 0.0
Taking action [0.1 0.  0.  0. ]
Invalid actions [array([-0. , -0. , -0.1, -0. ])]
Maximum value of 0.0
Taking action [0.  0.  0.  0.1]
Invalid actions [array([-0. , -0. , -0.1, -0. ])]
Maximum value of 0.0
Taking action

----------------------------------------
Exception occurred during processing of request from ('127.0.0.1', 56328)
Traceback (most recent call last):
  File "/usr/lib/python3.11/socketserver.py", line 691, in process_request_thread
    self.finish_request(request, client_address)
  File "/usr/lib/python3.11/socketserver.py", line 361, in finish_request
    self.RequestHandlerClass(request, client_address, self)
  File "/usr/lib/python3.11/socketserver.py", line 755, in __init__
    self.handle()
  File "/home/juliette/agentenv/lib/python3.11/site-packages/pythonosc/osc_server.py", line 33, in handle
    server.dispatcher.call_handlers_for_packet(self.request[0], self.client_address)
  File "/home/juliette/agentenv/lib/python3.11/site-packages/pythonosc/dispatcher.py", line 193, in call_handlers_for_packet
    handler.invoke(client_address, timed_msg.message)
  File "/home/juliette/agentenv/lib/python3.11/site-packages/pythonosc/dispatcher.py", line 56, in invoke
    self.callback(messa

In [6]:
import pythonosc
import sys

# Record the interpreter and numpy versions this notebook was run with.
print(
    "VERSIONS",
    f"Python: {sys.version}",
    f"numpy: {np.__version__}",
    sep="\n",
)

VERSIONS
Python: 3.11.2 (main, Mar 13 2023, 12:18:29) [GCC 12.2.0]
numpy: 1.26.1
