In [6]:
from linguaml.rl.agent import Agent
from linguaml.rl.state import State, BatchedStates
from linguaml.rl.action import Action
from linguaml.tolearn.family import Family

In [None]:
class A:
    
    class Action(Action):
        def __init__(self, data):
            super().__init__(data)
    
    def __init__(self) -> None:
        
        self.__class__.Action.family = 
    
    def select_action(self) -> Action:
        return self.Action(1)

In [None]:
from linguaml.tolearn.families import SVCFamily

agent = Agent(
    SVCFamily
)

In [None]:
from functools import partial
from linguaml.types import Number, NumberList, is_number_list

is_number_list([1, 2, 3.0])

## Selecting Actions

### Single Action

In [5]:
import numpy as np

from linguaml.tolearn.families import SVCFamily
from linguaml.rl.state import State, calc_n_state_features
from linguaml.rl.agent import Agent

State.n_time_steps = 10
state = State(np.random.rand(State.n_time_steps, calc_n_state_features(SVCFamily)))

agent = Agent(SVCFamily)
agent.select_action(state)

TypeError: Agent.__init__() missing 1 required positional argument: 'numeric_hp_bounds'

In [None]:
len(np.array([1, 2, 3 ]).shape)

In [None]:
len(np.array(1).shape)

### Batched Actions

In [None]:
import numpy as np

from linguaml.tolearn.families import SVCFamily
from linguaml.rl.state import BatchedStates, calc_n_state_features
from linguaml.rl import state
from linguaml.rl.state import BatchedStates
from linguaml.rl.agent import Agent

state.n_time_steps = 10
batched_states = BatchedStates(
    np.random.rand(
        10, 
        state.n_time_steps, 
        calc_n_state_features(SVCFamily)
    )
)

agent = Agent(
    SVCFamily,
    numeric_hp_bounds={
        "C": (0.1, 100),
        "gamma": (1e-3, 0.1),
        "tol": (1e-5, 1e-1),
    }
)
batched_actions = agent.select_action(batched_states)

batched_actions

In [None]:
batched_actions.to_hp_configs(
    agent.family,
    agent.numeric_hp_bounds
)

## Log-Probabilities

### Single Data

In [1]:
from linguaml.rl.agent import Agent
from linguaml.tolearn.families import SVCFamily
from linguaml.rl.state import State, set_state_time_steps, calc_n_state_features, n_time_steps

import numpy as np

# Set the number of time steps for the state
set_state_time_steps(10)

# Create a random state
state = State(
    np.random.rand(
        n_time_steps, 
        calc_n_state_features(SVCFamily)
    )
)

# Create an agent
agent = Agent(
    SVCFamily,
    numeric_hp_bounds={
        "C": (0.1, 100),
        "gamma": (1e-3, 0.1),
        "tol": (1e-5, 1e-1),
    }
)

# Select an action
action = agent.select_action(state)

action

{'C': 1.0,
 'kernel': 0,
 'gamma': 0.0,
 'tol': 0.25867778062820435,
 'decision_function_shape': 0}

Without providing the argument `state` in agent's method `log_prob`, we compute the log-probability of the action taken based on the latest state:

In [2]:
# Get the log probability of the action based on the latest state
log_prob = agent.log_prob(action)

log_prob

tensor(-3.9092, grad_fn=<SumBackward1>)

Of course, this is equivalent to:

In [3]:
# Get the log probability of the action based on the provided state
log_prob = agent.log_prob(action, state)

log_prob

tensor(-3.9092, grad_fn=<SumBackward1>)

But by passing the `state`, the agent regenerated the distributions for selecting the actions by calling the `forward` method. Hence, you may neglect the argument `state` and save some time if you indeed want to compute the log-probability based on the latest state.

### Batched Data

In [4]:
from linguaml.rl.agent import Agent
from linguaml.tolearn.families import SVCFamily
from linguaml.rl.state import BatchedStates, set_state_time_steps, calc_n_state_features, n_time_steps

import numpy as np

# Set the number of time steps for the state
set_state_time_steps(10)

# Create a random batch of states
batched_states = BatchedStates(
    np.random.rand(
        5,
        n_time_steps, 
        calc_n_state_features(SVCFamily)
    )
)

# Create an agent
agent = Agent(
    SVCFamily,
    numeric_hp_bounds={
        "C": (0.1, 100),
        "gamma": (1e-3, 0.1),
        "tol": (1e-5, 1e-1),
    }
)

# Select batched actions
batched_actions = agent.select_action(batched_states)

# Compute the log probabilities of the batched actions
log_probs = agent.log_prob(batched_actions)

log_probs

tensor([-4.0706, -3.3700, -3.2919, -3.2556, -3.6127], grad_fn=<SumBackward1>)