In [1]:
import numpy as np
from omegaconf import OmegaConf
from torchrl.envs.utils import step_mdp
from tqdm import tqdm
from environments import make_env
from src.agents import get_agent
from src.utils import (
    login,
    logout,
    prefill_buffer,
)


## Training Parameters
Define the agent, environment, and run-level configuration used for training:

In [11]:
# --- Agent settings (read later through `conf.agent`) ---
agent_parameters = {
    "name": "sac",
    "lr": 3e-4,
    "batch_size": 256,
    "num_updates": 1,
    "prefill_episodes": 10,
    "num_cells": 256,
    "gamma": 0.99,
    "soft_update_eps": 0.995,
    "alpha_init": 1,
    "fixed_alpha": False,
    "loss_function": "l2",
    "normalization": "None",
    "dropout": 0.0,
    "prb": 0,
    "buffer_size": 1_000_000,
    "reset_params": False,
}

# --- Environment settings (read later through `conf.env`) ---
env_parameters = {
    "name": "roboarm_sim-v0",
    "max_episode_steps": 100,
    "verbose": 0,
    "frame_stack": 1,
    "action_filter": 1,
    "noise": 0.05,
    "reward_signal": "dense",
}

# --- Run-level settings; the two sections above are nested under "agent" and "env" ---
# Key order is kept as-is because it determines the order of the YAML dump below.
run_parameters = {
    "run_name": "RoboArm-SAC-Example",
    "device": "cuda",
    "episodes": 200,
    "agent": agent_parameters,
    "env": env_parameters,
}
conf = OmegaConf.create(run_parameters)

# Show the assembled configuration as YAML.
print(OmegaConf.to_yaml(conf))

run_name: RoboArm-SAC-Example
device: cuda
episodes: 200
agent:
  name: sac
  lr: 0.0003
  batch_size: 256
  num_updates: 1
  prefill_episodes: 10
  num_cells: 256
  gamma: 0.99
  soft_update_eps: 0.995
  alpha_init: 1
  fixed_alpha: false
  loss_function: l2
  normalization: None
  dropout: 0.0
  prb: 0
  buffer_size: 1000000
  reset_params: false
env:
  name: roboarm_sim-v0
  max_episode_steps: 100
  verbose: 0
  frame_stack: 1
  action_filter: 1
  noise: 0.05
  reward_signal: dense



## Create Agent & Environment


In [12]:
# Build the environment from the run configuration. make_env also returns the
# action and state spaces, which are forwarded to the agent factory below.
env, action_space, state_space = make_env(conf)

# Instantiate the agent from the spaces and the configuration. get_agent also
# returns a project name, which is not used by the cells shown here.
agent, project_name = get_agent(action_space, state_space, conf)

# Try to restore previously saved agent weights and/or replay buffer.
# NOTE(review): the exact behavior lives in src.utils.login — confirm there.
login(agent)

--- Agent initialized ---
Model not loaded!
Buffer not loaded!


## Prefill Replay Buffer
Prefill the replay buffer with transitions collected by taking random actions.

In [None]:
# Run the prefill phase for the number of episodes set in
# conf.agent.prefill_episodes, using the environment and agent created above.
# NOTE(review): the collected transitions are presumably stored in the agent's
# replay buffer — confirm in src.utils.prefill_buffer.
prefill_buffer(
    env=env,
    agent=agent,
    num_episodes=conf.agent.prefill_episodes,
)

## Training Loop

In [None]:
# Pull the settings used by the loop out of the configuration once.
batch_size = conf.agent.batch_size
num_updates = conf.agent.num_updates
env_name = conf.env.name
train_episodes = conf.episodes
max_episode_steps = conf.env.max_episode_steps

# Lists for logging (one entry per training episode)
rewards = []
final_errors = []
steps = []


def _as_scalar(value):
    """Return a plain Python number for a single-element tensor/array.

    Values without an ``.item()`` method (e.g. an ``int`` default) and values
    that cannot be reduced to one scalar are returned unchanged.
    """
    item = getattr(value, "item", None)
    if item is None:
        return value
    try:
        return item()
    except (ValueError, RuntimeError):
        # Multi-element tensor/array: leave it as-is rather than failing the run.
        return value


for e in tqdm(range(train_episodes), desc="Training"):
    td = env.reset()
    done = td.get("done", False)
    truncated = td.get("truncated", False)
    ep_return = 0
    ep_steps = 0

    # Roll out one episode: act, step, store the transition, advance.
    while not done and not truncated:
        ep_steps += 1
        td = agent.get_action(td)
        td = env.step(td)
        agent.add_experience(td)

        # Refresh BOTH termination flags from the step result so that the
        # loop condition never tests a stale `truncated` value from reset.
        done = td.get(("next", "done"), False)
        truncated = td.get(("next", "truncated"), False)
        ep_return += td.get(("next", "reward"), 0)

        # Promote the "next" entries to the root for the following step.
        td = step_mdp(td)

    # Train the agent: the update count scales with the steps just collected.
    loss_info = agent.train(
        batch_size=batch_size, num_updates=num_updates * ep_steps
    )

    # Metrics logging. Values are stored as plain Python numbers so the lists
    # can be handled uniformly; a missing "error" entry is logged as 0 instead
    # of raising AttributeError on the integer default.
    rewards.append(_as_scalar(ep_return))
    steps.append(ep_steps)
    final_errors.append(_as_scalar(td.get("error", 0)))

# Save agent weights or replay buffer
logout(agent)
# Close environment
env.close()


In [13]:
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'matplotlib'