In [1]:
import numpy as np
import matplotlib.pyplot as plt
import os, shutil
from tqdm import tqdm
from rl_glue import RLGlue

from tbu_gym.tbu_discrete import TruckBackerEnv_D

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [None]:
import numpy as np
from tbu_gym.tbu_discrete import TruckBackerEnv_D
import matplotlib.pyplot as plt

# Hyperparameters for this run. Per the original note, the agent has its own
# defaults for these; the values here are the ones handed to it explicitly.

# --- episode budget (consumed by the training loop) ---
num_episodes = 1_000
max_steps_per_episode = 500

# --- learning settings ---
gamma = 0.99
learning_rate = 0.001
batch_size = 64
target_update_freq = 5  # the loop calls agent.update_target() every this-many episodes

# --- exploration settings (passed to the agent as epsilon / epsilon_min / epsilon_decay) ---
epsilon_start = 1.0
epsilon_min = 0.01
epsilon_decay = 0.99997

# Build the environment (rendering disabled) and the agent that will act in it.
env = TruckBackerEnv_D(render_mode=None)

# Sizes handed to the agent are read straight from the environment's spaces.
action_dim = env.action_space.n
state_dim = env.observation_space.shape[0]

# NOTE(review): QRCAgent is not imported in the cells visible here — confirm it is
# defined or imported by an earlier cell so the notebook survives Restart & Run All.
agent = QRCAgent(
    state_dim=state_dim, action_dim=action_dim,
    gamma=gamma, lr=learning_rate, batch_size=batch_size,
    epsilon=epsilon_start, epsilon_min=epsilon_min, epsilon_decay=epsilon_decay,
)

# Training: run num_episodes episodes and record the summed reward of each one.
episode_rewards = []

for episode in range(1, num_episodes + 1):
    state = env.reset()
    total_reward = 0

    # Each episode is capped at max_steps_per_episode environment steps.
    for step_idx in range(max_steps_per_episode):
        # Ask the agent for an action in the current state
        # (described as epsilon-greedy by the original author).
        action = agent.agent_policy(state)

        # Apply the action; the env returns the next state, reward and terminal flag.
        next_state, reward, done, info = env.step(action)

        # Hand the transition (s, a, r, s', done) to the agent's memory, then let it
        # run one training call — what that call samples is decided inside the agent.
        agent.remember(state, action, reward, next_state, done)
        agent.train_with_mem()

        total_reward += reward
        state = next_state  # advance: s <- s'

        if done:  # terminal state reached, end the episode early
            break

    # Target update happens on an episode schedule, not a step schedule.
    if episode % target_update_freq == 0:
        agent.update_target()

    episode_rewards.append(total_reward)

    print(f"Episode {episode}, Reward: {total_reward}, Epsilon: {agent.epsilon:.3f}")


# Learning curve: total reward collected in each episode, in training order.
fig, ax = plt.subplots()
ax.plot(episode_rewards)
ax.set(
    xlabel='Episode',
    ylabel='Total Reward',
    title='QRC Training on TruckBackerEnv_D',
)
plt.show()
