# Amazing RL Agent

In [None]:
import gymnasium as gym
import torch
import numpy as np
import importlib

def moving_average(x, w):
    """Return the centred moving average of ``x`` over a window of ``w`` samples.

    The average is computed by convolving ``x`` with a length-``w`` box
    kernel in NumPy's ``'same'`` mode and dividing by ``w``.

    Args:
        x: One-dimensional sequence of values to smooth.
        w: Window length in samples.

    Returns:
        A NumPy array of length ``max(len(x), w)``. The input is implicitly
        zero-padded at both ends, so values within roughly half a window of
        either edge are pulled towards zero.
    """
    box_kernel = np.ones(w)
    window_sums = np.convolve(x, box_kernel, mode='same')
    return window_sums / w

In [None]:
# Smoke test: create a random tensor and do some arithmetic on it to confirm
# that PyTorch is installed and working.
x = torch.rand(1000) + 1

# Use the first NVIDIA GPU when PyTorch can see one (this requires a
# CUDA-enabled install); otherwise fall back to the CPU. The GPU gives some
# extra speed but is not required.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
print(f"Using {DEVICE}")

# Newly created tensors should be moved to the chosen device with
# .to(DEVICE), as is done here for x.
x = x.to(DEVICE)


In [None]:
# Import agent
import agents

# Jupyter keeps imported modules cached, so reload `agents` to pick up any
# edits made to it since it was first imported.
importlib.reload(agents)

# Render mode passed to gym.make() when the environment is created;
# None means no rendering is requested.
RENDER_AGENT = None

# Alternative agent, kept for quick switching:
# agent = agents.TD3(24, 4, 1, noise=0.25, copy_step=3, device=DEVICE)
# NOTE(review): the positional arguments (24, 4, 1) are presumably the
# observation size, action size and action bound -- confirm against agents.py.
agent = agents.DDPG(24, 4, 1, batch_size=32, device=DEVICE)

# Total reward collected in each training episode, appended to during training.
scores = []
import matplotlib.pyplot as plt


In [None]:
# Close the environment left over from a previous run of this cell, if any.
try:
    env.close()
except NameError:
    # First run of the cell: no environment has been created yet.
    pass
except Exception as exc:
    # Cleanup is best-effort, but report the problem instead of hiding it.
    print(f"Ignoring error while closing previous environment: {exc!r}")

env = gym.make("BipedalWalker-v3", hardcore=False, render_mode=RENDER_AGENT)
# A single figure is reused for every progress plot. Creating a new figure
# per episode would leave hundreds of figures open until the cell finishes.
fig = plt.figure(figsize=(1600/96, 800/96), dpi=96)
observation, info = env.reset(seed=27)
steps_per_episode = 1600
episodes = 250
log_every = 1  # print progress every `log_every` episodes

try:
    for episode in range(episodes):
        # An "episode" here is a fixed budget of `steps_per_episode`
        # environment steps: if the environment terminates or truncates
        # early it is reset and stepping continues, so `rewards` may span
        # several environment episodes.
        rewards = 0
        for step in range(steps_per_episode):
            action = agent.choose_action(observation, env.action_space).cpu()
            action = np.array(action)
            observation, reward, terminated, truncated, info = env.step(action)

            # The logged score uses the raw environment reward ...
            rewards += reward

            # ... while the agent learns from a shaped reward in which
            # positive rewards are amplified 100x.
            reward_agent = 100 * reward if reward > 0 else reward

            agent.update(observation, reward_agent, terminated, truncated)
            if terminated or truncated:
                observation, info = env.reset()

        scores.append(rewards)
        if episode % log_every == 0:
            print(f"Episode: {episode}, Reward: {rewards}, Memory Size: {agent.memory.size}")

        # Redraw the training curve and overwrite training.png.
        ma = max(scores)
        mi = min(scores)
        fig.clf()
        ax = fig.add_subplot(111)
        ax.set_title(f"Best: {ma}")
        if mi < ma:
            # Identical limits (e.g. after the first episode) would give a
            # degenerate axis, so only pin the range once the scores differ.
            ax.set_ylim(mi, ma)
        ax.set_xlim(0, len(scores))

        ax.plot(np.array(scores), color="lightblue")
        # Smoothed curve. The 'same'-mode convolution zero-pads the scores,
        # so the curve is biased towards zero near the edges; the slice
        # drops the trailing half window. With fewer than 64 scores the
        # convolution output is 64 samples long rather than len(scores).
        ax.plot(moving_average(np.array(scores), 64)[:-32], color="blue")
        ax.axhline(y=0, color="black", linestyle="-")
        ax.axhline(y=300, color="red", linestyle="-")
        fig.savefig("training.png")
        if RENDER_AGENT is not None:
            env.render()

        observation, info = env.reset()
finally:
    # Release the environment even if training is interrupted or fails.
    env.close()


In [None]:
# Persist the trained actor and critic weights, one checkpoint file each.
checkpoints = (
    (agent.actor, "modded_learning_ddpg_actor.pth"),
    (agent.critic, "modded_learning_ddpg_critic.pth"),
)
for network, path in checkpoints:
    torch.save(network.state_dict(), path)

# Store the per-episode scores as comma-delimited text for later analysis.
np.savetxt("scores.csv", scores, delimiter=",")
