In [1]:
%load_ext autoreload

In [2]:
%autoreload
import torch
import numpy as np

import os
from definitions import LOG_DIR, SAVED_AGENTS
from torch.utils.tensorboard import SummaryWriter

import gym
from gym.logger import ERROR

from src.agent import Agent
from src.simulation import simulate
from src.ornstein_uhlenbeck_process import OrnsteinUhlenbeckProcess

from matplotlib import pyplot as plt
from src.actor import Actor
import copy

In [3]:
# TensorBoard writer; event files go under the project's LOG_DIR.
writer = SummaryWriter(log_dir=LOG_DIR)

In [4]:
# Raise gym's logger threshold to ERROR.
gym.logger.set_level(gym.logger.ERROR)

In [5]:
# Build the environment and the agent that will be trained on it.
# The state/action dimensions and the action bound are read from the
# environment's observation and action spaces.
env = gym.make("LunarLanderContinuous-v2")
# Alternative environment:
# env = gym.make("BipedalWalker-v3")
agent = Agent(
    # Fall back to the CPU when CUDA is unavailable so this cell also runs
    # on machines without a usable GPU (previously hard-coded to "cuda").
    device="cuda" if torch.cuda.is_available() else "cpu",
    state_dim=env.observation_space.shape[0],
    action_dim=env.action_space.shape[0],
    actor_layer_sizes=[256, 128],
    critic_layer_sizes=[256, 128],
    replay_buffer_max_size=1e6,
    batch_size=128,
    learning_freq=1,
    γ=0.995,
    μ_θ_α=1e-4,
    Q_Φ_α=1e-3,
    ρ=0.95,
    noise_sigma=0.2,
    train_after=64,
    exploration=True,
    # This agent reports to the TensorBoard writer created above.
    writer=writer,
    train_steps_per_update=1,
    action_high=env.action_space.high
)

In [11]:
# Second agent built with the same hyper-parameters as `agent`, but without
# a TensorBoard writer (writer=None). It is used by the `random_agent` demo cell.
random_agent = Agent(
    # Fall back to the CPU when CUDA is unavailable so this cell also runs
    # on machines without a usable GPU (previously hard-coded to "cuda").
    device="cuda" if torch.cuda.is_available() else "cpu",
    state_dim=env.observation_space.shape[0],
    action_dim=env.action_space.shape[0],
    actor_layer_sizes=[256, 128],
    critic_layer_sizes=[256, 128],
    replay_buffer_max_size=1e6,
    batch_size=128,
    learning_freq=1,
    γ=0.995,
    μ_θ_α=1e-4,
    Q_Φ_α=1e-3,
    ρ=0.95,
    noise_sigma=0.2,
    train_after=64,
    exploration=True,
    writer=None,
    train_steps_per_update=1,
    action_high=env.action_space.high
)

In [6]:
# Optional: uncomment to move the agent to the CPU.
# agent = agent.to("cpu")

In [17]:
# Close the environment.
# NOTE(review): this cell carries the highest execution count in the notebook
# (In [17]), i.e. it was actually run last. In top-to-bottom order it sits
# before the training and demo cells that still use `env` — consider moving it
# to the end of the notebook so "Restart & Run All" matches the real workflow.
env.close()

In [8]:
# Training phase: 400 episodes (capped at 1000 steps each) with the
# agent's exploration switched on and rendering off.
agent.exploration = True
simulate(env, agent, episodes=400, max_episode_steps=1000, render=False)

HBox(children=(FloatProgress(value=0.0, max=400.0), HTML(value='')))




In [9]:
# Mastery phase: 100 further episodes with exploration switched off
# and rendering off.
agent.exploration = False
simulate(
    env,
    agent,
    episodes=100,
    render=False,
)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [14]:
# Demo of `random_agent`: 10 rendered episodes.
# The exploration flag is now set on the agent that is actually simulated;
# the original toggled `agent.exploration`, which has no effect on
# `random_agent` (it was constructed with exploration=True).
random_agent.exploration = False
simulate(env, random_agent, episodes=10, render=True)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))




In [16]:
# Demo of `agent`: 10 rendered episodes with exploration switched off.
agent.exploration = False
simulate(
    env,
    agent,
    episodes=10,
    render=True,
)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

KeyboardInterrupt: 

In [11]:
# Saving and loading: store the agent under SAVED_AGENTS, then read it back.
agent_name = "agent007"
agent_path = os.path.join(SAVED_AGENTS, agent_name)

# A bare `agent_path` expression in the middle of a cell is never displayed
# (only a cell's last expression is echoed), so print the path explicitly.
print(agent_path)

agent.save(agent_path)

# Reload from the same path to check that the save/load round trip works.
loaded_agent = Agent.load(agent_path)

Agent saved successfully! (agent.writer object can't be saved so this field has been set to `None`)
