In [1]:
import torch
import numpy as np

import os
from definitions import LOG_DIR
from torch.utils.tensorboard import SummaryWriter

import gym
from gym.logger import ERROR

from src.agent import Agent
from src.simulation import simulate
from src.ornstein_uhlenbeck_process import OrnsteinUhlenbeckProcess

from matplotlib import pyplot as plt
from src.actor import Actor
import copy

In [2]:
# TensorBoard writer rooted at the project's LOG_DIR; it is handed to the
# Agent constructor below via its `writer` argument.
writer = SummaryWriter(log_dir=LOG_DIR)

In [3]:
# Set gym's logger level to ERROR (presumably to silence lower-severity
# warnings/info messages while the notebook runs — confirm against gym.logger).
gym.logger.set_level(ERROR)

In [4]:
# Create the continuous-action Lunar Lander environment and the agent that
# acts in it. State/action dimensions are read from the environment's spaces
# so the agent's constructor arguments always match the env.
env = gym.make("LunarLanderContinuous-v2")
agent = Agent(
    # Use the GPU when one is present, otherwise fall back to CPU so the
    # notebook still runs on machines without CUDA.
    device="cuda" if torch.cuda.is_available() else "cpu",
    state_dim=env.observation_space.shape[0],
    action_dim=env.action_space.shape[0],
    actor_layer_sizes=[256, 128],
    critic_layer_sizes=[256, 128],
    # A buffer capacity is a count; pass an int rather than the float 1e6.
    replay_buffer_max_size=int(1e6),
    batch_size=128,
    learning_freq=1,
    γ=0.995,       # presumably the discount factor — confirm in src.agent
    μ_θ_α=1e-4,    # presumably the actor (policy μ_θ) learning rate — confirm
    Q_Φ_α=1e-3,    # presumably the critic (Q_Φ) learning rate — confirm
    ρ=0.95,        # presumably the target-network averaging coefficient — confirm
    noise_sigma=0.2,
    train_after=64,
    exploration=True,
    writer=writer,
    train_steps_per_update=1
)

In [14]:
# Close the environment.
# NOTE(review): this cell is positioned before both simulate(...) cells, and
# the execution counts in this notebook are not sequential. Run top-to-bottom
# on a fresh kernel, this closes `env` before it is used — consider moving
# this cell to the end of the notebook.
env.close()

In [6]:
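# Leftover toggle (currently disabled): uncomment to turn exploration off.
# Note that the following cell sets agent.exploration = True again.
# agent.exploration = False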

In [23]:
# Override settings on the already-constructed agent before training:
# the agent was created with train_steps_per_update=1; raise it to 16 here.
agent.train_steps_per_update = 16
# (Re-)enable exploration; the agent was also constructed with exploration=True.
agent.exploration = True

In [None]:
# Main run: 500 episodes, each limited to 1000 steps, with rendering off.
simulate(
    env,
    agent,
    episodes=500,
    max_episode_steps=1000,
    render=False,
)

HBox(children=(FloatProgress(value=0.0, max=500.0), HTML(value='')))

In [18]:
# Perform 10,000 additional agent.train_step() calls without stepping the
# environment.
for _ in range(10000):
    agent.train_step()

In [22]:
# Evaluation run: switch exploration off first, then play 10 rendered
# episodes with the current agent.
agent.exploration = False
simulate(
    env,
    agent,
    episodes=10,
    render=True,
)

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


