In [1]:
### Resources:

# DDPG Algorithm: https://spinningup.openai.com/en/latest/algorithms/ddpg.html

# DDPG Paper: https://arxiv.org/abs/1509.02971

In [1]:
import os

import gym
import torch
from torch import nn
import numpy as np
from spinup.utils.run_utils import setup_logger_kwargs
from spinup import ddpg_pytorch as ddpg

from models import MLPActorCritic
from train import DDPGTrainer

In [2]:
### Smoke-test the MLPActorCritic class on a continuous-action environment

# Settings shared by both throwaway models built in this cell.
env_continuous = gym.make('Pendulum-v0')
hidden_sizes, hidden_acts = [64, 64], nn.Tanh
action_std = 0.1

# Build the actor-critic once per batch-norm setting (disabled first, then
# enabled), print the matching banner, and dump the per-layer shape summary.
for _banner, _bn_enabled in (
    ('DDPG Actor-Critic (Without BN) Summary \n\n', False),
    ('DDPG Actor-Critic (With BN) Summary \n\n', True),
):
    ac = MLPActorCritic(
        env_continuous,
        hidden_sizes,
        hidden_sizes,
        hidden_acts,
        hidden_acts,
        action_std,
        use_BN=_bn_enabled,
    )
    print(_banner)
    ac.layer_summary()

DDPG Actor-Critic (Without BN) Summary 


Actor Summary: 

Linear input & output shapes:	 torch.Size([1, 3]) torch.Size([1, 64])
Tanh input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 64])
Linear input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 64])
Tanh input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 64])
Linear input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 1])
Tanh input & output shapes:	 torch.Size([1, 1]) torch.Size([1, 1])


Critic Summary: 

Linear input & output shapes:	 torch.Size([1, 3]) torch.Size([1, 64])
Tanh input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 64])
Linear input & output shapes:	 torch.Size([1, 65]) torch.Size([1, 64])
Tanh input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 64])
Linear input & output shapes:	 torch.Size([1, 64]) torch.Size([1, 1])


DDPG Actor-Critic (With BN) Summary 


Actor Summary: 

BatchNorm1d input & output shapes:	 torch.Size([1, 3]) torch.Size([1, 3])
Linear input & output sh



In [4]:
### Define parameters needed for training
#
# Every training cell below reads these module-level names, so this cell must
# be (re-)run before any of them.

# Actor-Critic module parameters (forwarded to the custom MLPActorCritic
# through `ac_kwargs`)
hidden_sizes, hidden_acts = [64, 64], nn.ReLU
action_std = 0.1   # also passed as `act_noise` to the Spinning Up baseline
use_BN=False
ac_kwargs = dict(hidden_sizes_actor=hidden_sizes,
                 hidden_sizes_critic=hidden_sizes,
                 hidden_acts_actor=hidden_acts,
                 hidden_acts_critic=hidden_acts,
                 action_std=action_std,
                 use_BN=use_BN)

# Training parameters
seed = 0
prioritized_replay = False
steps_per_epoch, epochs = 4000, 50
buf_size = 1000000   # passed as `replay_size` to the Spinning Up baseline
gamma, polyak = 0.99, 0.995
pi_lr, q_lr = 1e-3, 1e-3
batch_size = 100
# `learning_starts` is passed as `update_after` to the Spinning Up baseline
start_steps, learning_starts = 10000, 1000
update_every, num_test_episodes = 50, 10
max_ep_len, save_freq = 1000, 10

# Experiment parameters
# Root directory handed to setup_logger_kwargs. Set the DDPG_DATA_DIR
# environment variable to redirect output on another machine; when it is
# unset the original location is used, so existing runs are unaffected.
data_dir = os.environ.get('DDPG_DATA_DIR', '/home/sherif/user/python/DRL/data/ddpg')

In [10]:
### Start the training process using the Spinning up DDPG implementation
#
# Baseline run: trains Spinning Up's own DDPG with the same hyperparameters
# the custom trainer receives, so the two sets of logs are comparable.

env_fn = lambda : gym.make('Pendulum-v0')
exp_name_custom = 'ddpg_spinup'

logger_kwargs = setup_logger_kwargs(exp_name=exp_name_custom, data_dir=data_dir, seed=seed)

# Forward the activation explicitly rather than relying on the library
# default, so the baseline keeps following `hidden_acts` if that setting is
# changed for the custom runs.
ddpg(env_fn=env_fn, ac_kwargs=dict(hidden_sizes=hidden_sizes, activation=hidden_acts),
     seed=seed, steps_per_epoch=steps_per_epoch, epochs=epochs, replay_size=buf_size,
     gamma=gamma, polyak=polyak, pi_lr=pi_lr, q_lr=q_lr, batch_size=batch_size,
     start_steps=start_steps, update_after=learning_starts, update_every=update_every,
     act_noise=action_std, num_test_episodes=num_test_episodes, max_ep_len=max_ep_len,
     logger_kwargs=logger_kwargs, save_freq=save_freq)

[32;1mLogging data to /home/sherif/user/python/DRL/data/ddpg/ddpg_spinup/ddpg_spinup_s0/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac_kwargs":	{
        "hidden_sizes":	[
            64,
            64
        ]
    },
    "act_noise":	0.1,
    "actor_critic":	"MLPActorCritic",
    "batch_size":	100,
    "env_fn":	"<function <lambda> at 0x70953273a2f0>",
    "epochs":	50,
    "exp_name":	"ddpg_spinup",
    "gamma":	0.99,
    "logger":	{
        "<spinup.utils.logx.EpochLogger object at 0x709532a09b70>":	{
            "epoch_dict":	{},
            "exp_name":	"ddpg_spinup",
            "first_row":	true,
            "log_current_row":	{},
            "log_headers":	[],
            "output_dir":	"/home/sherif/user/python/DRL/data/ddpg/ddpg_spinup/ddpg_spinup_s0",
            "output_file":	{
                "<_io.TextIOWrapper name='/home/sherif/user/python/DRL/data/ddpg/ddpg_spinup/ddpg_spinup_s0/progress.txt' mode='w' encoding='UTF-8'>":	{
                    "mode":	"w"
     

In [10]:
### Start the training process using the custom DDPG implementation (Without BN or PER)

env_fn = lambda : gym.make('Pendulum-v0')
exp_name_custom = 'ddpg_custom'

# Pin this run's settings locally instead of trusting the shared globals, so
# the run matches its label regardless of which other cells were executed
# earlier in the kernel session. `ac_kwargs` itself is left untouched.
ac_kwargs_plain = dict(ac_kwargs, use_BN=False)
logger_kwargs = setup_logger_kwargs(exp_name=exp_name_custom, data_dir=data_dir, seed=seed)

trainer = DDPGTrainer()
trainer.train_mod(env_fn, ac=MLPActorCritic, ac_kwargs=ac_kwargs_plain, seed=seed, 
                  prioritized_replay=False, steps_per_epoch=steps_per_epoch,
                  epochs=epochs, buf_size=buf_size, gamma=gamma, polyak=polyak, pi_lr=pi_lr,
                  q_lr=q_lr, batch_size=batch_size, start_steps=start_steps,
                  learning_starts=learning_starts, update_every=update_every, 
                  num_test_episodes=num_test_episodes, max_ep_len=max_ep_len, 
                  logger_kwargs=logger_kwargs, save_freq=save_freq)

[32;1mLogging data to /home/sherif/user/python/DRL/data/ddpg/ddpg_custom/ddpg_custom_s0/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac":	"MLPActorCritic",
    "ac_kwargs":	{
        "action_std":	0.1,
        "hidden_acts_actor":	"ReLU",
        "hidden_acts_critic":	"ReLU",
        "hidden_sizes_actor":	[
            64,
            64
        ],
        "hidden_sizes_critic":	[
            64,
            64
        ],
        "use_BN":	false
    },
    "ac_mod":	{
        "MLPActorCritic(\n  (actor): MLPActor(\n    (net): Sequential(\n      (hidden_1): Linear(in_features=3, out_features=64, bias=True)\n      (activation_1): ReLU()\n      (hidden_2): Linear(in_features=64, out_features=64, bias=True)\n      (activation_2): ReLU()\n      (output): Linear(in_features=64, out_features=1, bias=True)\n      (output_act): Tanh()\n    )\n    (net_target): Sequential(\n      (hidden_1): Linear(in_features=3, out_features=64, bias=True)\n      (activation_1): ReLU()\n      (hidden_2):

In [5]:
### Start the training process using the custom DDPG implementation (With BN)

env_fn = lambda : gym.make('Pendulum-v0')
exp_name_custom = 'ddpg_bn_custom'

# Derive a batch-norm variant of the shared settings rather than editing
# `ac_kwargs` in place: the shared dict stays as the parameter cell defined
# it, so re-running any other cell afterwards is unaffected by this one.
ac_kwargs_bn = dict(ac_kwargs, use_BN=True)
logger_kwargs = setup_logger_kwargs(exp_name=exp_name_custom, data_dir=data_dir, seed=seed)

# Prioritized replay is pinned off here so this BN-only run does not depend
# on the current value of the global `prioritized_replay`.
trainer = DDPGTrainer()
trainer.train_mod(env_fn, ac=MLPActorCritic, ac_kwargs=ac_kwargs_bn, seed=seed, 
                  prioritized_replay=False, steps_per_epoch=steps_per_epoch,
                  epochs=epochs, buf_size=buf_size, gamma=gamma, polyak=polyak, pi_lr=pi_lr,
                  q_lr=q_lr, batch_size=batch_size, start_steps=start_steps,
                  learning_starts=learning_starts, update_every=update_every, 
                  num_test_episodes=num_test_episodes, max_ep_len=max_ep_len, 
                  logger_kwargs=logger_kwargs, save_freq=save_freq)

[32;1mLogging data to /home/sherif/user/python/DRL/data/ddpg/ddpg_bn_custom/ddpg_bn_custom_s0/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac":	"MLPActorCritic",
    "ac_kwargs":	{
        "action_std":	0.1,
        "hidden_acts_actor":	"ReLU",
        "hidden_acts_critic":	"ReLU",
        "hidden_sizes_actor":	[
            64,
            64
        ],
        "hidden_sizes_critic":	[
            64,
            64
        ],
        "use_BN":	true
    },
    "ac_mod":	{
        "MLPActorCritic(\n  (actor): MLPActor(\n    (net): Sequential(\n      (batch_norm_0): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (hidden_1): Linear(in_features=3, out_features=64, bias=True)\n      (batch_norm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (activation_1): ReLU()\n      (hidden_2): Linear(in_features=64, out_features=64, bias=True)\n      (batch_norm_2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine

In [5]:
### Start the training process using the custom DDPG implementation (With BN and PER)

env_fn = lambda : gym.make('Pendulum-v0')
exp_name_custom = 'ddpg_bn_per_custom'

# Build this run's settings locally: batch norm is enabled on a copy of the
# shared `ac_kwargs`, and prioritized replay is passed directly to the
# trainer. Neither `ac_kwargs` nor the global `prioritized_replay` is
# modified, so re-running another cell afterwards is unaffected by this one.
ac_kwargs_bn_per = dict(ac_kwargs, use_BN=True)
logger_kwargs = setup_logger_kwargs(exp_name=exp_name_custom, data_dir=data_dir, seed=seed)

trainer = DDPGTrainer()
trainer.train_mod(env_fn, ac=MLPActorCritic, ac_kwargs=ac_kwargs_bn_per, seed=seed, 
                  prioritized_replay=True, steps_per_epoch=steps_per_epoch,
                  epochs=epochs, buf_size=buf_size, gamma=gamma, polyak=polyak, pi_lr=pi_lr,
                  q_lr=q_lr, batch_size=batch_size, start_steps=start_steps,
                  learning_starts=learning_starts, update_every=update_every, 
                  num_test_episodes=num_test_episodes, max_ep_len=max_ep_len, 
                  logger_kwargs=logger_kwargs, save_freq=save_freq)

[32;1mLogging data to /home/sherif/user/python/DRL/data/ddpg/ddpg_bn_per_custom/ddpg_bn_per_custom_s0/progress.txt[0m
[36;1mSaving config:
[0m
{
    "ac":	"MLPActorCritic",
    "ac_kwargs":	{
        "action_std":	0.1,
        "hidden_acts_actor":	"ReLU",
        "hidden_acts_critic":	"ReLU",
        "hidden_sizes_actor":	[
            64,
            64
        ],
        "hidden_sizes_critic":	[
            64,
            64
        ],
        "use_BN":	true
    },
    "ac_mod":	{
        "MLPActorCritic(\n  (actor): MLPActor(\n    (net): Sequential(\n      (batch_norm_0): BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (hidden_1): Linear(in_features=3, out_features=64, bias=True)\n      (batch_norm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      (activation_1): ReLU()\n      (hidden_2): Linear(in_features=64, out_features=64, bias=True)\n      (batch_norm_2): BatchNorm1d(64, eps=1e-05, momentum=0.1