This script is an extension to the inverted pendulum DQN algorithm in inv_pend.ipynb. This scripts automates the grid search in a hyperparameter space to explore the best performance of DQN.

In [27]:
import random                       # To draw a random batch of samples form the replay buffer
import gymnasium as gym
import torch
from torch import nn
import torch.optim as optim
from torchsummary import summary

import numpy as np
import matplotlib.pyplot as plt

import os                                               # For saving models and training results
from datetime import datetime                           # For creating the directory of each training run
import json                                             # For storing training parameters during each run
from contextlib import redirect_stdout

from collections import deque       # replay buffer is a double ended queue that allows elements to be added either to the end or the start

# Setup

In [2]:
env = gym.make('CartPole-v1')
obs,_ = env.reset()

obs_space = env.observation_space.shape[0]
action_space = env.action_space.n

In [3]:
class QNet_MLP(nn.Module):
    def __init__(self,input_dim,output_dim,hidden_layer: list):
        super().__init__()

        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim

        for size in hidden_layer:
            self.layers.append(nn.Linear(self.input_dim, size))
            self.input_dim = size
        
        self.layers.append(nn.Linear(self.input_dim,self.output_dim))
    
    def forward(self, input_data):
        for layer in self.layers:
            input_data = layer(input_data)
        return input_data 

In [25]:
def create_directory(trial_number: int,
                     model_number: int,
                     lr: float, 
                     gamma: float,
                     epsilon_decay: int,
                     batch_size: int, 
                     buffer_size: int,
                     target_update_freq: int):
    ''' Function that creates directory to save model state_dict, architecture, training configurationi and history

    Parameters: 
    ------------
    (hyperparameters for differentiating between different directory)
    
    lr : float
        the learning rate to optimize the Q network
    gamma : float 
        the discount rate in Q learning
    epsilon_decay : integer
        the amount of episode over which the exploratory rate (epsilon) decays
    batch_size : integer
        number of experience drawn from replay buffer to train the behaviour network
    buffer_size : integer
        the number of samples in the replay buffer at a time
    target_udpate_freq : integer
        the amount of step count during the training process before updating the target Q net (loading the parameters of the behaviour net onto the target Q Net)
    

    Returns
    ------------ 
    output_dir : path
        the directory to which the trainin results and model (state_dict and architecture) are saved

    '''
    timestamp = datetime.now().strftime("%y%m%d_%H%M")
    BASE_DIR = os.getcwd()

    name_codified = f"run_{trial_number:04d}"
    hyperparam_codified = f"{timestamp}_{model_number}_{lr}_{gamma}_{epsilon_decay}_{batch_size}_{buffer_size}_{target_update_freq}"

    OUTPUT_DIR = os.path.join(BASE_DIR,"inv_pend_results",name_codified)
    os.makedirs(OUTPUT_DIR, exist_ok=True)      # Create the directory

    # Store the training configs in JSON file
    training_params = {
        'lr': lr,
        'gamma': gamma,
        'epsilon_decay': epsilon_decay,
        'batch_size': batch_size,
        'buffer_size': buffer_size,
        'target_update_freq': target_update_freq
    }

    params_path = os.path.join(OUTPUT_DIR, "params_results.json")
    with open(params_path, "w") as f:
        json.dump({"parameters": training_params}, f, indent=2)


    return name_codified, hyperparam_codified, OUTPUT_DIR

# Parameter Set

In [5]:
model_registry = {
    'DQN_MLP_v0': {
        'class': QNet_MLP,
        'config': [64,32]
    },
    'DQN_MLP_v1': {
        'class': QNet_MLP,
        'config': [32,32]
    },
    'DQN_MLP_v2': {
        
        'class': QNet_MLP,
        'config': [32,16]
    },
    'DQN_MLP_v3': {
        'class': QNet_MLP,
        'config': [16,16]
    }
}

The block below is used for testing the model creation automation (Uncomment the first line to run the test)

In [6]:
%%script test --no-raise-error
for model in model_registry:
    Q_net = QNet_MLP(obs_space, action_space, model_registry[model]['config'])
    # print(model_registry[model]['config'])
    print(Q_net)

Couldn't find program: 'test'


In [None]:
param_grid = {
    'LR': [5e-4, 1e-3, 5e-3, 1e-2],
    "BUFFER_SIZE": [1e3, 5e3, 1e4],
    "MIN_REPLAY_SIZE": [1000],
    "TARGET_UPDATE_FREQ": [1e3, 5e3, 1e4],

    "GAMMA": [0.90, 0.95, 0.98],
    "EPSILON_START": [1.0],
    "EPSILON_END": [0.1],
    "EPSILON_DECAY": [1000, 5000, 10000],

    "EPISODE_TRAIN": [5000],                # training truncation criteria
    "BATCH_SIZE": [32, 64, 128]
}

success_criteria = 450                      #


In [41]:
import itertools

keys, values = zip(*param_grid.items())
for v in itertools.product(*values):
    config = dict(zip(keys,v))
    print(config)

{'LR': 0.0005, 'BUFFER_SIZE': 1000.0, 'MIN_REPLAY_SIZE': 1000, 'TARGET_UPDATE_FREQ': 1000.0, 'GAMMA': 0.9, 'EPSILON_START': 1.0, 'EPSILON_END': 0.1, 'EPSILON_DECAY': 1000, 'EPISODE_TRAIN': 1000, 'BATCH_SIZE': 32}
{'LR': 0.0005, 'BUFFER_SIZE': 1000.0, 'MIN_REPLAY_SIZE': 1000, 'TARGET_UPDATE_FREQ': 1000.0, 'GAMMA': 0.9, 'EPSILON_START': 1.0, 'EPSILON_END': 0.1, 'EPSILON_DECAY': 1000, 'EPISODE_TRAIN': 1000, 'BATCH_SIZE': 64}
{'LR': 0.0005, 'BUFFER_SIZE': 1000.0, 'MIN_REPLAY_SIZE': 1000, 'TARGET_UPDATE_FREQ': 1000.0, 'GAMMA': 0.9, 'EPSILON_START': 1.0, 'EPSILON_END': 0.1, 'EPSILON_DECAY': 1000, 'EPISODE_TRAIN': 1000, 'BATCH_SIZE': 128}
{'LR': 0.0005, 'BUFFER_SIZE': 1000.0, 'MIN_REPLAY_SIZE': 1000, 'TARGET_UPDATE_FREQ': 1000.0, 'GAMMA': 0.9, 'EPSILON_START': 1.0, 'EPSILON_END': 0.1, 'EPSILON_DECAY': 1000, 'EPISODE_TRAIN': 5000, 'BATCH_SIZE': 32}
{'LR': 0.0005, 'BUFFER_SIZE': 1000.0, 'MIN_REPLAY_SIZE': 1000, 'TARGET_UPDATE_FREQ': 1000.0, 'GAMMA': 0.9, 'EPSILON_START': 1.0, 'EPSILON_END': 0.1

In [None]:
 *param_grid.items()
a

SyntaxError: can't use starred expression here (2929000448.py, line 1)

# Grid Search

In [None]:
trial_number = 0
trial_dict = {}

for model_number, model in enumerate(model_registry):
    hidden_layers = model_registry[model]['config']
    QNet = QNet_MLP(obs_space, action_space, hidden_layers)

    for lr in LR:
        for gamma in GAMMA:
            for epsilon_decay in EPSILON_DECAY:
                for batch_size in BATCH_SIZE:
                    for buffer_size in BUFFER_SIZE:
                        for target_update_freq in TARGET_UPDATE_FREQ:
                            # Create directory to store the training configuration and results
                            trial_number += 1
                            name, hyperparam, path = create_directory(trial_number,model_number+1,lr,gamma,epsilon_decay,batch_size,buffer_size,target_update_freq)
                            trial_dict[name] = hyperparam
                            print(hyperparam)

                            model_path = os.path.join(path,'model_summary.json')
                            with open(model_path, 'w') as f:
                                with redirect_stdout(f):
                                    summary(QNet, (obs_space, ))

                            # Keep track of the best performance and weights of this best network.
                            break
                        break
                    break
                break
            break
        break
    break                      
                            # Store the training parameters

                            # Plot the result and store in the directory
                            # for episode in range(max(EPISODE_TRAIN)):   # Train to the max number of episode_train and save checkpoints along the way

250521_1516_1_0.0005_0.9_1000_32_1000.0_1000.0


In [17]:
QNet = QNet_MLP(obs_space, action_space, [64, 32])
summary(QNet, (obs_space,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 64]             320
            Linear-2                   [-1, 32]           2,080
            Linear-3                    [-1, 2]              66
Total params: 2,466
Trainable params: 2,466
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.01
Estimated Total Size (MB): 0.01
----------------------------------------------------------------
