# DQN Hyperparameter tuning with Optuna

Note: Use pytorch_lightning==1.6.0

In [4]:
import copy
import gym
import torch
import random
import statistics

import numpy as np
import torch.nn.functional as F

from collections import deque, namedtuple
from IPython.display import HTML
from base64 import b64encode

from torch import Tensor, nn
from torch.utils.data import DataLoader
from torch.utils.data.dataset import IterableDataset
from torch.optim import AdamW

from pytorch_lightning import LightningModule, Trainer

from pytorch_lightning.callbacks import EarlyStopping

from gym.wrappers import RecordVideo, RecordEpisodeStatistics, TimeLimit
import matplotlib.pyplot as plt
%matplotlib inline

import optuna
from optuna.integration import PyTorchLightningPruningCallback
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
num_gpus = torch.cuda.device_count()
print(f"num gpus : {num_gpus}")

num gpus : 1


In [5]:
## Creating Deep Q Network:
class DQN(nn.Module):
    def __init__ (self , hidden_size , obs_size , action_size):
        super().__init__()
        self.net = nn.Sequential(
        nn.Linear(obs_size , hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size , hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size , action_size),
        )
    def forward(self , x):
        return self.net(x.float())

In [6]:
## Creating Policy: state -> action or action_probs
def epsilon_greedy(state , env , net , epsilon=0):
    if np.random.random() < epsilon :
        action = env.action_space.sample()
        
    else:
        state = torch.tensor([state]).to(device)
        q_values = net(state)
        _ , action = torch.max(q_values , dim=1) # returns (value , idx)
        action = int(action.item())
    
    return action

In [7]:
## Creating Replay Buffer:
class ReplayBuffer:
    
    def __init__(self , capacity):
        self.buffer = deque(maxlen=capacity) #    it's like a list but manages its contents automaticly
        
    def __len__(self):
        return len(self.buffer)
    
    def append(self, experience):
        self.buffer.append(experience)
    
    def sample(self , batch_size):
        return random.sample(self.buffer , batch_size)
    

In [8]:
class RLDataset(IterableDataset):
    
    def __init__ (self , buffer , sample_size = 200):
        self.buffer = buffer
        self.sample_size = sample_size
        
    def __iter__(self):
        for experience in self.buffer.sample(self.sample_size):
            yield experience # returns by request of pytorch
     

In [9]:
## Creating Environment
def create_environment(name):
    env = gym.make(name)
    env = TimeLimit(env , max_episode_steps = 400)  #terminates after 400 steps
    env = RecordVideo(env , video_folder = './videos' , episode_trigger=lambda x: x%50==0 )
    env = RecordEpisodeStatistics(env)
    return env

In [10]:
class DeepQLearning(LightningModule):
    
    # intialize
    def __init__(self , env_name , policy=epsilon_greedy , capacity=100_000 , batch_size=1024 , lr = 0.001 ,
                 hidden_size=128 , gamma=0.99, loss_fn = F.smooth_l1_loss , optim = AdamW ,
                eps_start = 1.0 , eps_end = 0.15 , eps_last_episode=100 , samples_per_epoch=10_000 ,
                sync_rate=10):
        super().__init__()
        self.env = create_environment(env_name)
        obs_size=self.env.observation_space.shape[0]
        action_size = self.env.action_space.n
        self.q_net=DQN(hidden_size , obs_size , action_size)
        self.target_q_net=copy.deepcopy(self.q_net)
        self.policy = policy
        self.buffer = ReplayBuffer(capacity=capacity)
        
        self.save_hyperparameters()
        
        while len(self.buffer)  < self.hparams.samples_per_epoch:
            self.play_episode(epsilon=self.hparams.eps_start)
    
    @torch.no_grad()
    def play_episode(self ,policy=None ,  epsilon =0):
        state = self.env.reset()
        done = False
        while not done :
            
            if policy:
                action = policy(state , self.env , self.q_net , epsilon = epsilon)
            else:
                action = self.env.action_space.sample()
            next_state , reward , done , _ = self.env.step(action)
            exp = (state , action , reward , done , next_state)
            self.buffer.append(exp)
            state = next_state
            
        
        
    # forward
    def forward(self , x):
        return self.q_net(x)
    
    
    # configure optimizers
    def configure_optimizers(self):
        q_net_optimizer = self.hparams.optim(self.q_net.parameters() , lr = self.hparams.lr)
        return [q_net_optimizer]
    
    
    # create dataloader
    def train_dataloader(self):
        dataset = RLDataset(self.buffer , self.hparams.samples_per_epoch)
        dataloader = DataLoader(dataset=dataset ,batch_size=self.hparams.batch_size )
        return dataloader
    
    
    # training step
    def training_step(self , batch , batch_idx):
        states , actions , rewards , dones , next_states = batch
        actions = actions.unsqueeze(1)
        rewards = rewards.unsqueeze(1)
        dones = dones.unsqueeze(1)
        state_action_values = self.q_net(states).gather(1,actions)
        next_action_values , _ = self.target_q_net(next_states).max(dim=1 , keepdim=True)
        expected_state_action_values = rewards + self.hparams.gamma * next_action_values * (torch.logical_not(dones))
        loss = self.hparams.loss_fn(state_action_values , expected_state_action_values )
        self.log('episode/Q-error' , loss)
        return loss
        
        
    
    # training epoch end
    def training_epoch_end(self, training_step_outputs):
        epsilon = max(self.hparams.eps_end , self.hparams.eps_start - self.current_epoch/self.hparams.eps_last_episode)
        self.play_episode(policy=self.policy , epsilon=epsilon)
        self.log('episode/Return' , self.env.return_queue[-1])
        returns = list(self.env.return_queue)[-100:]
        self.log('hp_metric',statistics.mean(returns))
        
        if self.current_epoch % self.hparams.sync_rate == 0:
            self.target_q_net.load_state_dict(self.q_net.state_dict())
        

In [11]:
## Objective Function:
def objective(trial):
    lr = trial.suggest_float("lr" , 1e-5 , 1e-1 , log=True) #logarithmic scale to not spend too much time searching on smaller values
    gamma = trial.suggest_float("gamma" , 0.0 , 1.0)
    hidden_size = trial.suggest_int("hidden_size" , low=16 , high=256 , step=16)
    
    algo = DeepQLearning("LunarLander-v2" , lr=lr , gamma=gamma , hidden_size=hidden_size)
    
    callback = PyTorchLightningPruningCallback(trial , monitor = 'hp_metric')
    trainer = Trainer(gpus=num_gpus , max_epochs=200 , callbacks=[])
    
    hyperparameters = dict(lr=lr , gamma=gamma , hidden_size=hidden_size)
    trainer.logger.log_hyperparams(hyperparameters)
    
    trainer.fit(algo)
    
    return trainer.callback_metrics['hp_metric'].item()
    

In [12]:
## Study:
pruner = optuna.pruners.SuccessiveHalvingPruner()
study = optuna.create_study(direction='maximize' , pruner = pruner)

[I 2023-07-27 16:54:30,051] A new study created in memory with name: no-name-3fec4476-a8c1-4c42-8bf7-31acd6928589


In [13]:
!rm -r lightning_logs/
!rm -r videos/
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

rm: cannot remove 'lightning_logs/': No such file or directory


Reusing TensorBoard on port 6006 (pid 13104), started 1:56:10 ago. (Use '!kill 13104' to kill it.)

In [14]:
study.optimize(objective , n_trials=20)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
0 | q_net        | DQN  | 14.1 K
1 | target_q_net | DQN  | 14.1 K
--------------------------------------
28.2 K    Trainable params
0         Non-trainable params
28.2 K    Total params
0.113     Total estimated model params size (MB)
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

  state = torch.tensor([state]).to(device)
[I 2023-07-27 16:55:50,723] Trial 0 finished with value: -71.73099517822266 and parameters: {'lr': 0.0007912637499326837, 'gamma': 0.02614805920645935, 'hidden_size': 112}. Best is trial 0 with value: -71.73099517822266.
  logger.warn(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
0 | q_net        | DQN  | 46.2 K
1 | target_q_net | DQN  | 46.2 K
--------------------------------------
92.4 K    Trainable params
0         Non-trainable params
92.4 K    Total params
0.369     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000226AFA26310; to 'Win32Window' at 0x00000226A8619C10>
[I 2023-07-27 16:57:15,023] Trial 1 finished with value: -42.322547912597656 and parameters: {'lr': 0.0010154155294917794, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002276E8ED540; to 'Win32Window' at 0x0000022765F6B910>
[I 2023-07-27 16:58:38,204] Trial 2 finished with value: -48.804439544677734 and parameters: {'lr': 0.014094220029630601, 'gam

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002272FF4B4A0; to 'Win32Window' at 0x00000226C10680D0>
[I 2023-07-27 17:00:09,263] Trial 3 finished with value: -87.79645538330078 and parameters: {'lr': 9.16275109628167e-05, 'gamm

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000226B810CD60; to 'Win32Window' at 0x00000226B4AA0DF0>
[I 2023-07-27 17:01:06,833] Trial 4 finished with value: -327.2330017089844 and parameters: {'lr': 1.9295090125652937e-05, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x0000022729DAFEF0; to 'Win32Window' at 0x00000227494D0190>
[I 2023-07-27 17:02:49,208] Trial 5 finished with value: -77.34109497070312 and parameters: {'lr': 0.00763667621038851, 'gamma

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002277F230400; to 'Win32Window' at 0x000002275E30BEE0>
[I 2023-07-27 17:04:16,780] Trial 6 finished with value: -66.2492904663086 and parameters: {'lr': 0.009566616443292713, 'gamma

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002276A579EA0; to 'Win32Window' at 0x000002277B5E0CA0>
[I 2023-07-27 17:05:54,705] Trial 7 finished with value: -70.26634216308594 and parameters: {'lr': 0.00010843811753761367, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000226FD0873B0; to 'Win32Window' at 0x00000226C10D7C40>
[I 2023-07-27 17:07:26,263] Trial 8 finished with value: -67.11900329589844 and parameters: {'lr': 0.030629549857479333, 'gamm

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002276A55EC20; to 'Win32Window' at 0x00000226B91D3E20>
[I 2023-07-27 17:08:52,665] Trial 9 finished with value: -62.13378143310547 and parameters: {'lr': 0.01178229903454039, 'gamma

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000226B80896D0; to 'Win32Window' at 0x00000226B4AA0D00>
[I 2023-07-27 17:10:31,689] Trial 10 finished with value: -14.531746864318848 and parameters: {'lr': 0.001157407434581816, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002278CC10360; to 'Win32Window' at 0x00000227155D1490>
[I 2023-07-27 17:12:00,892] Trial 11 finished with value: -25.439970016479492 and parameters: {'lr': 0.0013709202376380807, 'g

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002278937F1D0; to 'Win32Window' at 0x00000226FD0A6310>
[I 2023-07-27 17:13:31,575] Trial 12 finished with value: -34.426631927490234 and parameters: {'lr': 0.09089509266845, 'gamma'

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000227893FE630; to 'Win32Window' at 0x0000022749CA2C10>
[I 2023-07-27 17:14:59,954] Trial 13 finished with value: -30.436830520629883 and parameters: {'lr': 0.001975540393410073, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x0000022765B84720; to 'Win32Window' at 0x00000226B8079E50>
[I 2023-07-27 17:16:33,330] Trial 14 finished with value: -17.307844161987305 and parameters: {'lr': 0.002453256964344032, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000226B80B3590; to 'Win32Window' at 0x0000022749CAF760>
[I 2023-07-27 17:18:05,412] Trial 15 finished with value: -16.04950523376465 and parameters: {'lr': 0.0026195017357380115, 'ga

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x00000227893DA5E0; to 'Win32Window' at 0x000002273FB8A9D0>
[I 2023-07-27 17:19:34,661] Trial 16 finished with value: -37.0911979675293 and parameters: {'lr': 0.0003667894560355202, 'gam

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002272FF447C0; to 'Win32Window' at 0x00000226B918BE80>
[I 2023-07-27 17:21:10,642] Trial 17 finished with value: 16.095420837402344 and parameters: {'lr': 0.003807635325661047, 'gam

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x0000022789434540; to 'Win32Window' at 0x0000022749CA2430>
[I 2023-07-27 17:22:37,787] Trial 18 finished with value: -27.06827163696289 and parameters: {'lr': 0.004190166416332845, 'gam

Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x000002277EEB5EF0; to 'Win32Window' at 0x000002277F20CD00>
[I 2023-07-27 17:24:09,085] Trial 19 finished with value: -10.858508110046387 and parameters: {'lr': 0.0004701590775504911, 'g

In [15]:
study.best_params

{'lr': 0.003807635325661047, 'gamma': 0.9943885578314805, 'hidden_size': 256}

In [17]:
!rm -r lightning_logs/
!rm -r videos/
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

rm: cannot remove 'lightning_logs/': No such file or directory


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


rm: cannot remove 'videos/': No such file or directory


Reusing TensorBoard on port 6006 (pid 13104), started 2:28:05 ago. (Use '!kill 13104' to kill it.)

In [18]:
## Now Let's train with the best parameters:
algo = DeepQLearning("LunarLander-v2" ,**study.best_params)
trainer = Trainer(gpus=num_gpus , max_epochs=5000 )
trainer.fit(algo)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: C:\Users\Ali\Documents\RLwithPhil\code\lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name         | Type | Params
--------------------------------------
0 | q_net        | DQN  | 69.1 K
1 | target_q_net | DQN  | 69.1 K
--------------------------------------
138 K     Trainable params
0         Non-trainable params
138 K     Total params
0.553     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Exception ignored in: <function Viewer.__del__ at 0x00000226AFA6EA60>
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 185, in __del__
    self.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\gym\envs\classic_control\rendering.py", line 101, in close
    self.window.close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\win32\__init__.py", line 332, in close
    super(Win32Window, self).close()
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\site-packages\pyglet\window\__init__.py", line 858, in close
    app.windows.remove(self)
  File "C:\ProgramData\Anaconda3\envs\vrep\lib\_weakrefset.py", line 114, in remove
    self.data.remove(ref(item))
KeyError: <weakref at 0x0000022765F7A2C0; to 'Win32Window' at 0x00000226B918BA30>
