### Importing the libraries

In [1]:
from typing import Callable, Dict, List, Optional, Tuple, Type, Union, Any

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import torch as th

from stable_baselines3 import PPO
from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.policies import ActorCriticPolicy
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.vec_env import VecTransposeImage
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback

In [2]:
#!cd github & git clone https://github.com/mwydmuch/ViZDoom.git

### Importing  OpenAI and Doom Dependencies

In [3]:
from vizdoom import *
import random
import time
import gym
from gym import Env
from gym.spaces import Discrete, Box
import cv2

## Game Setup

### VizDoom

In [4]:
#Creating a game instance
game = DoomGame()
#Load our desired Doom configurations 
game.load_config('github/ViZDoom/scenarios/basic.cfg')
#starting up the game
#game.init()

True

## Wrapping and defining our environment 

### VizDoom

In [5]:
#Define our vizdoom environment class
class VizDoomGym(Env):
    #Initialize our environment
    def __init__(self, render=False):
        #inherit from env base class
        super().__init__()
        self.game = DoomGame()
        #This allows us to load up our configurations which defines our maps,rewards,buttons etc...
        self.game.load_config('github/ViZDoom/scenarios/basic.cfg')
        
        
        #Determine if to render game window
        if render == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        #start up the game
        self.game.init()
        
        #create our observation space.
        #We want the same of the observation space to match the game frame exactly- 
        #This is what is used to establish the parameters for the underlying models.
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8)
        #define our action space
        self.action_space = Discrete(3)
    #take step in environment
    def step(self, action):
        #Define the action to take
        actions = np.identity(3, dtype=np.uint8)
        #this actions will be a matrix defining if the agent go left,
        #right or shoot and also our frame skip parameter
        reward = self.game.make_action(actions[action],4)        
        
        #return numpy zeroes array if nothing is returned
        if self.game.get_state():
            state = self.game.get_state().screen_buffer
            #gray scaling the captured image
            state = self.grayscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        else:
            state = np.zeros(self.observation_space.shape)
            info = 0      
        
        
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info
    #render game 
    def render():
        pass    
    def reset(self):
        self.game.new_episode()        
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    #grayscale and resize the image
    def grayscale(self, observation):
        #take the observation, grab the color channel and move it to the end
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    #close the game
    def close(self):
        #self.game.close()
        pass

### Verify our environment

In [6]:
#import environment checker
from stable_baselines3.common import env_checker

In [12]:
env = VizDoomGym(render=True)

In [13]:
env_checker.check_env(env)

In [15]:
env.close()

In [16]:
#create our environment without rendering it
env = VizDoomGym()

### Defining custom DQN Network

In [30]:
class CustomCNN(BaseFeaturesExtractor):
    
    
    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256):
        super(CustomCNN, self).__init__(observation_space, features_dim)
        # We assume CxHxW images (channels first)
        # Re-ordering will be done by pre-preprocessing or wrapper
        n_input_channels = observation_space.shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=8, stride=4, padding=0),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=4, stride=2, padding=0),
            nn.ReLU(),
            nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=0),
            nn.ReLU(),
            nn.Flatten(),
        )

        # Compute shape by doing one forward pass
        with th.no_grad():
            n_flatten = self.cnn(
                th.as_tensor(observation_space.sample()[None]).float()
            ).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations: th.Tensor) -> th.Tensor:
        return self.linear(self.cnn(observations))
    

policy_kwargs = dict(
    features_extractor_class=CustomCNN,    
)

### Setting up Callback

In [31]:
class TrainingAndLoggingCallback(BaseCallback):
    
   
    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainingAndLoggingCallback, self). __init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
            
            
    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
            
        return True

In [32]:
CHECKPOINT_DIR = './train/train_basic3'
LOG_DIR = './train/log_basic3'

In [33]:
callback = TrainingAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

### Create and Train model

In [34]:
 model = DQN("CnnPolicy", env, buffer_size = 320000,batch_size = 64, policy_kwargs=policy_kwargs,
             verbose=1,optimize_memory_usage = True, learning_rate=0.001)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [35]:
model.learn(total_timesteps=100000,callback=callback)

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 56.8     |
|    ep_rew_mean      | -252     |
|    exploration_rate | 0.978    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 656      |
|    time_elapsed     | 0        |
|    total_timesteps  | 227      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 47.8     |
|    ep_rew_mean      | -179     |
|    exploration_rate | 0.964    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 748      |
|    time_elapsed     | 0        |
|    total_timesteps  | 382      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.7     |
|    ep_rew_mean      | -128     |
|    exploration_rate | 0.954    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.5     |
|    ep_rew_mean      | -134     |
|    exploration_rate | 0.646    |
| time/               |          |
|    episodes         | 92       |
|    fps              | 783      |
|    time_elapsed     | 4        |
|    total_timesteps  | 3727     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.2     |
|    ep_rew_mean      | -132     |
|    exploration_rate | 0.633    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 787      |
|    time_elapsed     | 4        |
|    total_timesteps  | 3860     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.3     |
|    ep_rew_mean      | -133     |
|    exploration_rate | 0.618    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.7     |
|    ep_rew_mean      | -90.9    |
|    exploration_rate | 0.374    |
| time/               |          |
|    episodes         | 180      |
|    fps              | 840      |
|    time_elapsed     | 7        |
|    total_timesteps  | 6586     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.4     |
|    ep_rew_mean      | -83.7    |
|    exploration_rate | 0.373    |
| time/               |          |
|    episodes         | 184      |
|    fps              | 839      |
|    time_elapsed     | 7        |
|    total_timesteps  | 6600     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.4     |
|    ep_rew_mean      | -82.2    |
|    exploration_rate | 0.352    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 35.4     |
|    ep_rew_mean      | -99.8    |
|    exploration_rate | 0.0696   |
| time/               |          |
|    episodes         | 268      |
|    fps              | 850      |
|    time_elapsed     | 11       |
|    total_timesteps  | 9794     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.4     |
|    ep_rew_mean      | -92.8    |
|    exploration_rate | 0.0631   |
| time/               |          |
|    episodes         | 272      |
|    fps              | 839      |
|    time_elapsed     | 11       |
|    total_timesteps  | 9862     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.4     |
|    ep_rew_mean      | -92.6    |
|    exploration_rate | 0.0594   |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.6     |
|    ep_rew_mean      | -68.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 356      |
|    fps              | 840      |
|    time_elapsed     | 14       |
|    total_timesteps  | 12319    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.8     |
|    ep_rew_mean      | -69.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 360      |
|    fps              | 842      |
|    time_elapsed     | 14       |
|    total_timesteps  | 12495    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.9     |
|    ep_rew_mean      | -76.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.5     |
|    ep_rew_mean      | -87.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 444      |
|    fps              | 864      |
|    time_elapsed     | 17       |
|    total_timesteps  | 15342    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.9     |
|    ep_rew_mean      | -89.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 448      |
|    fps              | 865      |
|    time_elapsed     | 17       |
|    total_timesteps  | 15442    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.8     |
|    ep_rew_mean      | -95.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.1     |
|    ep_rew_mean      | -55.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 532      |
|    fps              | 881      |
|    time_elapsed     | 20       |
|    total_timesteps  | 17796    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.4     |
|    ep_rew_mean      | -45.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 536      |
|    fps              | 882      |
|    time_elapsed     | 20       |
|    total_timesteps  | 17848    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.3     |
|    ep_rew_mean      | -45.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.3     |
|    ep_rew_mean      | -60.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 620      |
|    fps              | 896      |
|    time_elapsed     | 22       |
|    total_timesteps  | 20381    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | -54.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 624      |
|    fps              | 895      |
|    time_elapsed     | 22       |
|    total_timesteps  | 20393    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | -54.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | -63.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 905      |
|    time_elapsed     | 25       |
|    total_timesteps  | 22908    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.2     |
|    ep_rew_mean      | -64.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 904      |
|    time_elapsed     | 25       |
|    total_timesteps  | 23000    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.6     |
|    ep_rew_mean      | -65.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.7     |
|    ep_rew_mean      | -77.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 796      |
|    fps              | 911      |
|    time_elapsed     | 28       |
|    total_timesteps  | 25785    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.9     |
|    ep_rew_mean      | -78.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 800      |
|    fps              | 911      |
|    time_elapsed     | 28       |
|    total_timesteps  | 25839    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.8     |
|    ep_rew_mean      | -78.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.5     |
|    ep_rew_mean      | -28.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 884      |
|    fps              | 915      |
|    time_elapsed     | 30       |
|    total_timesteps  | 27717    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.9     |
|    ep_rew_mean      | -25.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 888      |
|    fps              | 917      |
|    time_elapsed     | 30       |
|    total_timesteps  | 27879    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.4     |
|    ep_rew_mean      | -29.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.5     |
|    ep_rew_mean      | -114     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 927      |
|    time_elapsed     | 33       |
|    total_timesteps  | 31080    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.9     |
|    ep_rew_mean      | -117     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 976      |
|    fps              | 927      |
|    time_elapsed     | 33       |
|    total_timesteps  | 31205    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.5     |
|    ep_rew_mean      | -114     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.1     |
|    ep_rew_mean      | -70.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1060     |
|    fps              | 933      |
|    time_elapsed     | 36       |
|    total_timesteps  | 33757    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.2     |
|    ep_rew_mean      | -63      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1064     |
|    fps              | 933      |
|    time_elapsed     | 36       |
|    total_timesteps  | 33831    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.2     |
|    ep_rew_mean      | -68.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -71.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1148     |
|    fps              | 928      |
|    time_elapsed     | 39       |
|    total_timesteps  | 36268    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.6     |
|    ep_rew_mean      | -62.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1152     |
|    fps              | 928      |
|    time_elapsed     | 39       |
|    total_timesteps  | 36384    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.8     |
|    ep_rew_mean      | -44.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28       |
|    ep_rew_mean      | -51.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1236     |
|    fps              | 923      |
|    time_elapsed     | 42       |
|    total_timesteps  | 38822    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | -53.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1240     |
|    fps              | 923      |
|    time_elapsed     | 42       |
|    total_timesteps  | 38945    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.3     |
|    ep_rew_mean      | -53.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25       |
|    ep_rew_mean      | -36.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1324     |
|    fps              | 920      |
|    time_elapsed     | 44       |
|    total_timesteps  | 41084    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.2     |
|    ep_rew_mean      | -37.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1328     |
|    fps              | 921      |
|    time_elapsed     | 44       |
|    total_timesteps  | 41198    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.9     |
|    ep_rew_mean      | -42.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.9     |
|    ep_rew_mean      | -75.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 925      |
|    time_elapsed     | 47       |
|    total_timesteps  | 43937    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.4     |
|    ep_rew_mean      | -77.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 924      |
|    time_elapsed     | 47       |
|    total_timesteps  | 44055    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | -72.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.8     |
|    ep_rew_mean      | -42.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1500     |
|    fps              | 929      |
|    time_elapsed     | 49       |
|    total_timesteps  | 46189    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.7     |
|    ep_rew_mean      | -40      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1504     |
|    fps              | 929      |
|    time_elapsed     | 49       |
|    total_timesteps  | 46332    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27       |
|    ep_rew_mean      | -47.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.5     |
|    ep_rew_mean      | -51.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1588     |
|    fps              | 927      |
|    time_elapsed     | 52       |
|    total_timesteps  | 48630    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.1     |
|    ep_rew_mean      | -48.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1592     |
|    fps              | 927      |
|    time_elapsed     | 52       |
|    total_timesteps  | 48681    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.7     |
|    ep_rew_mean      | -40.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33       |
|    ep_rew_mean      | -86.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1664     |
|    fps              | 646      |
|    time_elapsed     | 79       |
|    total_timesteps  | 51264    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.94     |
|    n_updates        | 315      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.1     |
|    ep_rew_mean      | -86.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1668     |
|    fps              | 628      |
|    time_elapsed     | 81       |
|    total_timesteps  | 51389    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.52     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.1     |
|    ep_rew_mean      | -105     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1728     |
|    fps              | 425      |
|    time_elapsed     | 125      |
|    total_timesteps  | 53439    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.56     |
|    n_updates        | 859      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.1     |
|    ep_rew_mean      | -111     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1732     |
|    fps              | 408      |
|    time_elapsed     | 131      |
|    total_timesteps  | 53739    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.141    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.4     |
|    ep_rew_mean      | -118     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1792     |
|    fps              | 310      |
|    time_elapsed     | 180      |
|    total_timesteps  | 56228    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.99     |
|    n_updates        | 1556     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.3     |
|    ep_rew_mean      | -111     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1796     |
|    fps              | 307      |
|    time_elapsed     | 183      |
|    total_timesteps  | 56348    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.91     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 40.4     |
|    ep_rew_mean      | -122     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1856     |
|    fps              | 255      |
|    time_elapsed     | 230      |
|    total_timesteps  | 58816    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 8.65     |
|    n_updates        | 2203     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 39.9     |
|    ep_rew_mean      | -120     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1860     |
|    fps              | 252      |
|    time_elapsed     | 233      |
|    total_timesteps  | 58970    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.03     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 55.3     |
|    ep_rew_mean      | -202     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1920     |
|    fps              | 204      |
|    time_elapsed     | 305      |
|    total_timesteps  | 62579    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.36     |
|    n_updates        | 3144     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 57.5     |
|    ep_rew_mean      | -214     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1924     |
|    fps              | 201      |
|    time_elapsed     | 312      |
|    total_timesteps  | 62879    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 4.19     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 54       |
|    ep_rew_mean      | -183     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1984     |
|    fps              | 182      |
|    time_elapsed     | 357      |
|    total_timesteps  | 65407    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.412    |
|    n_updates        | 3851     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 53.8     |
|    ep_rew_mean      | -181     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1988     |
|    fps              | 181      |
|    time_elapsed     | 362      |
|    total_timesteps  | 65691    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.792    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 44.9     |
|    ep_rew_mean      | -125     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2048     |
|    fps              | 166      |
|    time_elapsed     | 411      |
|    total_timesteps  | 68308    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.819    |
|    n_updates        | 4576     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 43.5     |
|    ep_rew_mean      | -116     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2052     |
|    fps              | 165      |
|    time_elapsed     | 414      |
|    total_timesteps  | 68474    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.98     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.8     |
|    ep_rew_mean      | -95.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2112     |
|    fps              | 153      |
|    time_elapsed     | 460      |
|    total_timesteps  | 70713    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.23     |
|    n_updates        | 5178     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 38.2     |
|    ep_rew_mean      | -92.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2116     |
|    fps              | 152      |
|    time_elapsed     | 463      |
|    total_timesteps  | 70871    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.14     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 24.9     |
|    ep_rew_mean      | -23.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2176     |
|    fps              | 148      |
|    time_elapsed     | 483      |
|    total_timesteps  | 71809    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.713    |
|    n_updates        | 5452     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23.5     |
|    ep_rew_mean      | -15.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2180     |
|    fps              | 147      |
|    time_elapsed     | 487      |
|    total_timesteps  | 71970    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.33     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.9     |
|    ep_rew_mean      | 35.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2240     |
|    fps              | 144      |
|    time_elapsed     | 503      |
|    total_timesteps  | 72775    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 7.61     |
|    n_updates        | 5693     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.8     |
|    ep_rew_mean      | 31.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2244     |
|    fps              | 144      |
|    time_elapsed     | 504      |
|    total_timesteps  | 72864    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.373    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.8     |
|    ep_rew_mean      | 34       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2304     |
|    fps              | 141      |
|    time_elapsed     | 522      |
|    total_timesteps  | 73711    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.9      |
|    n_updates        | 5927     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.8     |
|    ep_rew_mean      | 28.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2308     |
|    fps              | 140      |
|    time_elapsed     | 526      |
|    total_timesteps  | 73935    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.53     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.6     |
|    ep_rew_mean      | 24.8     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2368     |
|    fps              | 137      |
|    time_elapsed     | 545      |
|    total_timesteps  | 74850    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.57     |
|    n_updates        | 6212     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.9     |
|    ep_rew_mean      | 28.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2372     |
|    fps              | 137      |
|    time_elapsed     | 545      |
|    total_timesteps  | 74867    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 8.79     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11.3     |
|    ep_rew_mean      | 46.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2432     |
|    fps              | 135      |
|    time_elapsed     | 556      |
|    total_timesteps  | 75346    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.74     |
|    n_updates        | 6336     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 11.4     |
|    ep_rew_mean      | 46.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2436     |
|    fps              | 135      |
|    time_elapsed     | 556      |
|    total_timesteps  | 75368    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.83     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.1     |
|    ep_rew_mean      | 29.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2496     |
|    fps              | 132      |
|    time_elapsed     | 576      |
|    total_timesteps  | 76487    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1        |
|    n_updates        | 6621     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.7     |
|    ep_rew_mean      | 37.6     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2500     |
|    fps              | 132      |
|    time_elapsed     | 576      |
|    total_timesteps  | 76507    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.44     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.7     |
|    ep_rew_mean      | 34       |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2560     |
|    fps              | 130      |
|    time_elapsed     | 591      |
|    total_timesteps  | 77359    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.45     |
|    n_updates        | 6839     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.2     |
|    ep_rew_mean      | 36.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2564     |
|    fps              | 130      |
|    time_elapsed     | 592      |
|    total_timesteps  | 77386    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.78     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 14.9     |
|    ep_rew_mean      | 30.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2624     |
|    fps              | 127      |
|    time_elapsed     | 617      |
|    total_timesteps  | 78487    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.65     |
|    n_updates        | 7121     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 16.3     |
|    ep_rew_mean      | 22.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2628     |
|    fps              | 126      |
|    time_elapsed     | 621      |
|    total_timesteps  | 78641    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.76     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.6     |
|    ep_rew_mean      | 24.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2688     |
|    fps              | 125      |
|    time_elapsed     | 633      |
|    total_timesteps  | 79266    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.1      |
|    n_updates        | 7316     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 16.1     |
|    ep_rew_mean      | 21.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2692     |
|    fps              | 124      |
|    time_elapsed     | 635      |
|    total_timesteps  | 79354    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.56     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.6     |
|    ep_rew_mean      | 25.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2752     |
|    fps              | 122      |
|    time_elapsed     | 654      |
|    total_timesteps  | 80399    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.69     |
|    n_updates        | 7599     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.7     |
|    ep_rew_mean      | 24.9     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2756     |
|    fps              | 122      |
|    time_elapsed     | 655      |
|    total_timesteps  | 80423    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 7.12     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.6     |
|    ep_rew_mean      | 8.8      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2816     |
|    fps              | 120      |
|    time_elapsed     | 676      |
|    total_timesteps  | 81654    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.84     |
|    n_updates        | 7913     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 17.8     |
|    ep_rew_mean      | 13.2     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2820     |
|    fps              | 120      |
|    time_elapsed     | 676      |
|    total_timesteps  | 81669    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 4.53     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.7     |
|    ep_rew_mean      | 36.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2880     |
|    fps              | 119      |
|    time_elapsed     | 687      |
|    total_timesteps  | 82200    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.46     |
|    n_updates        | 8049     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.5     |
|    ep_rew_mean      | 37.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2884     |
|    fps              | 119      |
|    time_elapsed     | 687      |
|    total_timesteps  | 82208    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 4.71     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.3     |
|    ep_rew_mean      | 42.7     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2944     |
|    fps              | 117      |
|    time_elapsed     | 705      |
|    total_timesteps  | 83062    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.996    |
|    n_updates        | 8265     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 12.6     |
|    ep_rew_mean      | 40.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2948     |
|    fps              | 117      |
|    time_elapsed     | 707      |
|    total_timesteps  | 83143    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 3.88     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 18.2     |
|    ep_rew_mean      | 14.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3008     |
|    fps              | 115      |
|    time_elapsed     | 727      |
|    total_timesteps  | 84294    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.22     |
|    n_updates        | 8573     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 19.6     |
|    ep_rew_mean      | 6.74     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3012     |
|    fps              | 115      |
|    time_elapsed     | 730      |
|    total_timesteps  | 84448    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 5.02     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 19.7     |
|    ep_rew_mean      | 4.49     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3072     |
|    fps              | 113      |
|    time_elapsed     | 755      |
|    total_timesteps  | 85742    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 6.14     |
|    n_updates        | 8935     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 19.9     |
|    ep_rew_mean      | 3.55     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3076     |
|    fps              | 113      |
|    time_elapsed     | 756      |
|    total_timesteps  | 85786    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.05     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.3     |
|    ep_rew_mean      | 1.91     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3136     |
|    fps              | 111      |
|    time_elapsed     | 778      |
|    total_timesteps  | 86887    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.879    |
|    n_updates        | 9221     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.3     |
|    ep_rew_mean      | 1.99     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3140     |
|    fps              | 111      |
|    time_elapsed     | 778      |
|    total_timesteps  | 86898    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.61     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.3     |
|    ep_rew_mean      | 0.01     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3200     |
|    fps              | 109      |
|    time_elapsed     | 804      |
|    total_timesteps  | 88143    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.91     |
|    n_updates        | 9535     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.3     |
|    ep_rew_mean      | 0.05     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3204     |
|    fps              | 109      |
|    time_elapsed     | 804      |
|    total_timesteps  | 88157    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 2.18     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 16.7     |
|    ep_rew_mean      | 18.4     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3264     |
|    fps              | 108      |
|    time_elapsed     | 822      |
|    total_timesteps  | 88969    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.98     |
|    n_updates        | 9742     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 15.9     |
|    ep_rew_mean      | 21.5     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 3268     |
|    fps              | 108      |
|    time_elapsed     | 824      |
|    total_timesteps  | 89055    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 1.91     |
|    n_updates      

KeyboardInterrupt: 

## Optuna for hyperparameter tuning

### Dependencies for optuna

In [37]:
import optuna
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler

### Optuna variables

In [39]:
N_TRIALS = 100
N_STARTUP_TRIALS = 10
N_EVALUATIONS = 2
N_TIMESTEPS = int(2e4)
EVAL_FREQ = int(N_TIMESTEPS / N_EVALUATIONS)
N_EVAL_EPISODES = 3
DEFAULT_HYPERPARAMS = {
    "policy": "CnnPolicy",
    "env": env,
}

### Optuna Sampler

In [40]:
def sample_DQN_params(trial: optuna.Trial) -> Dict[str, Any]:
    
    gamma = 1.0 - trial.suggest_float("gamma", 0.0001, 0.1, log=True)
    buffer_size = trial.suggest_categorical("buffer_size ",[int(1e3),int(1e4),int(1e5)])    
    learning_rate = trial.suggest_float("lr", 1e-5, 1, log=True)
    max_grad_norm = trial.suggest_float("max_grad_norm", 0.3, 5.0, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64,128])    
    net_arch = trial.suggest_categorical("net_arch", ["tiny", "small"])
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])
    seed  = trial.suggest_categorical("seed ", [1, 2,3,4,5])
    optimize_memory_usage = True

    # Display true values
    trial.set_user_attr("gamma_", gamma)    

    net_arch = [
        16,32 if net_arch == "tiny" else 32,32,32
    ]

    activation_fn = { "relu": nn.ReLU, "tanh": nn.Tanh,}[activation_fn]

    return {        
        "gamma": gamma,
        "buffer_size": buffer_size,
        "learning_rate": learning_rate,
        "batch_size": batch_size,
        "max_grad_norm": max_grad_norm,
        "optimize_memory_usage": optimize_memory_usage,
        "policy_kwargs": {
            "net_arch": net_arch,
            "activation_fn": activation_fn,            
        },
    }

### Callback for evaluating trial

In [41]:
class TrialEvalCallback(EvalCallback):
    
    def __init__(
        self,
        eval_env: gym.Env,
        trial: optuna.Trial,
        n_eval_episodes: int = 5,
        eval_freq: int = 10000,
        deterministic: bool = True,
        verbose: int = 0,
    ):

        super().__init__(
            eval_env=eval_env,
            n_eval_episodes=n_eval_episodes,
            eval_freq=eval_freq,
            deterministic=deterministic,
            verbose=verbose,
        )
        self.trial = trial
        self.eval_idx = 0
        self.is_pruned = False

    def _on_step(self) -> bool:
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            super()._on_step()
            self.eval_idx += 1
            self.trial.report(self.last_mean_reward, self.eval_idx)
            # Prune trial if need
            if self.trial.should_prune():
                self.is_pruned = True
                return False
        return True

### Model Tuning

In [42]:
def objective(trial: optuna.Trial) -> float:

    kwargs = DEFAULT_HYPERPARAMS.copy()
    # Sample hyperparameters
    kwargs.update(sample_DQN_params(trial))
    # Create our model
    model = DQN(**kwargs)
    # Wrapping env used for evaluation
    eval_env = Monitor(env)
    eval_env = DummyVecEnv([lambda: eval_env])
    eval_env = VecTransposeImage(eval_env)    
    # Create the callback that will periodically evaluate
    # and report the performance
    eval_callback = TrialEvalCallback(
        eval_env, trial, n_eval_episodes=N_EVAL_EPISODES, eval_freq=EVAL_FREQ, deterministic=True
    )

    nan_encountered = False
    try:
        model.learn(N_TIMESTEPS, callback=eval_callback)
    except AssertionError as e:
        # Sometimes, random hyperparams can generate NaN
        print(e)
        nan_encountered = True
    finally:
        # Free memory
        model.env.close()
        eval_env.close()

    # Tell the optimizer that the trial failed
    if nan_encountered:
        return float("nan")

    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()

    return eval_callback.last_mean_reward


if __name__ == "__main__":
    # Set pytorch num threads to 1 for faster training
    torch.set_num_threads(1)

    sampler = TPESampler(n_startup_trials=N_STARTUP_TRIALS)
    # Do not prune before 1/3 of the max budget is used
    pruner = MedianPruner(n_startup_trials=N_STARTUP_TRIALS, n_warmup_steps=N_EVALUATIONS // 3)

    study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
    try:
        study.optimize(objective, n_trials=N_TRIALS, timeout=600)
    except KeyboardInterrupt:
        pass

    print("Number of finished trials: ", len(study.trials))

    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

    print("  User attrs:")
    for key, value in trial.user_attrs.items():
        print("    {}: {}".format(key, value))

[32m[I 2022-04-19 22:48:33,765][0m A new study created in memory with name: no-name-db4344f9-1bdf-4fb4-a5ba-c905241cd224[0m
[32m[I 2022-04-19 22:48:56,484][0m Trial 0 finished with value: -300.0 and parameters: {'gamma': 0.0009650797921447411, 'buffer_size ': 1000, 'lr': 0.04056457586217798, 'max_grad_norm': 4.269871692268495, 'batch_size': 64, 'net_arch': 'tiny', 'activation_fn': 'relu', 'seed ': 2}. Best is trial 0 with value: -300.0.[0m
[32m[I 2022-04-19 22:49:20,502][0m Trial 1 finished with value: -300.0 and parameters: {'gamma': 0.0020954332430213047, 'buffer_size ': 100000, 'lr': 7.401715439223582e-05, 'max_grad_norm': 2.7585881320921772, 'batch_size': 32, 'net_arch': 'tiny', 'activation_fn': 'relu', 'seed ': 4}. Best is trial 0 with value: -300.0.[0m
[32m[I 2022-04-19 22:49:44,216][0m Trial 2 finished with value: -300.0 and parameters: {'gamma': 0.0755636059450642, 'buffer_size ': 100000, 'lr': 0.10040713616491531, 'max_grad_norm': 0.8752237129600708, 'batch_size': 6

[32m[I 2022-04-19 22:57:54,520][0m Trial 25 finished with value: -73.33333333333333 and parameters: {'gamma': 0.005630025133523602, 'buffer_size ': 10000, 'lr': 5.899082903004146e-05, 'max_grad_norm': 1.1354562766442862, 'batch_size': 128, 'net_arch': 'small', 'activation_fn': 'tanh', 'seed ': 1}. Best is trial 6 with value: 95.0.[0m
[32m[I 2022-04-19 22:58:17,556][0m Trial 26 finished with value: -300.0 and parameters: {'gamma': 0.0004705877190887067, 'buffer_size ': 100000, 'lr': 2.5394704902101727e-05, 'max_grad_norm': 2.9542213610448136, 'batch_size': 64, 'net_arch': 'small', 'activation_fn': 'relu', 'seed ': 4}. Best is trial 6 with value: 95.0.[0m
[32m[I 2022-04-19 22:58:35,468][0m Trial 27 finished with value: 82.33333333333333 and parameters: {'gamma': 0.002412684156061744, 'buffer_size ': 1000, 'lr': 0.0011893986200722858, 'max_grad_norm': 0.7284822446429858, 'batch_size': 32, 'net_arch': 'small', 'activation_fn': 'tanh', 'seed ': 1}. Best is trial 6 with value: 95.0.

Number of finished trials:  28
Best trial:
  Value:  95.0
  Params: 
    gamma: 0.0021813151695808546
    buffer_size : 100000
    lr: 0.00015581771449402266
    max_grad_norm: 2.8095652180149444
    batch_size: 64
    net_arch: small
    activation_fn: tanh
    seed : 1
  User attrs:
    gamma_: 0.9978186848304191


## Create and train our tuned model

Having completed our hyperparameter search, we plug in the hyperparameters generated from the search into our model along with the network archtecture values.

We supply the searched network architecture, orth_init and activation function using policy kwargs

In [48]:
policy_kwargs = dict(    
    activation_fn = th.nn.Tanh,
    net_arch = [32,32,64],
)

Create our model using searched hyperparameters and policy kwargs

In [49]:
model = DQN("CnnPolicy", env, tensorboard_log=LOG_DIR, verbose=1,
           gamma = 0.9978186848304191, max_grad_norm = 2.8095652180149444,
           buffer_size = 100000, learning_rate = 0.00015581771449402266,
           batch_size = 64,seed = 1,optimize_memory_usage = True, policy_kwargs=policy_kwargs)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


Train our agent using a specified number of timesteps

In [50]:
model.learn(total_timesteps=100000,callback=callback)

Logging to ./train/log_basic3\DQN_9
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 4.25     |
|    ep_rew_mean      | 84.8     |
|    exploration_rate | 0.998    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 293      |
|    time_elapsed     | 0        |
|    total_timesteps  | 17       |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 13.9     |
|    ep_rew_mean      | 24.1     |
|    exploration_rate | 0.989    |
| time/               |          |
|    episodes         | 8        |
|    fps              | 772      |
|    time_elapsed     | 0        |
|    total_timesteps  | 111      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 25.5     |
|    ep_rew_mean      | -46.2    |
|    exploration_rate | 0.971    |
| time/            

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.2     |
|    ep_rew_mean      | -48.1    |
|    exploration_rate | 0.762    |
| time/               |          |
|    episodes         | 92       |
|    fps              | 1027     |
|    time_elapsed     | 2        |
|    total_timesteps  | 2506     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.3     |
|    ep_rew_mean      | -48.4    |
|    exploration_rate | 0.751    |
| time/               |          |
|    episodes         | 96       |
|    fps              | 1030     |
|    time_elapsed     | 2        |
|    total_timesteps  | 2619     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.7     |
|    ep_rew_mean      | -52      |
|    exploration_rate | 0.736    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.8     |
|    ep_rew_mean      | -89      |
|    exploration_rate | 0.48     |
| time/               |          |
|    episodes         | 180      |
|    fps              | 1033     |
|    time_elapsed     | 5        |
|    total_timesteps  | 5471     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.5     |
|    ep_rew_mean      | -80.8    |
|    exploration_rate | 0.471    |
| time/               |          |
|    episodes         | 184      |
|    fps              | 1035     |
|    time_elapsed     | 5        |
|    total_timesteps  | 5572     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.7     |
|    ep_rew_mean      | -87.8    |
|    exploration_rate | 0.453    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.6     |
|    ep_rew_mean      | -81.5    |
|    exploration_rate | 0.205    |
| time/               |          |
|    episodes         | 268      |
|    fps              | 973      |
|    time_elapsed     | 8        |
|    total_timesteps  | 8369     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32       |
|    ep_rew_mean      | -77.5    |
|    exploration_rate | 0.197    |
| time/               |          |
|    episodes         | 272      |
|    fps              | 974      |
|    time_elapsed     | 8        |
|    total_timesteps  | 8453     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.3     |
|    ep_rew_mean      | -67.3    |
|    exploration_rate | 0.196    |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.2     |
|    ep_rew_mean      | -7.06    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 356      |
|    fps              | 974      |
|    time_elapsed     | 10       |
|    total_timesteps  | 10162    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 20.8     |
|    ep_rew_mean      | -10.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 360      |
|    fps              | 971      |
|    time_elapsed     | 10       |
|    total_timesteps  | 10271    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.2     |
|    ep_rew_mean      | -12.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.2     |
|    ep_rew_mean      | -69.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 444      |
|    fps              | 984      |
|    time_elapsed     | 13       |
|    total_timesteps  | 13029    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | -71.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 448      |
|    fps              | 984      |
|    time_elapsed     | 13       |
|    total_timesteps  | 13066    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.6     |
|    ep_rew_mean      | -71.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.5     |
|    ep_rew_mean      | -104     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 532      |
|    fps              | 989      |
|    time_elapsed     | 16       |
|    total_timesteps  | 16361    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.4     |
|    ep_rew_mean      | -103     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 536      |
|    fps              | 989      |
|    time_elapsed     | 16       |
|    total_timesteps  | 16503    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 36.1     |
|    ep_rew_mean      | -101     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.2     |
|    ep_rew_mean      | -56.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 620      |
|    fps              | 994      |
|    time_elapsed     | 18       |
|    total_timesteps  | 18662    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.9     |
|    ep_rew_mean      | -53.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 624      |
|    fps              | 989      |
|    time_elapsed     | 18       |
|    total_timesteps  | 18755    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.9     |
|    ep_rew_mean      | -47.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.7     |
|    ep_rew_mean      | -76      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 708      |
|    fps              | 988      |
|    time_elapsed     | 21       |
|    total_timesteps  | 21621    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32       |
|    ep_rew_mean      | -76.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 712      |
|    fps              | 989      |
|    time_elapsed     | 21       |
|    total_timesteps  | 21760    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33       |
|    ep_rew_mean      | -82.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.9     |
|    ep_rew_mean      | -82.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 796      |
|    fps              | 996      |
|    time_elapsed     | 24       |
|    total_timesteps  | 24590    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32       |
|    ep_rew_mean      | -77.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 800      |
|    fps              | 995      |
|    time_elapsed     | 24       |
|    total_timesteps  | 24622    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.2     |
|    ep_rew_mean      | -84.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.3     |
|    ep_rew_mean      | -70.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 884      |
|    fps              | 999      |
|    time_elapsed     | 27       |
|    total_timesteps  | 27040    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.5     |
|    ep_rew_mean      | -70.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 888      |
|    fps              | 999      |
|    time_elapsed     | 27       |
|    total_timesteps  | 27171    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | -59.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.6     |
|    ep_rew_mean      | -98.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 972      |
|    fps              | 1000     |
|    time_elapsed     | 30       |
|    total_timesteps  | 30225    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.7     |
|    ep_rew_mean      | -98.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 976      |
|    fps              | 1000     |
|    time_elapsed     | 30       |
|    total_timesteps  | 30322    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 37.1     |
|    ep_rew_mean      | -113     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.7     |
|    ep_rew_mean      | -74.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1060     |
|    fps              | 1000     |
|    time_elapsed     | 32       |
|    total_timesteps  | 32904    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.9     |
|    ep_rew_mean      | -63.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1064     |
|    fps              | 998      |
|    time_elapsed     | 32       |
|    total_timesteps  | 32919    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 30.8     |
|    ep_rew_mean      | -69.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29       |
|    ep_rew_mean      | -55.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1148     |
|    fps              | 1000     |
|    time_elapsed     | 35       |
|    total_timesteps  | 35406    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.7     |
|    ep_rew_mean      | -47.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1152     |
|    fps              | 1000     |
|    time_elapsed     | 35       |
|    total_timesteps  | 35447    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 27.5     |
|    ep_rew_mean      | -47.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.6     |
|    ep_rew_mean      | -61.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1236     |
|    fps              | 998      |
|    time_elapsed     | 38       |
|    total_timesteps  | 38087    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.1     |
|    ep_rew_mean      | -64.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1240     |
|    fps              | 998      |
|    time_elapsed     | 38       |
|    total_timesteps  | 38170    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.4     |
|    ep_rew_mean      | -60.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.9     |
|    ep_rew_mean      | -67.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1324     |
|    fps              | 999      |
|    time_elapsed     | 40       |
|    total_timesteps  | 40767    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 29.5     |
|    ep_rew_mean      | -64.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1328     |
|    fps              | 999      |
|    time_elapsed     | 40       |
|    total_timesteps  | 40823    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 28.7     |
|    ep_rew_mean      | -59.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 23       |
|    ep_rew_mean      | -22.2    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1412     |
|    fps              | 999      |
|    time_elapsed     | 42       |
|    total_timesteps  | 42756    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 21.9     |
|    ep_rew_mean      | -15.1    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1416     |
|    fps              | 999      |
|    time_elapsed     | 42       |
|    total_timesteps  | 42799    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 22.1     |
|    ep_rew_mean      | -16.4    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.6     |
|    ep_rew_mean      | -45.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1500     |
|    fps              | 995      |
|    time_elapsed     | 45       |
|    total_timesteps  | 45195    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.3     |
|    ep_rew_mean      | -43.7    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1504     |
|    fps              | 995      |
|    time_elapsed     | 45       |
|    total_timesteps  | 45216    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 26.1     |
|    ep_rew_mean      | -42.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 33.6     |
|    ep_rew_mean      | -90.9    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1588     |
|    fps              | 997      |
|    time_elapsed     | 48       |
|    total_timesteps  | 48318    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.1     |
|    ep_rew_mean      | -94.3    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1592     |
|    fps              | 998      |
|    time_elapsed     | 48       |
|    total_timesteps  | 48487    |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 34.8     |
|    ep_rew_mean      | -97.6    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes       

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 31.2     |
|    ep_rew_mean      | -80.8    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1668     |
|    fps              | 748      |
|    time_elapsed     | 68       |
|    total_timesteps  | 50955    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.884    |
|    n_updates        | 238      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 32.6     |
|    ep_rew_mean      | -90.5    |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1672     |
|    fps              | 718      |
|    time_elapsed     | 71       |
|    total_timesteps  | 51109    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.915    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 55.7     |
|    ep_rew_mean      | -218     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1732     |
|    fps              | 366      |
|    time_elapsed     | 150      |
|    total_timesteps  | 55255    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 5.37     |
|    n_updates        | 1313     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 57       |
|    ep_rew_mean      | -225     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1736     |
|    fps              | 355      |
|    time_elapsed     | 156      |
|    total_timesteps  | 55555    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 2.29     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 73.3     |
|    ep_rew_mean      | -296     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1796     |
|    fps              | 249      |
|    time_elapsed     | 240      |
|    total_timesteps  | 59948    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 2.23     |
|    n_updates        | 2486     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.6     |
|    ep_rew_mean      | -294     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1800     |
|    fps              | 246      |
|    time_elapsed     | 244      |
|    total_timesteps  | 60175    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 3.83     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 65.5     |
|    ep_rew_mean      | -259     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1860     |
|    fps              | 201      |
|    time_elapsed     | 316      |
|    total_timesteps  | 63823    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 3.86     |
|    n_updates        | 3455     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 65.5     |
|    ep_rew_mean      | -259     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1864     |
|    fps              | 199      |
|    time_elapsed     | 321      |
|    total_timesteps  | 64123    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 2.02     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.1     |
|    ep_rew_mean      | -289     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1924     |
|    fps              | 171      |
|    time_elapsed     | 399      |
|    total_timesteps  | 68334    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 3.62     |
|    n_updates        | 4583     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 72.1     |
|    ep_rew_mean      | -289     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1928     |
|    fps              | 169      |
|    time_elapsed     | 404      |
|    total_timesteps  | 68634    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 2.02     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.2     |
|    ep_rew_mean      | -241     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1988     |
|    fps              | 153      |
|    time_elapsed     | 467      |
|    total_timesteps  | 71994    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 3.95     |
|    n_updates        | 5498     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.4     |
|    ep_rew_mean      | -243     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 1992     |
|    fps              | 152      |
|    time_elapsed     | 473      |
|    total_timesteps  | 72294    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.567    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 63.9     |
|    ep_rew_mean      | -246     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2052     |
|    fps              | 137      |
|    time_elapsed     | 557      |
|    total_timesteps  | 76684    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.462    |
|    n_updates        | 6670     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 66.1     |
|    ep_rew_mean      | -257     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2056     |
|    fps              | 136      |
|    time_elapsed     | 562      |
|    total_timesteps  | 76984    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.143    |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 62.3     |
|    ep_rew_mean      | -236     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2116     |
|    fps              | 128      |
|    time_elapsed     | 624      |
|    total_timesteps  | 80252    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 2.25     |
|    n_updates        | 7562     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 59.4     |
|    ep_rew_mean      | -220     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2120     |
|    fps              | 128      |
|    time_elapsed     | 625      |
|    total_timesteps  | 80260    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 3.61     |
|    n_updates      

----------------------------------
| rollout/            |          |
|    ep_len_mean      | 49       |
|    ep_rew_mean      | -166     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2180     |
|    fps              | 120      |
|    time_elapsed     | 692      |
|    total_timesteps  | 83546    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 4.39     |
|    n_updates        | 8386     |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 49       |
|    ep_rew_mean      | -166     |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 2184     |
|    fps              | 119      |
|    time_elapsed     | 698      |
|    total_timesteps  | 83846    |
| train/              |          |
|    learning_rate    | 0.000156 |
|    loss             | 0.76     |
|    n_updates      

KeyboardInterrupt: 

### Test and evaluate our model

We will evaluate our model in the video game environment to determine its performance

We load our best trained model to use for evaluation

In [None]:
model = PPO.load('./train/train_basic2/best_model_80000')

Create and render our environment to see the agents performance

Create and render our environment to see the agents performance

In [None]:
env = VizDoomGym(render=True)

In [None]:
#loop through each game
for episode in range(10):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)