# Restore agent from checkpoint

In [None]:
#!pip3 install -U "ray[rllib]" torch

In [1]:
# imports
import gym, ray
from ray.rllib.env.env_context import EnvContext
import ray.rllib.agents.dqn as dqn
from helpers.advanced_map import AdvancedMap

from IPython.display import display
import matplotlib.pyplot as plt
%matplotlib inline
from helpers.r_matrix import r_matrix
from helpers.get_available_actions import get_available_actions, adv_actions, adv_action_from_index
import numpy as np
import random
import pandas as pd

from ray.tune.logger import pretty_print
from ray.rllib.models import ModelCatalog


In [2]:
class AdvancedMapEnv(gym.Env):
  """Wraps the advanced map so that it can be used as an RLLib / gym environment.

  Functions:
  __init__: takes in a config and creates the environment

  reset: reset the environment back to the beginning

  step: take an action and see what happens

  convert_observations: takes in a dictionary and returns a float32 numpy array

  seed: set the random seed, with an optional integer"""

  # Normalisation constants used by convert_observations.
  MAP_SIZE = (21., 29.)   # divisors for the two obj_direction components  # TODO dynamically get the size
  MAX_VIEW_VALUE = 8.     # largest cell value in the agent's view
  MAX_ENEMIES = 3.        # TODO dynamically get the max number of enemies
  MAX_HEALTH = 100.       # agent health is between 0 and 100

  def __init__(self, config: EnvContext):
    """Starts up the environment

    Inputs:
    config: an EnvContext (not read by this environment)"""
    # create an advanced map object
    self.advanced_map = AdvancedMap()

    # Define the action space
    # 5 actions: up, down, left, right, shoot
    self.action_space = gym.spaces.Discrete(5)

    # define the observation space, a 53 column box of floats between -1 and 1
    self.observation_space = gym.spaces.Box(low=-1., high=1., shape=(53,), dtype=np.float32)

  def reset(self) -> np.ndarray:
    """Reset the environment and send back the converted observations"""
    # send through the reset command
    obs = self.advanced_map.reset(pos=None)
    # convert the observations
    return self.convert_observations(obs)

  def step(self, action: int):
    """Take the step, assuming that the action is valid

    Inputs:
    action: index of the action to take, one of 0-4

    Returns:
    a tuple of (converted observations, reward, done flag, empty info dict)"""
    assert action in [0, 1, 2, 3, 4]

    # translate the action index into the form the advanced map expects
    actionstr = adv_action_from_index(action)

    # pass in the step
    obs = self.advanced_map.step(actionstr)

    rew = obs['immediate_reward']

    # return obs, reward, done and an (empty) info dict
    return self.convert_observations(obs), rew, obs['is_stop'], {}

  def convert_observations(self, obs: dict) -> np.ndarray:
    """Takes in the dictionary of observations from the environment and returns a normalised numpy array

    The dictionary contains:
    is_stop: a boolean, used internally to determine whether to stop the episode
    immediate_reward: the points we got in the last round
    enemy_count: how many active enemies there are
    agent_view: the 7x7 view of the surroundings (values between 0-8)
    obj_direction: the relative direction to the objective (0-max width, 0-max height)
    agent_health: the health of the agent, between 0-100

    Returns:
    a 1-D float32 array of 2 + 49 + 1 + 1 = 53 values, matching the dtype of
    self.observation_space"""
    # relative coordinates, divided by the size of the environment
    obj_direction = np.divide(np.array(list(obs['obj_direction'])), np.array(self.MAP_SIZE))

    # flatten the view of the surroundings into one row and normalise it
    agent_view = np.asarray(obs['agent_view']).ravel() / self.MAX_VIEW_VALUE

    # enemy_count
    enemy_count = obs['enemy_count'] / self.MAX_ENEMIES

    # agent_health
    agent_health = obs['agent_health'] / self.MAX_HEALTH

    # concatenate into a single row; cast to float32 because the observation
    # space is declared as float32 and a float64 array is not contained in it
    return np.concatenate(
      [obj_direction, agent_view, [enemy_count], [agent_health]]
    ).astype(np.float32)

  def seed(self, seed=None) -> None:
    """Set the random seed of Python's random module

    Inputs:
    seed: optional integer seed, None lets random pick its own"""
    random.seed(seed)

In [9]:
# Checkpoint to restore the agent from; point this at another run to load a different agent.
CHECKPOINT_PATH = '/root/ray_results/DQNTrainer_AdvancedMapEnv_2022-04-15_19-32-3355r71h0w/checkpoint_000001/checkpoint-1'

# get the default config to use
config = dqn.DEFAULT_CONFIG.copy()
# use torch
config['framework'] = 'torch'
# set the environment
config['env'] = AdvancedMapEnv
# disable duelling
config['dueling'] = False
# disable double Q
config['double_q'] = False
# DEFAULT_CONFIG.copy() is a shallow copy, so build a new 'model' dict instead of
# writing into the nested one (which is shared with the module-level default).
# NOTE(review): 'post_fcnet_activation' presumably only applies to 'post_fcnet_hiddens';
# if relu was meant for the [256, 256] layers the key may need to be
# 'fcnet_activation' - confirm against the config used for training before changing.
config['model'] = {
    **config['model'],
    'post_fcnet_activation': 'relu',
    'fcnet_hiddens': [256, 256],
}
# set the gamma to 0.9, best parameter from the grid search
config['gamma'] = 0.9

# visualise what the agent is doing
# using code from https://docs.ray.io/en/latest/rllib/rllib-training.html (computing actions)
# setup environment (the environment does not read the config it is given)
env = AdvancedMapEnv(config)
# setup the agent and restore from the checkpoint
agent = dqn.DQNTrainer(config=config)
agent.restore(CHECKPOINT_PATH)

Install gputil for GPU system monitoring.
2022-04-15 19:33:30,019	INFO trainable.py:534 -- Restored on 172.17.0.2 from checkpoint: /root/ray_results/DQNTrainer_AdvancedMapEnv_2022-04-15_19-32-3355r71h0w/checkpoint_000001/checkpoint-1
2022-04-15 19:33:30,021	INFO trainable.py:543 -- Current state after restoring: {'_iteration': 1, '_timesteps_total': 32, '_time_total': 2.429262399673462, '_episodes_total': 10}
