# Setup

In [1]:
%%bash
# Install deps from 
# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux

apt-get update  &> /dev/null


apt-get install build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip  &> /dev/null

# Boost libraries
apt-get install libboost-all-dev  &> /dev/null

In [2]:
!pip install vizdoom --quiet
!pip install ray --quiet
!pip install ray['rllib'] --quiet
!pip install Ipython --upgrade --quiet

In [3]:
system_path = '/content/drive/MyDrive/GitHub/INM363-Project'
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append(system_path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from src.vizdoom_gym.envs.VizDoomEnv import VizdoomEnv
from src.vizdoom_gym.envs.VizDoomEnv_def import VizDoomVeryDenseReward

In [5]:
from ray.tune.registry import register_env
import gym
import os
import ray
import ray.rllib.agents.ppo as ppo
import shutil
import torch

  and should_run_async(code)


In [6]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("device: ", device, "\n")

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

device:  cuda:0 

Your runtime has 89.6 gigabytes of available RAM

You are using a high-RAM runtime!


  and should_run_async(code)


# Initialize Ray

In [7]:
#need this to load vizdoom module 
system_path = '/content/drive/MyDrive/GitHub/INM363-Project/src' 
sys.path.append(system_path)

#need this to use gpu on ray 
os.environ['PYTHONPATH'] = '/content/drive/MyDrive/GitHub/INM363-Project' 
os.environ['PYTHONPATH']

'/content/drive/MyDrive/GitHub/INM363-Project'

In [8]:
#chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern"
#shutil.rmtree(chkpt_root, ignore_errors=True, onerror=None)


ray.shutdown()
print("Shutdown ray")

# start Ray -- add `local_mode=True` here for debugging
ray.init(ignore_reinit_error=True,  num_cpus =2, num_gpus = 1) #local_mode=True,

#ray.init(num_cpus= 2, num_gpus=1)

print("Initialized ray")

# register the custom environment
select_env = "VizDoomVeryDenseReward-v0"

register_env(select_env, lambda config: VizDoomVeryDenseReward())
#register_env(select_env, lambda config: VizdoomEnv())

print("registered environment")


Shutdown ray


2022-09-28 16:57:26,321	INFO worker.py:1518 -- Started a local Ray instance.


Initialized ray
registered environment


Training config

In [9]:
# configure the environment and create agent
config = ppo.DEFAULT_CONFIG.copy()
config["log_level"] = "WARN"
#config["num_workers"] = 1
config["framework"] = "torch"
config["model"] = {"dim": 42, 
                   "grayscale": True,
                   }
config["num_gpus"] = 1
config["preprocessor_pref"] = "rllib"
config['explore'] = True 
#config['batch_mode'] = 'complete_episodes'


In [14]:

#activating curiosity as the exploration class : https://docs.ray.io/en/latest/rllib/rllib-algorithms.html

#set to 0 because of: https://discuss.ray.io/t/scaling-curiosity-like-exploration-modules-on-multiple-workers/2267
config["num_workers"] = 0 

config["exploration_config"] = {
    "type": "Curiosity",  # <- Use the Curiosity module for exploring.
    "eta": 0.0001, #0.001,  # Weight for intrinsic rewards before being added to extrinsic ones.
    "lr": 0.001,  # Learning rate of the curiosity (ICM) module.
    "feature_dim": 288,  # Dimensionality of the generated feature vectors.
    # Setup of the feature net (used to encode observations into feature (latent) vectors).
    "feature_net_config": {
        "fcnet_hiddens": [],
        "fcnet_activation": "relu",
    },
    "inverse_net_hiddens": [256],  # Hidden layers of the "inverse" model.
    "inverse_net_activation": "relu",  # Activation of the "inverse" model.
    "forward_net_hiddens": [256],  # Hidden layers of the "forward" model.
    "forward_net_activation": "relu",  # Activation of the "forward" model.
    "beta": 0.2,  # Weight for the "forward" loss (beta) over the "inverse" loss (1.0 - beta).
    # Specify, which exploration sub-type to use (usually, the algo's "default"
    # exploration, e.g. EpsilonGreedy for DQN, StochasticSampling for PG/SAC).
    "sub_exploration": {
        "type": "StochasticSampling",
    }
}


"""
used vf_clip = 400 for easy and new_dense settings 
= 600 for sparse settings 
= 10 (deafult) for easy no reward and no reward settings 
"""

config["vf_clip_param"] = 138148

#changed due to warning
# Clip param for the value function. Note that this is sensitive to the
# scale of the rewards. If your expected V is large, increase this. (previosuly 10) 
#2022-08-30 17:15:25,928	WARNING ppo.py:465 -- The mean reward returned from the environment is 5066.82568359375 but the
# vf_clip_param is set to 100. Consider increasing it for policy: default_policy to improve value function convergence.

#config["vf_clip_param"] = 10000  # changin this back to try scaled reward setting 
#100
#2022-09-01 12:20:27,151	WARNING ppo.py:465 -- The mean reward returned from the environment is 16827.6640625 but the vf_clip_param is set to 10.0. Consider increasing it for policy: default_policy to improve value function convergence.



agent = ppo.PPOTrainer(config, env=select_env)

print("created agent")

config file: /content/drive/MyDrive/GitHub/INM363-Project/scenarios/custom/very_dense_reward.cfg
scenario file: /content/drive/MyDrive/GitHub/INM363-Project/scenarios/custom/train/dense_new_pattern_rs.wad
episode timeout: 800
screen resolution: 320X240




created agent


**Training** loop

In [15]:
import pandas as pd
import time 

cols = ["checkpoint", "eps_reward_min", "eps_reward_mean", "eps_reward_max", "eps_len_mean", "episodes_this_iter"]
results_df = pd.DataFrame(columns = cols) 

In [16]:
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern"
chkpt_file  = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern/checkpoint_001100"
agent.restore(chkpt_file)

2022-09-28 16:58:46,758	INFO trainable.py:669 -- Restored on 172.28.0.2 from checkpoint: /content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern/checkpoint_001100
2022-09-28 16:58:46,760	INFO trainable.py:677 -- Current state after restoring: {'_iteration': 1100, '_timesteps_total': None, '_time_total': 75290.79979395866, '_episodes_total': 15726}


In [17]:
status = "{:2d} reward {:6.2f}/{:6.2f}/{:6.2f} len {:4.2f}"
start_n = 300
n_iter = 600

print("started training loop")
time_start = time.time() 

chkpt_root = chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern"


# train a policy with RLlib using PPO
for n in range(start_n, n_iter):
  
    result = agent.train()

    #change this to  10 or 20 
    if (n+1) % 30 == 0 or n == 0: 
      chkpt_file = agent.save(chkpt_root)
      print(f"Saved checkpoint {n+1} at {chkpt_file}")
    #chkpt_file = "not saving checkpoints"

    print(status.format(
        n + 1,
        result["episode_reward_min"],
        result["episode_reward_mean"],
        result["episode_reward_max"],
        result["episode_len_mean"]
    ))

    #save metrics
    row = {'checkpoint': n+1,
       "eps_reward_min": result["episode_reward_min"],
       "eps_reward_mean": result["episode_reward_mean"],
       "eps_reward_max": result["episode_reward_max"],
       "eps_len_mean": result["episode_len_mean"],
       "episodes_this_iter": result["episodes_this_iter"]
       }
    results_df = results_df.append(row, ignore_index = True)


print(f"Total time elapsed: {(time.time()-time_start)/60}")

print("ending training loop")

ray.shutdown()
print("shutdown ray")

started training loop
301 reward   0.00/  2.60/ 10.00 len 190.20
302 reward   0.00/  3.09/ 10.00 len 181.81
303 reward   0.00/  3.11/ 10.00 len 179.29
304 reward   0.00/  3.15/ 10.00 len 179.97
305 reward   0.00/  3.13/ 10.00 len 178.37
306 reward   0.00/  2.89/ 10.00 len 180.26
307 reward   0.00/  2.72/ 10.00 len 182.25
308 reward   0.00/  2.45/ 10.00 len 186.15
309 reward   0.00/  2.45/ 10.00 len 186.21
310 reward   0.00/  2.62/ 10.00 len 186.21
311 reward   0.00/  3.08/ 13.00 len 177.70
312 reward   0.00/  3.39/ 14.00 len 175.57
313 reward   0.00/  3.55/ 14.00 len 173.63
314 reward   0.00/  3.35/ 14.00 len 177.77
315 reward   0.00/  3.24/ 14.00 len 181.70
316 reward   0.00/  3.25/ 14.00 len 178.48
317 reward   0.00/  3.17/ 13.00 len 176.76
318 reward   0.00/  3.05/ 13.00 len 178.74
319 reward   0.00/  3.10/ 10.00 len 176.66
320 reward   0.00/  3.37/ 13.00 len 169.51
321 reward   0.00/  3.15/ 13.00 len 173.32
322 reward   0.00/  2.96/ 13.00 len 173.32
323 reward   0.00/  3.08/ 13.00 

# save results file

In [18]:
from pathlib import Path 

fname = chkpt_root + '/result_2.csv'
fpath = Path(fname)
fpath.parent.mkdir(parents=True, exist_ok = True)
results_df.to_csv(fpath)
print(f"Saved results file to {fname}")


Saved results file to /content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern/result_2.csv


In [23]:
import pandas as pandas
import os 
from pathlib import Path 


chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern" 

results_pth = chkpt_root + '/' + 'result_1.csv'
df1 = pd.read_csv(results_pth) 
df1.tail()


results_pth = chkpt_root + '/' + 'result_2.csv'
df2 = pd.read_csv(results_pth) 
df2.tail()

Unnamed: 0.1,Unnamed: 0,checkpoint,eps_reward_min,eps_reward_mean,eps_reward_max,eps_len_mean,episodes_this_iter
295,295,596.0,0.0,2.05,13.0,184.46,22.0
296,296,597.0,0.0,2.4,15.0,180.69,25.0
297,297,598.0,0.0,2.81,15.0,172.94,25.0
298,298,599.0,0.0,2.71,15.0,174.99,21.0
299,299,600.0,0.0,2.49,15.0,178.77,20.0


In [24]:

frames = [df1, df2]
df = pd.concat(frames, ignore_index = True)
df.tail()


fname = chkpt_root + '/result.csv'
fpath = Path(fname)
fpath.parent.mkdir(parents=True, exist_ok = True)
df.to_csv(fpath)
print(f"Saved results file to {fname}")

Saved results file to /content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/icm/dense_new_pattern/result.csv
