# Setup 



In [None]:
%%bash
# Install the system packages ViZDoom needs to build on Linux, as listed in
# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux
#
# `-y` answers apt's confirmation prompt automatically: a %%bash cell has no
# interactive stdin, so without it apt-get aborts at "Do you want to continue?".

apt-get update

apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip

# Boost libraries
apt-get install -y libboost-all-dev

In [None]:
!pip install vizdoom
!pip install ray 
!pip install ray['rllib']
!pip install Ipython --upgrade


In [None]:
import os 
from google.colab import drive
import sys

# Mount Google Drive and put the project checkout (and its src/ directory)
# on sys.path so the project's vizdoom modules can be imported.
project_root = '/content/drive/MyDrive/GitHub/INM363-Project'
drive.mount('/content/drive')

for system_path in (project_root, project_root + '/src'):
    sys.path.append(system_path)

# Author's note: PYTHONPATH is needed to use the GPU on Ray.
os.environ['PYTHONPATH'] = project_root
os.environ['PYTHONPATH']

In [None]:
from src.vizdoom_gym.envs.VizDoomEnv import VizdoomEnv
from src.vizdoom_gym.envs.VizDoomEnv_def import VizDoomVeryDenseReward

In [None]:
from ray.tune.registry import register_env
import gym
import ray
import ray.rllib.agents.ppo as ppo
import shutil
import torch

In [None]:
# Pick the torch device (first CUDA GPU if one is visible, otherwise CPU)
# and report it.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("device: ", device, "\n")

# Report the machine's total RAM (in units of 1e9 bytes) and whether it
# reaches the 20 GB threshold used here for a "high-RAM" runtime.
from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
ram_message = 'Your runtime has {:.1f} gigabytes of available RAM\n'
print(ram_message.format(ram_gb))

if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  print('Not using a high-RAM runtime')

# Create and restore agent from checkpoint

In [None]:
# Stop any Ray instance left over from a previous run of this cell.
ray.shutdown()
print("Shutdown ray")

# Start Ray with 2 CPUs and 1 GPU.
# For debugging, `local_mode=True` can be added to the ray.init call.
ray_resources = {"num_cpus": 2, "num_gpus": 1}
ray.init(ignore_reinit_error=True, **ray_resources)
print("Initialized ray")


def _make_env(config):
    """Env creator handed to RLlib; the passed-in config is ignored."""
    return VizDoomVeryDenseReward()


# Register the custom environment under the name the trainer will use.
select_env = "VizDoomVeryDenseReward-v0"
register_env(select_env, _make_env)
print("registered environment")


In [None]:
# Start from RLlib's PPO default config and override the entries this
# notebook needs, then build the trainer for the registered environment.
config = ppo.DEFAULT_CONFIG.copy()
config.update({
    "log_level": "WARN",
    "num_workers": 1,
    "framework": "torch",
    "model": {"dim": 42, "grayscale": True},
    "num_gpus": 1,
    "preprocessor_pref": "rllib",
    "batch_mode": "complete_episodes",
    # Evaluation-time settings: exploration off, evaluation flag on.
    "explore": False,
    "in_evaluation": True,
})

#config["horizon"] = 50
agent = ppo.PPOTrainer(config, env=select_env)

print("created agent")

In [None]:
# examine the trained policy
policy = agent.get_policy()
model = policy.model


#create environment 
env = gym.make('VizDoomVeryDenseReward-v0')
#,
#               config_file="custom\\very_dense_reward.cfg",
#                scenario_file="custom/test/easy_dense_reward_rs.wad") 
#               scenario_file="custom/test/dense_reward_fixed_start.wad") 

In [None]:
# Path of the checkpoint to restore: <checkpoint root>/<checkpoint directory>.
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/ppo/easy_no_reward"
chkpt_dir = 'checkpoint_000010'

chkpt_file = f"{chkpt_root}/{chkpt_dir}"
print(chkpt_file)


In [None]:
# Restore the agent from the checkpoint located at chkpt_file.
agent.restore(chkpt_file)

In [None]:
# Roll out the agent for a single episode, capped at n_step environment steps.
# Accumulates the episode reward and records the (X, Y) values reported in
# `info` so the path can be plotted afterwards.
state = env.reset()
sum_reward = 0
n_step = 200
path = []
steps_taken = 0

for step in range(n_step):
    action = agent.compute_action(state)
    state, reward, done, info = env.step(action)
    sum_reward += reward
    # `step` is a 0-based loop index, so the number of steps taken is step + 1.
    steps_taken = step + 1

    # Only record a position when the environment actually returned info.
    if info:
        path.append((info["X"], info["Y"]))

    #env.render()

    if done:
        break

# Report once, whether the episode terminated or hit the n_step cap.
print("cumulative reward", sum_reward)
print(f"total steps: {steps_taken}")


# Visualize Path taken by agent

In [None]:
import src.helper_fuctions as helper

# Read the map layout for the given config/scenario pair and draw it together
# with the recorded agent path.
layout_config = "custom/very_dense_reward.cfg"
layout_scenario = "custom/train/easy_dense_reward_rs.wad"

sectors, health_pos, armor_pos = helper.get_env_layout(
    config=layout_config,
    scenario=layout_scenario,
)
helper.plot_layout(sectors, health_pos, armor_pos, path)




# Average reward over all checkpoints

# Sparse Setting

In [None]:
from fnmatch import fnmatch

# Evaluate every checkpoint found under chkpt_root: restore it into the agent,
# run num_episodes episodes (each capped at n_step steps) and store the mean
# episode reward per checkpoint in chkpt_mean_rewards.
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/ppo/sparse" #_no_tpenalty"

pattern = '*checkpoint*'
# os.listdir() returns entries in arbitrary order; sort so the results line up
# with checkpoint order (names look zero-padded, e.g. checkpoint_000010, so a
# lexicographic sort matches the numeric one — verify if the naming differs).
checkpoints = sorted(x for x in os.listdir(chkpt_root) if fnmatch(x, pattern))

print("Total number of checkpoints: ", len(checkpoints))

num_episodes = 1
n_step = 200
chkpt_mean_rewards = []

for chkpt_dir in checkpoints:
  chkpt_pth = chkpt_root + '/' + chkpt_dir
  print(chkpt_dir)
  agent.restore(chkpt_pth)

  total_reward = 0

  for i in range(num_episodes):
    state = env.reset()
    sum_reward = 0

    for step in range(n_step):
      action = agent.compute_action(state)
      state, reward, done, info = env.step(action)
      sum_reward += reward

      if done:
        break

    # Count the episode's reward whether it terminated or hit the step cap.
    total_reward += sum_reward

  chkpt_mean_rewards.append(total_reward / num_episodes)

print(chkpt_mean_rewards)



In [None]:
import matplotlib.pyplot as plt

# Mean episode reward per evaluated checkpoint, in the order they were evaluated.
fig, ax = plt.subplots()
ax.plot(chkpt_mean_rewards)
ax.set_xlabel("Checkpoint index")
ax.set_ylabel("Mean episode reward")
ax.set_title("PPO - sparse setting")
plt.show()

# Dense Setting

In [None]:
from fnmatch import fnmatch

# Evaluate every checkpoint found under chkpt_root: restore it into the agent,
# run num_episodes episodes (each capped at n_step steps) and store the mean
# episode reward per checkpoint in chkpt_mean_rewards.
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/ppo/dense" #_no_tpenalty"

pattern = '*checkpoint*'
# os.listdir() returns entries in arbitrary order; sort so the results line up
# with checkpoint order (names look zero-padded, e.g. checkpoint_000010, so a
# lexicographic sort matches the numeric one — verify if the naming differs).
checkpoints = sorted(x for x in os.listdir(chkpt_root) if fnmatch(x, pattern))

print("Total number of checkpoints: ", len(checkpoints))

num_episodes = 1
n_step = 200
chkpt_mean_rewards = []

for chkpt_dir in checkpoints:
  chkpt_pth = chkpt_root + '/' + chkpt_dir
  print(chkpt_dir)
  agent.restore(chkpt_pth)

  total_reward = 0

  for i in range(num_episodes):
    state = env.reset()
    sum_reward = 0

    for step in range(n_step):
      action = agent.compute_action(state)
      state, reward, done, info = env.step(action)
      sum_reward += reward

      if done:
        break

    # Count the episode's reward whether it terminated or hit the step cap.
    total_reward += sum_reward

  chkpt_mean_rewards.append(total_reward / num_episodes)

print(chkpt_mean_rewards)



In [None]:
import matplotlib.pyplot as plt

# Mean episode reward per evaluated checkpoint, in the order they were evaluated.
fig, ax = plt.subplots()
ax.plot(chkpt_mean_rewards)
ax.set_xlabel("Checkpoint index")
ax.set_ylabel("Mean episode reward")
ax.set_title("PPO - dense setting")
plt.show()

# Easy Dense

In [None]:
from fnmatch import fnmatch

# Evaluate every checkpoint found under chkpt_root: restore it into the agent,
# run num_episodes episodes (each capped at n_step steps) and store the mean
# episode reward per checkpoint in chkpt_mean_rewards.
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/ppo/easy_dense" #_no_tpenalty"

pattern = '*checkpoint*'
# os.listdir() returns entries in arbitrary order; sort so the results line up
# with checkpoint order (names look zero-padded, e.g. checkpoint_000010, so a
# lexicographic sort matches the numeric one — verify if the naming differs).
checkpoints = sorted(x for x in os.listdir(chkpt_root) if fnmatch(x, pattern))

print("Total number of checkpoints: ", len(checkpoints))

num_episodes = 1
n_step = 200
chkpt_mean_rewards = []

for chkpt_dir in checkpoints:
  chkpt_pth = chkpt_root + '/' + chkpt_dir
  print(chkpt_dir)
  agent.restore(chkpt_pth)

  total_reward = 0

  for i in range(num_episodes):
    state = env.reset()
    sum_reward = 0

    for step in range(n_step):
      action = agent.compute_action(state)
      state, reward, done, info = env.step(action)
      sum_reward += reward

      if done:
        break

    # Count the episode's reward whether it terminated or hit the step cap.
    total_reward += sum_reward

  chkpt_mean_rewards.append(total_reward / num_episodes)

print(chkpt_mean_rewards)

In [None]:
import matplotlib.pyplot as plt

# Mean episode reward per evaluated checkpoint, in the order they were evaluated.
fig, ax = plt.subplots()
ax.plot(chkpt_mean_rewards)
ax.set_xlabel("Checkpoint index")
ax.set_ylabel("Mean episode reward")
ax.set_title("PPO - easy dense")
plt.show()

# Easy no reward (mean steps per episode)

In [None]:
from fnmatch import fnmatch

# Evaluate every checkpoint found under chkpt_root: restore it into the agent,
# run num_episodes episodes (each capped at n_step steps) and store the mean
# number of steps per episode for each checkpoint.
# NOTE: the result list keeps the name chkpt_mean_rewards because the plotting
# cell reads it, but in this section it holds step counts, not rewards.
chkpt_root = "/content/drive/MyDrive/GitHub/INM363-Project/model_checkpoints/ppo/easy_no_reward" #_no_tpenalty"

pattern = '*checkpoint*'
# os.listdir() returns entries in arbitrary order; sort so the results line up
# with checkpoint order (names look zero-padded, e.g. checkpoint_000010, so a
# lexicographic sort matches the numeric one — verify if the naming differs).
checkpoints = sorted(x for x in os.listdir(chkpt_root) if fnmatch(x, pattern))

print("Total number of checkpoints: ", len(checkpoints))

num_episodes = 1
n_step = 200
chkpt_mean_rewards = []

for chkpt_dir in checkpoints:
  chkpt_pth = chkpt_root + '/' + chkpt_dir
  print(chkpt_dir)
  agent.restore(chkpt_pth)

  total_steps = 0

  for i in range(num_episodes):
    state = env.reset()

    for step in range(n_step):
      action = agent.compute_action(state)
      state, reward, done, info = env.step(action)

      if done:
        break

    # `step` is a 0-based loop index, so the episode took step + 1 steps,
    # whether it terminated or hit the step cap.
    total_steps += step + 1

  chkpt_mean_rewards.append(total_steps / num_episodes)

print(chkpt_mean_rewards)

In [None]:
import matplotlib.pyplot as plt

# In this section chkpt_mean_rewards holds the mean number of steps per
# episode for each evaluated checkpoint (not rewards), so label it as such.
fig, ax = plt.subplots()
ax.plot(chkpt_mean_rewards)
ax.set_xlabel("Checkpoint index")
ax.set_ylabel("Mean steps per episode")
ax.set_title("PPO - easy no reward")
plt.show()