# SimpleSim Non-Holonomic Navigation Challenge

This notebook trains the best-performing version of the final ReLOaD architecture, with:
- Variable budget
- Variable num targets
- Absolute information gain (entropy-based) reward
  - Not normalised against the number of targets or the length of the episode


## Install Dependencies and Stable Baselines3 Using Pip

In [1]:
# Uncomment to install Stable Baselines3 (with extras) if it is not already available.
# Prefer `%pip` over `!pip` in a notebook so the install targets the running kernel's environment.
# !pip install "stable-baselines3[extra]>=2.0.0a4"

### Setup Tensorboard Logging

In [2]:
import os

# # Clear any logs from previous runs
# # (destructive: removes everything under ./logs/ - leave commented out unless starting fresh)
# !rm -rf ./logs/

# Load the TensorBoard notebook extension so the %tensorboard magic is available
%load_ext tensorboard

## Import Custom Gym Env

In [3]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces

# Import our main environment
from env_gym import SimpleSimGym

### Validate the environment

In [4]:
from stable_baselines3.common.env_checker import check_env

# Settings for a throwaway environment instance used only for this interface check
validation_kwargs = dict(
    max_budget=500,
    max_targets=3,
    num_classes=10,
    player_fov=60,
)
env = SimpleSimGym(**validation_kwargs)

# If the environment doesn't follow the Gym interface, an error will be thrown
# (warn=True additionally reports non-fatal issues as warnings)
check_env(env, warn=True)

Environment seed: 842


## Import Auto Saving of Best Model Callback

In [5]:
from utils import SaveOnBestTrainingRewardCallback

## Train The Model

In [6]:
from stable_baselines3 import PPO, SAC, TD3
from stable_baselines3.common.env_util import make_vec_env

# Environment parameters (forwarded to SimpleSimGym as keyword arguments
# when the training and evaluation environments are built)
MAX_BUDGET = 400
MAX_TARGETS = 5
NUM_CLASSES = 10
PLAYER_FOV = 30
RENDER_MODE = "rgb_array"
ACTION_FORMAT = "continuous"

# Training settings shared by the cells below
config = dict(
    policy='MlpPolicy',
    total_timesteps=6_000_000,
    logdir="logs/",
    savedir="saved_models/",
)

# Make sure the log and save directories exist before anything writes to them
for dir_key in ("logdir", "savedir"):
    os.makedirs(config[dir_key], exist_ok=True)

### Show TensorBoard Logs

Visualise the live logs in TensorBoard as we train.

In [12]:
# Launch TensorBoard inline against logs/, reloading the event files every 30 seconds
%tensorboard --logdir logs/ --reload_multifile True --reload_interval 30
# Optional: append `--port 6007` to the line above to serve on a port other than the default

Reusing TensorBoard on port 6006 (pid 86057), started 3 days, 5:11:17 ago. (Use '!kill 86057' to kill it.)

### Train SAC

In [8]:
# Instantiate and wrap the env (single environment, episode stats written by Monitor)
# NOTE(review): monitor files go to "<logdir>sac" while the callback below is given
# log_dir="<logdir>" - confirm SaveOnBestTrainingRewardCallback appends model_name
# when it looks for the monitor files.
env_sac = make_vec_env(
    SimpleSimGym,
    n_envs=1,
    monitor_dir=os.path.join(config["logdir"], "sac"),
    env_kwargs=dict(
        max_budget=MAX_BUDGET,
        max_targets=MAX_TARGETS,
        num_classes=NUM_CLASSES,
        player_fov=PLAYER_FOV,
        render_mode=RENDER_MODE,
        action_format=ACTION_FORMAT,
    ),
)

print("TRAINING MODEL WITH ABSOLUTE POSITIONS")

# Setup callbacks
auto_save_callback = SaveOnBestTrainingRewardCallback(
    check_freq=1000,
    log_dir=config["logdir"],
    save_dir=config["savedir"],
    model_name="sac",
    verbose=0,
)

# Create the agent
model_sac = SAC(config["policy"], env_sac, tensorboard_log=config["logdir"], verbose=0)

# Train in tranches, saving a checkpoint after each one
# NOTE(review): with times_trained > 0 the per-tranche step count shrinks (total is
# divided by the enlarged tranche count) and this cell re-creates the model, so the
# step number in the checkpoint name no longer matches the steps actually trained.
times_trained = 0
num_tranches = 10
tranches = num_tranches*(1+times_trained)
steps_per_tranche = config["total_timesteps"]//tranches

for i in range((num_tranches * times_trained)+1, tranches+1):
    print(f"^ Tranch {i}/{tranches}")
    # Only the very first tranche resets the timestep counter; every later tranche
    # continues the same counter (and the same TensorBoard run).
    model_sac.learn(
        steps_per_tranche,
        tb_log_name="SAC",
        callback=auto_save_callback,
        progress_bar=True,
        reset_num_timesteps=(i == 1),
    )
    # savedir already ends with a separator, so join instead of concatenating with "/"
    checkpoint_name = f"{config['policy']}_SAC_step{i * steps_per_tranche}"
    model_sac.save(os.path.join(config["savedir"], checkpoint_name))

Environment seed: 421


Output()

TRAINING MODEL ON A VARIABLE NUMBER OF TARGETS (OTHER WINDOW IN MAIN REPO FOLDER IS TRAINING FIXED 5 TARGETS)
^ Tranch 1/10


Output()

^ Tranch 2/10


Output()

^ Tranch 3/10


Output()

^ Tranch 4/10


Output()

^ Tranch 5/10


Output()

^ Tranch 6/10


Output()

^ Tranch 7/10


Output()

^ Tranch 8/10


Output()

^ Tranch 9/10


Output()

^ Tranch 10/10


### Load the Best Model

In [9]:
# Load the best model checkpoint from the save directory
# (presumably the file written by SaveOnBestTrainingRewardCallback with model_name="sac" - confirm)
# savedir already ends with a separator, so join instead of concatenating with "/"
best_sac = SAC.load(os.path.join(config["savedir"], "best_sac"))

### Check Performance

Check whether the policy can consistently succeed in the environment over multiple episodes.

In [10]:
from stable_baselines3.common.evaluation import evaluate_policy

# Build the evaluation env with the same settings as training, but without a
# monitor directory so evaluation episodes are not written to the training logs
eval_env_kwargs = dict(
    max_budget=MAX_BUDGET,
    max_targets=MAX_TARGETS,
    num_classes=NUM_CLASSES,
    player_fov=PLAYER_FOV,
    render_mode=RENDER_MODE,
    action_format=ACTION_FORMAT,
)
eval_env = make_vec_env(SimpleSimGym, n_envs=1, env_kwargs=eval_env_kwargs)

# Compare the final training checkpoint against the best auto-saved one
models = {"last_sac": model_sac, "best_sac": best_sac}

for model_label, candidate in models.items():
    # Start each evaluation from a freshly reset env
    eval_env.reset()
    # Average the episode reward over 50 evaluation episodes
    mean_reward, std_reward = evaluate_policy(candidate, eval_env, n_eval_episodes=50)
    print(f"MODEL TYPE: {model_label}")
    print(f"mean_reward:{mean_reward:.2f} +/- {std_reward:.2f}\n")

Environment seed: 17
MODEL TYPE: last_sac
mean_reward:807.24 +/- 653.98

MODEL TYPE: best_sac
mean_reward:760.50 +/- 704.75



### Visualize Trained Agent with Video

In [11]:
from utils import record_video, show_videos

# Number of steps recorded per clip
# NOTE(review): 500 does not match MAX_BUDGET (400) - confirm the intended clip length
clip_steps = 500*3

# Final training checkpoint: record a clip, then show clips matching the prefix
record_video(eval_env, model_sac, video_length=clip_steps, prefix="sac-last-simplesim")
show_videos("videos", prefix="sac-last")

# Best auto-saved checkpoint: same procedure
record_video(eval_env, best_sac, video_length=clip_steps, prefix="sac-best-simplesim")
show_videos("videos", prefix="sac-best")

_XSERVTransmkdir: ERROR: euid != 0,directory /tmp/.X11-unix will not be created.
_XSERVTransSocketUNIXCreateListener: mkdir(/tmp/.X11-unix) failed, errno = 2
_XSERVTransMakeAllCOTSServerListeners: failed to create listener for local
(EE) 
Fatal server error:
(EE) Cannot establish any listening sockets - Make sure an X server isn't already running(EE) 


Saving video to /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-last-simplesim-step-0-to-step-1500.mp4
Moviepy - Building video /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-last-simplesim-step-0-to-step-1500.mp4.
Moviepy - Writing video /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-last-simplesim-step-0-to-step-1500.mp4



                                                                                                                                                                                                                                                                                                                                                                         

Moviepy - Done !
Moviepy - video ready /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-last-simplesim-step-0-to-step-1500.mp4


_XSERVTransmkdir: ERROR: euid != 0,directory /tmp/.X11-unix will not be created.
_XSERVTransSocketUNIXCreateListener: mkdir(/tmp/.X11-unix) failed, errno = 2
_XSERVTransMakeAllCOTSServerListeners: failed to create listener for local
(EE) 
Fatal server error:
(EE) Cannot establish any listening sockets - Make sure an X server isn't already running(EE) 


Saving video to /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-best-simplesim-step-0-to-step-1500.mp4
Moviepy - Building video /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-best-simplesim-step-0-to-step-1500.mp4.
Moviepy - Writing video /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-best-simplesim-step-0-to-step-1500.mp4



                                                                                                                                                                                                                                                                                                                                                                         

Moviepy - Done !
Moviepy - video ready /Users/alexnicholson/Uni/2023_S2/METR4911/ReLOaD/reload/simplesim/videos/sac-best-simplesim-step-0-to-step-1500.mp4
