# PPO Algorithm

This notebook configures, trains, and evaluates a PPO (Proximal Policy Optimization) agent on the Patrolling Zoo environment.

In [None]:
%load_ext autoreload
%autoreload 2

import torch
from patrolling_zoo.patrolling_zoo_v0 import parallel_env, PatrolGraph
from algorithm.ppo import PPO, Agent

# set process priority low
import psutil
import os
# Lower this process's priority in a single call: on Windows (os.name == 'nt')
# pass psutil's BELOW_NORMAL_PRIORITY_CLASS, on any other platform pass 10.
psutil.Process(os.getpid()).nice(
    psutil.BELOW_NORMAL_PRIORITY_CLASS if os.name == 'nt' else 10
)

In [None]:
# Create a Keras TensorBoard callback configured with log_dir="logs".
# NOTE(review): no other cell visible in this notebook references
# `tensorboard_callback`, and the training code here goes through the
# project's PPO class rather than Keras — confirm this cell is still needed.
import tensorflow as tf
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")
tensorboard_callback  # trailing bare expression: the notebook displays the object


# Configuration

In [None]:
# Total timestep budget, shared by the environment (as max_cycles) and by PPO
# (as total_timesteps). The 3000 factor matches the num_steps value given to
# PPO below, i.e. the budget is 120 multiples of num_steps.
total_timesteps = 3000 * 120

""" ENV SETUP """
patrolGraph = PatrolGraph("patrolling_zoo/env/cumberland.graph")
env = parallel_env(
    patrolGraph,
    3,  # presumably the number of patrolling agents — confirm against parallel_env
    require_explicit_visit=False,
    speed=1.0,
    alpha=2,  # exponential base for the reward (per the original author's note)
    max_cycles=total_timesteps,
    reward_shift=0.0,
)

"""ALGO PARAMS"""
algo = PPO(
    env=env,
    track=True,
    gamma=0.99,
    total_timesteps=total_timesteps,
    num_steps=3000,
    anneal_lr=False,
)

# Training

In [None]:
# Run training on the PPO instance configured earlier and keep whatever
# `train()` returns (presumably the trained agent — confirm in algorithm.ppo).
agent = algo.train()

# Evaluation

In [None]:
# Evaluate with rendering switched on, using the agent stored on `algo`.
# NOTE(review): this passes `algo.agent` rather than the `agent` value returned
# by `algo.train()`, and limits the run to max_cycles=1 — confirm both are
# intentional.
algo.evaluate(
    max_cycles=1,
    agent=algo.agent,
    render=True,
)