# PPO Algorithm

This notebook trains a PPO (Proximal Policy Optimization) agent on the Patrolling Zoo environment and then evaluates it.

In [None]:
%load_ext autoreload
%autoreload 2

import torch
from patrolling_zoo.env.clean_patrolling_zoo import parallel_env
from patrolling_zoo.env.patrol_graph import PatrolGraph
from algorithm.clean_ppo import PPO, Agent

# Run this notebook's process at a lowered priority.
import os
import psutil

_this_process = psutil.Process(os.getpid())
# nice() takes a priority-class constant on Windows and a numeric niceness
# value elsewhere; the Windows-only constant is only looked up on Windows.
_this_process.nice(
    psutil.BELOW_NORMAL_PRIORITY_CLASS if os.name == 'nt' else 10
)

In [None]:
# Build a Keras TensorBoard callback configured with log_dir="logs".
# NOTE(review): no other cell visible here passes this callback to anything
# (training goes through the PPO object) — confirm whether it is still needed.
import tensorflow as tf
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir="logs")
# Bare expression on the cell's last line: the notebook echoes the object.
tensorboard_callback


# Configuration

In [None]:
# Timestep budget handed to PPO; the same value is reused below as the
# environment's max_cycles.
total_timesteps = 3000 * 120

# --- Environment setup ---
patrolGraph = PatrolGraph("patrolling_zoo/env/4nodes.graph")
env = parallel_env(
    patrolGraph,
    3,  # presumably the number of agents — confirm against parallel_env
    require_explicit_visit=False,
    speed=1.0,
    alpha=2,  # exponential bases for reward
    max_cycles=total_timesteps,
    observe_method="raw",
    reward_method="raw",
    stayLimit=5,
)

# --- Algorithm parameters ---
algo = PPO(
    env=env,
    total_timesteps=total_timesteps,
    num_steps=1000,
)

# Training

In [None]:
# Run training on the configured PPO object and keep whatever train()
# returns under the name `agent`.
agent = algo.train()

# Evaluation

In [None]:
# Evaluate with rendering enabled, using the agent stored on the PPO object.
# NOTE(review): this passes algo.agent rather than the `agent` value returned
# by train(), and sets max_cycles=1 — confirm both are intentional.
algo.evaluate(render=True, agent=algo.agent, max_cycles=1)