# PPO Evaluation Script

In [None]:
import csv
import os
import pandas as pd
import random

from stable_baselines3 import PPO
from animalai.envs.environment import AnimalAIEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

import sys
sys.path.append("..")

from aaisrc.aaiyaml import find_yaml_files
from aaisrc.aaiyaml import yaml_combinor

In [None]:
def evaluate_ppo(
    env_path: str,
    configuration_file: str,
    model_path: str,
    save_path: str,
    watch: bool = False,
    num_runs: int = 100,
    verbose: bool = True,
) -> pd.DataFrame:
    """Run a saved PPO model for ``num_runs`` episodes and record the rewards.

    The model is loaded from ``model_path`` and stepped through an AnimalAI
    environment built from ``configuration_file`` with raycast observations
    (camera disabled). The summed reward of every episode is collected,
    written to ``save_path`` as CSV and returned.

    Args:
        env_path: Path to the AnimalAI executable.
        configuration_file: Arena configuration passed to the environment.
        model_path: Path to the saved PPO model.
        save_path: Destination of the CSV with one row per episode.
        watch: When true the environment runs in inference mode with
            graphics enabled; otherwise it runs with ``no_graphics``.
        num_runs: Number of episodes to run.
        verbose: When true, print progress and each episode's reward.

    Returns:
        A DataFrame with the columns ``EpisodeNumber`` (0-based) and
        ``FinalReward``.
    """
    model = PPO.load(model_path)

    # use a random port to avoid problems if a previous version exits slowly
    port = 5005 + random.randint(0, 10000)

    aai_env = AnimalAIEnvironment(
        inference=watch,
        seed=2023,
        file_name=env_path,
        arenas_configurations=configuration_file,
        play=False,
        base_port=port,
        useCamera=False,
        useRayCasts=True,
        raysPerSide=15,
        rayMaxDegrees=30,
        no_graphics=(not watch),
        timescale=1,
    )

    try:
        env = UnityToGymWrapper(
            aai_env,
            uint8_visual=False,
            allow_multiple_obs=True,
            flatten_branched=True,
        )
    except Exception:
        # The wrapper never took ownership, so shut the raw environment down
        # ourselves before propagating the error.
        aai_env.close()
        raise

    episode_rewards = []
    # Always close the environment, even when an episode fails, so that the
    # Unity process does not linger and keep the port occupied.
    try:
        obs = env.reset()

        for _episode in range(num_runs):
            if verbose:
                print(f"Running episode {_episode+1} of {num_runs}.")
            done = False
            episodeReward = 0
            while not done:
                # Observations are indexed with [0] before being handed to
                # the model (the wrapper is created with
                # allow_multiple_obs=True).
                action, _state = model.predict(obs[0], deterministic=False)
                obs, reward, done, info = env.step(action.item())
                episodeReward += reward
                env.render()

            # Episode finished: prepare the next one and store the result.
            obs = env.reset()
            if verbose:
                print(episodeReward)
            episode_rewards.append(episodeReward)
    finally:
        env.close()

    results_dataframe = pd.DataFrame(
        {
            "EpisodeNumber": list(range(num_runs)),
            "FinalReward": episode_rewards,
        }
    )

    results_dataframe.to_csv(save_path, index=False)

    return results_dataframe


In [None]:
def _run_ppo_episode(model, env):
    """Reset ``env``, play one episode with ``model`` and return the summed reward."""
    obs = env.reset()
    done = False
    episodeReward = 0

    while not done:
        action, _state = model.predict(obs[0], deterministic=False)
        obs, reward, done, info = env.step(action.item())
        episodeReward += reward
        env.render()

    return episodeReward


def _append_episode_result(save_path_csv, episode_name, episode_reward):
    """Append one ``(episode, finalReward)`` row to the CSV, writing the header for a new file."""
    file_exists = os.path.isfile(save_path_csv)
    with open(save_path_csv, 'a' if file_exists else 'w', newline='') as csv_file:
        csv_write = csv.writer(csv_file)
        if not file_exists:
            csv_write.writerow(['episode', 'finalReward'])

        csv_write.writerow([str(episode_name), str(episode_reward)])
        print(f"Writing episode score {episode_reward} for episode {episode_name} to {save_path_csv}")


def evaluate_ppo_directory(
    env_path: str,
    model_path: str,
    configuration_folder: str,
    tmp_file_location: str,
    save_path_csv: str,
    watch: bool = False,
    seed: int = 2023,
    batch_size: int = 50,
    port_base: int = 6600,
) -> bool:
    """Evaluate a saved PPO model on every YAML configuration in a folder.

    The configuration files found by ``find_yaml_files`` are processed in
    batches of ``batch_size``. For each batch the files are merged into one
    temporary configuration with ``yaml_combinor``, an environment is started
    on it, one episode is played per file, and the summed reward of each
    episode is appended to ``save_path_csv`` immediately after it finishes.

    Args:
        env_path: Path to the AnimalAI executable.
        model_path: Path to the saved PPO model.
        configuration_folder: Folder searched for YAML configurations.
        tmp_file_location: Where the temporary combined configuration of a
            batch is stored; the file is removed after the batch.
        save_path_csv: CSV file that results are appended to (created with a
            header row if it does not exist yet).
        watch: When true the environment runs in inference mode with
            graphics enabled; otherwise it runs with ``no_graphics``.
        seed: Seed handed to the environment and to ``random.seed``.
        batch_size: Number of configuration files per environment launch.
        port_base: Base port handed to the environment.

    Returns:
        ``True`` once all batches have been processed.
    """
    model = PPO.load(model_path)

    # The same base port is used for every batch; each batch closes its
    # environment before the next one starts.
    port = port_base

    paths, names = find_yaml_files(configuration_folder)
    total_files = len(paths)

    for batch_counter, yaml_index in enumerate(range(0, total_files, batch_size)):
        # The last batch may hold fewer than batch_size files.
        upper_bound = min(yaml_index + batch_size, total_files)

        print(f"Running inferences on batch {batch_counter + 1} of {batch_size} files of total {total_files}. {total_files - upper_bound} instances to go.")

        batch_files = paths[yaml_index:upper_bound]
        batch_file_names = names[yaml_index:upper_bound]

        batch_temp_file_name = f"TempConfig_PPO_{seed}_{yaml_index}.yml"

        config_file_path = yaml_combinor(file_list=batch_files, temp_file_location=tmp_file_location, stored_file_name=batch_temp_file_name)

        # NOTE(review): ml-agents appears to connect on base_port + worker_id,
        # so the random worker id is capped to keep that sum a valid port
        # number (<= 65535) - confirm against the ml-agents documentation.
        max_worker_id = max(0, min(65500, 65535 - port))

        try:
            aai_env = AnimalAIEnvironment(
                inference=watch,  # Set true when watching the agent
                seed=seed,
                worker_id=random.randint(0, max_worker_id),
                file_name=env_path,
                arenas_configurations=config_file_path,
                base_port=port,
                useCamera=False,
                useRayCasts=True,
                raysPerSide=15,
                rayMaxDegrees=30,
                no_graphics=(not watch),
                timescale=1,
            )

            try:
                env = UnityToGymWrapper(aai_env, uint8_visual=False, allow_multiple_obs=True, flatten_branched=True)
            except Exception:
                # The wrapper never took ownership; close the raw environment.
                aai_env.close()
                raise

            try:
                # NOTE: this reseeds the module-level random generator, which
                # also determines the worker ids drawn for later batches.
                random.seed(seed)

                # Iterate over the files actually present in this batch so a
                # short final batch does not index past the end of the list.
                for _episode, episode_name in enumerate(batch_file_names):
                    print(f"Running episode {_episode+1} of {len(batch_file_names)}: {episode_name}")
                    episodeReward = _run_ppo_episode(model, env)
                    _append_episode_result(save_path_csv, episode_name, episodeReward)
            finally:
                env.close()
        finally:
            # Remove the temporary combined configuration even when the batch
            # failed, so retries do not leave stale files behind.
            if os.path.isfile(config_file_path):
                os.remove(config_file_path)

    return True

In [None]:
# Evaluate the button-press checkpoint for 100 episodes; the per-episode
# rewards are written to the CSV below and kept in `button_dataframe`.
button_dataframe = evaluate_ppo(
    env_path="../env/AnimalAI.exe",
    configuration_file="../configs/buttonPressTask/buttonPressGreen.yml",
    model_path="../modelsaves/buttonPressTask/model_2000000.zip",
    save_path="../analysis/data/buttonPress/PPORaycast100Button.csv",
    watch=False,
    num_runs=100,
)

In [None]:
# Evaluate the foraging checkpoint for 100 episodes; the per-episode rewards
# are written to the CSV below and kept in `foraging_dataframe`.
foraging_dataframe = evaluate_ppo(
    env_path="../env/AnimalAI.exe",
    configuration_file="../configs/foragingTask/foragingTaskSpawnerTree.yml",
    model_path="../modelsaves/foragingTask/model_1000000.zip",
    save_path="../analysis/data/foraging/PPORaycast100Foraging.csv",
    watch=False,
    num_runs=100,
)

In [None]:
# Evaluate the competition checkpoint on every configuration in the folder,
# retrying with the next base port whenever a run fails.
#
# NOTE: evaluate_ppo_directory appends a CSV row after each episode and
# starts from the first configuration on every call, so a retry after a
# partial run adds duplicate rows to the output file.
evaluation_done = False  # named to avoid shadowing the builtin `eval`
pt = 1000
random.seed(2050)

while not evaluation_done:
    try:
        # The directory-based evaluator is the function that accepts
        # configuration_folder / tmp_file_location / save_path_csv / seed /
        # port_base / batch_size.
        evaluation_done = evaluate_ppo_directory(
            env_path="../env/AnimalAI.exe",
            model_path="../modelsaves/competitionAAITestbed/model_2000000.zip",
            configuration_folder="../configs/competition",
            tmp_file_location="../..",
            save_path_csv="../analysis/data/competitionAAITestbed/PPOAAITestbed2050.csv",
            seed=random.randint(1000, 10000),
            port_base=pt,
            batch_size=1,
            watch=False,
        )
    except Exception as error:
        # `except Exception` (rather than a bare except) keeps the loop
        # interruptible with Ctrl+C / kernel interrupt.
        print("There was an error, probably due to port closure. Trying again with the next port.")
        print(f"Underlying error: {error!r}")
        pt += 1

