# PPO evaluation script

In [None]:
import random
import os
import time
import torch as th
import pandas as pd

from stable_baselines3 import PPO
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper
from animalai.envs.environment import AnimalAIEnvironment


from animalai.envs.environment import AnimalAIEnvironment
from animalai.envs.actions import AAIActions, AAIAction
#from gym_unity.envs import UnityToGymWrapper

import sys
sys.path.append('../src')

from yamlHandling import find_yaml_files #this function finds the yaml files in a directory.
from yamlHandling import yaml_combinor #this function combines a batch of yaml files and saves the output in a temporary folder. This means we can run inference on batches of tests at once.
from mysqlConnection import databaseConnector #this function permits connection to a mysql database using a CSV file containing details of the db connection.
from mysqlConnection import agentToDB #this function takes a dictionary and ingresses it into a table
from mysqlConnection import removePreviouslyRunInstances #this function takes a set of yaml files and task names and removes any that have already got results in the database.
from mysqlConnection import selectID #this function finds the integer ID for a table given a particular column name and value

## Database Connection

Check that a connection to the database can be opened using the details in `databaseConnectionDetails.csv`, then close it again.

In [None]:
# Smoke-test the database credentials: open a connection, then release both
# the cursor and the connection so that nothing is left open on the server.
mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

try:
    mycursor.close()
finally:
    # Closing only the cursor would leave the connection itself open.
    connection.close()

print("Connection checked and closed.")

## Paths

Provide the path to the directory containing the configs being tested over, as well as the path to the Animal-AI environment executable. Provide a location for generating temporary files of combined configs. This defaults to the parent directory of the GitHub repository, to prevent results being pushed accidentally. Finally, provide the location of the saved PPO model in the modelsaves folder (gitignored).

In [None]:
# Directory containing the YAML task configurations to evaluate over.
configuration_folder = "../../configs/tests_agents"

# Path to the Animal-AI environment executable.
# A repository-relative alternative is "../../env/AnimalAI".
env_path = "C:/Users/kv301/Documents/animal-ai/env/AnimalAI.exe"

# Where temporary combined-config files are generated.
temp_folder_location = "../../.."

# Saved PPO model to evaluate: a dummy file (64x64 CNN trained for 1000 steps).
ppo_model_save_location = "../modelsaves/modelsaves/sanity_green_test/model_1000"

## Add All Tasks In Directory To Database

Iterate through the configuration directory to find the YAML files and their task names, then insert each task name into the `instances` table.

In [None]:
# Set to True to drop and recreate the `instances` table before inserting.
rerunInstanceTable = False

mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

try:
    # `yaml_files` and `task_names` are reused by the inference cells below.
    yaml_files, task_names = find_yaml_files(configuration_folder)

    if rerunInstanceTable:
        # NOTE(review): the results tables built later in this notebook are
        # named ppoagentinstanceresults / ppoagentintrainstanceresults and hold
        # foreign keys to `instances`; they are not in this drop list, so this
        # DROP may be rejected while they exist. Confirm the intended names.
        dropTable = "DROP TABLE IF EXISTS ppoinstanceresults, ppointrainstanceresults, instances;"
        mycursor.execute(dropTable)

        sql = "CREATE TABLE instances(instanceid INT AUTO_INCREMENT PRIMARY KEY, instancename VARCHAR(750) UNIQUE NOT NULL);"
        mycursor.execute(sql)

    # Parameterised insert so task names containing quotes cannot break (or
    # inject into) the statement. Assumes the cursor returned by
    # databaseConnector uses the `%s` placeholder style of the MySQL drivers.
    insertQuery = "INSERT INTO instances(instancename) VALUES (%s);"

    for instance in task_names:
        try:
            mycursor.execute(insertQuery, (str(instance),))
            connection.commit()
        except Exception as error:
            # Most commonly a UNIQUE violation because the task is already
            # present, but report the real error rather than assuming so.
            print(f"Could not insert task {instance} (it may already be in this table): {error}. Moving to next.")
finally:
    try:
        mycursor.close()
    finally:
        connection.close()

### Create Agent Table

Create a table that stores details about the agent. Currently just storing a single agent, but easily scalable to multiple.

In [None]:
# When True, the agent table (and the results tables listed in the DROP
# statement below) are dropped and `ppoagents` is recreated from scratch.
rerunAgentTable = True

mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

try:
    if rerunAgentTable:
        dropTable = "DROP TABLE IF EXISTS  ppoagents, ppoagentinstanceresults, ppoagentintrainstanceresults;"
        mycursor.execute(dropTable)

        # One row per (agent_tag, aai_seed, training_curriculum) combination.
        sql = "CREATE TABLE `ppoagents` (`agentid` INT AUTO_INCREMENT PRIMARY KEY, `agent_tag` VARCHAR(300), `aai_seed` INT, `policy` VARCHAR(10), `aai_env_version` VARCHAR(20), `learning_rate` FLOAT(8), `n_steps_training` INT, `batch_size` INT, `n_epochs` INT, `gamma` FLOAT(8), `gae_lambda` FLOAT(8), `clip_range` FLOAT(8), `clip_range_vf` VARCHAR(10), `normalize_advantage` BOOL, `ent_coef` FLOAT(8), `vf_coef` FLOAT(8), `max_grad_norm` FLOAT(8), use_sde BOOL, sde_sample_freq FLOAT(8), `target_kl` VARCHAR(10), stats_window_size FLOAT(8), `training_curriculum` VARCHAR(100), `state_size` INT, `act_func` VARCHAR(30), UNIQUE(agent_tag, aai_seed, training_curriculum));"
        mycursor.execute(sql)
finally:
    # Release the cursor and the connection even if a statement failed.
    try:
        mycursor.close()
    finally:
        connection.close()

In [None]:
# Description of the PPO agent under evaluation. The keys correspond to
# columns of the `ppoagents` table; `aai_seed` is filled in per seed just
# before the agent is written to the database.
ppo_agent_dict = {
    # --- policy and environment ---
    "policy": "CNN",  # or "MLP" for raycasts
    "aai_env_version": "aai.3.1.3",

    # --- PPO hyperparameters ---
    "learning_rate": 0.0003,
    "n_steps_training": 1000,
    "batch_size": 64,
    "n_epochs": 10,
    "gamma": 0.99,
    "gae_lambda": 0.95,
    "clip_range": 0.2,
    "clip_range_vf": "None",  # must be string, not int/float
    "normalize_advantage": True,
    "ent_coef": 0,
    "vf_coef": 0.5,
    "max_grad_norm": 0.5,
    "use_sde": False,
    "sde_sample_freq": -1,
    "target_kl": "None",  # must be string, not int/float
    "stats_window_size": 100,

    # --- training metadata ---
    "training_curriculum": "sanity_green_random",  # curriculum the agent was trained on
    "agent_tag": "sanitygreen_ppo_1000_64x64",
    "state_size": 64,  # size of the state (64x64)
    "act_func": "ReLU",
}



In [None]:
# Seeds each agent is evaluated with; a single seed limits compute/time.
seeds_to_run = [2023]

# Agent descriptions to evaluate (currently just the one PPO agent).
agent_dict_list = [ppo_agent_dict]

In [None]:
# Register every (agent, seed) combination in the `ppoagents` table.
mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

try:
    for agent in agent_dict_list:
        for seed in seeds_to_run:
            # The same dict is reused per seed; it is written to the database
            # immediately, before the next seed overwrites `aai_seed`.
            agent['aai_seed'] = seed
            agentToDB(mycursor, agent, table_name = "ppoagents")

    connection.commit()
finally:
    # Release the cursor and the connection even if an insert failed.
    try:
        mycursor.close()
    finally:
        connection.close()

## Run Inference And Store

Run saved PPO agent on configuration files and store results in database.

In [None]:
mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

# When True, both results tables are dropped and recreated (existing results are lost).
rebuildInstanceResultsTables = True

try:
    if rebuildInstanceResultsTables:
        print("Rebuilding results tables, dropping if they already exist.")

        dropInstanceResultsTables = "DROP TABLE IF EXISTS ppoagentinstanceresults, ppoagentintrainstanceresults;"
        mycursor.execute(dropInstanceResultsTables)

        # One row per (instance, agent): the final reward of the episode.
        createInstanceTable = "CREATE TABLE ppoagentinstanceresults(instanceid INT NOT NULL, agentid INT NOT NULL, finalreward FLOAT(53), FOREIGN KEY (instanceid) REFERENCES instances(instanceid), FOREIGN KEY(agentid) REFERENCES ppoagents(agentid), PRIMARY KEY (instanceid, agentid));"
        mycursor.execute(createInstanceTable)

        # One row per (instance, agent, step): action, reward, velocity and position.
        createIntraInstanceTable = "CREATE TABLE ppoagentintrainstanceresults(instanceid INT NOT NULL, agentid INT NOT NULL, step INT NOT NULL, actiontaken INT NOT NULL, stepreward FLOAT(53), xvelocity FLOAT(32), yvelocity FLOAT(32), zvelocity FLOAT(32), xpos FLOAT(32), ypos FLOAT(32), zpos FLOAT(32), FOREIGN KEY (instanceid) REFERENCES instances(instanceid), FOREIGN KEY(agentid) REFERENCES ppoagents(agentid), PRIMARY KEY(instanceid, agentid, step));"
        mycursor.execute(createIntraInstanceTable)

        # The message now names the tables that were actually created.
        print("Tables: ppoagentinstanceresults and ppoagentintrainstanceresults have been successfully built.")
finally:
    # Release the cursor and the connection even if a statement failed.
    try:
        mycursor.close()
    finally:
        connection.close()

In [None]:
def _run_episode_and_store(cur, model, env, obs, instanceid, agentid, verbose):
    """Play one episode with `model`, writing one database row per step.

    Args:
        cur: Database cursor used for the per-step inserts (not committed here).
        model: Loaded PPO model; `model.predict` is called on `obs[0]`.
        env: Gym-wrapped environment to step.
        obs: Current observation; `obs[0]` is fed to the model and
            `obs[1][1..6]` are stored as velocity and position.
        instanceid: ID of the task instance being run.
        agentid: ID of the agent being evaluated.
        verbose: Print the episode reward when the episode ends.

    Returns:
        Tuple `(episodeReward, obs)`, where `obs` is the observation returned
        by `env.reset()` after the episode finished.
    """
    episodeReward = 0
    step_counter = 0
    done = False

    while not done:
        action, _state = model.predict(obs[0], deterministic=False)
        # Use the plain scalar for both the environment and the SQL text;
        # formatting the numpy array itself can yield "[3]", which is invalid SQL.
        action_value = action.item()
        obs, reward, done, info = env.step(action_value)
        episodeReward += reward
        step_counter += 1
        env.render()

        # Bound before the try so the handler can always report it.
        intraInstanceQuery = None
        try:
            # NOTE(review): the `stepreward` column receives the cumulative
            # episode reward so far, not the reward of this single step.
            # Confirm that this is intended.
            intraInstanceQuery = f"INSERT INTO ppoagentintrainstanceresults(instanceid, agentid, step, actiontaken, stepreward, xvelocity, yvelocity, zvelocity, xpos, ypos, zpos) VALUES ({instanceid}, {agentid}, {step_counter}, {action_value}, {float(episodeReward)}, {obs[1][1]}, {obs[1][2]}, {obs[1][3]}, {obs[1][4]}, {obs[1][5]}, {obs[1][6]});"
            cur.execute(intraInstanceQuery)
        except Exception as error:
            # Best effort: a failed step row must not abort the episode.
            print(f"There's something wrong with this step. Here's the query {intraInstanceQuery}. Error: {error}")

    if verbose:
        print(F"Episode Reward: {episodeReward}")

    return episodeReward, env.reset()


def runPPOAndStore(cur, con, model_save_path, batch_size: int, agent_dict: dict, yaml_files, task_names, temp_folder_location, agent_inference = False, port_base = 6600, randomise_port = True, verbose = True):
    """Run a saved PPO agent over task configs in batches and store the results.

    For each batch the YAML files are combined into one temporary config, an
    Animal-AI environment is opened on it, and one episode is played per task.
    Per-step rows go to `ppoagentintrainstanceresults`, and one final-reward
    row per task goes to `ppoagentinstanceresults`. The environment is closed
    and the temporary config is deleted after every batch, even on error.

    Reads the module-level `env_path` for the environment executable.

    Args:
        cur: Database cursor used for ID lookups and inserts.
        con: Database connection; committed after each task's final result.
        model_save_path: Path handed to `PPO.load`.
        batch_size: Number of YAML files combined per environment launch.
        agent_dict: Agent description; `agent_tag`, `aai_seed` and
            `state_size` are read here.
        yaml_files: Config file paths to run.
        task_names: Task names, assumed parallel to `yaml_files`.
        temp_folder_location: Directory passed to `yaml_combinor` for the
            temporary combined config.
        agent_inference: Forwarded as `inference` to the environment (set
            True when watching the agent).
        port_base: Base port for the environment.
        randomise_port: Add a random offset in [0, 9000] to `port_base`.
        verbose: Print progress messages.
    """
    # Look up the database ID of this agent.
    agentid = selectID(cur, id_name = "agentid", table_name = "ppoagents", WHERE_column = "agent_tag", WHERE_clause = agent_dict['agent_tag'], secondary_WHERE_column = "aai_seed", secondary_WHERE_clause = agent_dict['aai_seed'])

    # Skip tasks this agent already has results for; fall back to all tasks
    # if that filtering fails.
    try:
        task_names, yaml_files = removePreviouslyRunInstances(cur = cur, yaml_files=yaml_files, task_names=task_names, agentid=agentid, agent_table = "ppoagents", agent_instance_results_table = "ppoagentinstanceresults")
    except Exception as error:
        print(f"Running on all files. ({error})")

    # The random port offset is drawn BEFORE seeding, so it differs between
    # runs even though the agent seed is fixed.
    port = port_base
    if randomise_port:
        port += random.randint(0, 9000)

    random.seed(agent_dict['aai_seed'])

    total_files = len(yaml_files)

    if total_files == 0:
        if verbose:
            print("This agent has already been run and is in the database. Skipping so as not to waste time. If you suspect that the agent has not been fully evaluated on all tests, you may want to restart the instances for that agent.")
        return

    # Only load the model once we know there is something to run.
    model = PPO.load(model_save_path)

    for batch_counter, yaml_index in enumerate(range(0, total_files, batch_size)):
        upper_bound = min(yaml_index + batch_size, total_files)
        batch_files = yaml_files[yaml_index:upper_bound]
        batch_file_names = task_names[yaml_index:upper_bound]

        if verbose:
            print(f"Running inferences on batch {batch_counter + 1} of {len(batch_files)} files of total {total_files}. {total_files - upper_bound} instances to go.")

        batch_temp_file_name = f"TempConfig_{agent_dict['agent_tag']}_{agent_dict['aai_seed']}_{yaml_index}.yml"
        config_file_path = yaml_combinor(file_list = batch_files, temp_file_location=temp_folder_location, stored_file_name = batch_temp_file_name)

        if verbose:
            print("Opening AAI Environment.")

        # Increment through ports to prevent calling the same socket.
        temp_port = port + yaml_index

        aai_env = None
        env = None
        try:
            aai_env = AnimalAIEnvironment(
                inference=agent_inference,
                seed=agent_dict['aai_seed'],
                worker_id=agent_dict['aai_seed'],
                file_name=env_path,
                arenas_configurations=config_file_path,
                base_port=temp_port,
                useCamera=True,
                resolution=agent_dict['state_size'],
                useRayCasts=False,
                timescale=1,
                no_graphics=False,
            )

            env = UnityToGymWrapper(aai_env, uint8_visual=True, allow_multiple_obs=True, flatten_branched=True)

            obs = env.reset()
            if verbose:
                print(obs)

            for task_name in batch_file_names:
                instanceid = selectID(cur, id_name = "instanceid", table_name = "instances", WHERE_column = "instancename", WHERE_clause = task_name)

                episodeReward, obs = _run_episode_and_store(cur, model, env, obs, instanceid, agentid, verbose)

                try:
                    insertInstanceResults = f"INSERT INTO ppoagentinstanceresults(instanceid, agentid, finalreward) VALUES ({instanceid}, {agentid}, {episodeReward});"
                    cur.execute(insertInstanceResults)
                    con.commit()
                except Exception as error:
                    print(f"It looks like this agent has already been tested on this instance. ({error})")
        finally:
            # Always shut the environment down so its socket is released, and
            # always delete the temporary combined config.
            try:
                if env is not None:
                    env.close()
                elif aai_env is not None:
                    aai_env.close()
            finally:
                os.remove(config_file_path)

        if verbose:
            print("Moving to next batch.")

       

In [None]:
def run_agent_on_instance_wrapper(seed, agent, model_save_path, yaml_batch_size=1, port_base = 6600, randomise_port = True, verbose = True):
    """Evaluate one agent with one seed using a fresh database connection.

    Sets `agent['aai_seed']` to `seed` (mutating the passed dict), opens a
    database connection, and calls `runPPOAndStore` with `agent_inference=True`.
    The cursor and connection are closed afterwards, even if the run raises.

    Reads the module-level `yaml_files`, `task_names` and
    `temp_folder_location`.

    Args:
        seed: Seed stored in the agent dict and used for the run.
        agent: Agent description dict; `agent_tag` is read for logging.
        model_save_path: Path to the saved PPO model.
        yaml_batch_size: Number of YAML files per environment launch.
        port_base: Base port forwarded to `runPPOAndStore`.
        randomise_port: Forwarded to `runPPOAndStore`.
        verbose: Print progress messages.
    """
    agent['aai_seed'] = seed

    if verbose:
        print(f"Running {agent['agent_tag']} on seed {seed}.")

    mycursor, connection = databaseConnector('databaseConnectionDetails.csv')

    try:
        runPPOAndStore(mycursor, connection, model_save_path, yaml_batch_size, agent_dict=agent, yaml_files=yaml_files, task_names=task_names, temp_folder_location=temp_folder_location, agent_inference=True, port_base = port_base, randomise_port = randomise_port, verbose = verbose)
    finally:
        # Without this a failed run leaks the cursor and the connection,
        # and callers that retry would open a new connection every time.
        try:
            mycursor.close()
        finally:
            connection.close()

In [None]:
# Number of task configs combined into a single environment launch.
# NOTE(review): an earlier comment here said task-ordering problems forced
# batches of 1, yet the value is 10. Confirm which is intended.
yaml_batch_size = 10
verbose = True

# Give up after this many failed passes instead of retrying forever.
max_attempts = 10
retry_delay_seconds = 10

attempt = 0

while True:
    attempt += 1
    # Restart from the first (seed, agent) pair on every pass; results that
    # are already stored are skipped by the run itself.
    counter = 0
    try:
        for seed in seeds_to_run:
            for agent_dictionary in agent_dict_list:
                # Spread the base ports so consecutive runs do not share one.
                adhoc_port = (counter+10)*100
                run_agent_on_instance_wrapper(seed, agent_dictionary, model_save_path=ppo_model_save_location, yaml_batch_size=yaml_batch_size, port_base = adhoc_port, randomise_port = True, verbose = verbose)
                counter += 1
                if verbose:
                    print("Moving to next agent.")
            if verbose:
                print("Moving to next seed.")
        # Every combination completed exactly once, so stop.
        break
    except Exception as error:
        # `Exception` (not a bare except) so Ctrl-C still interrupts the cell.
        if attempt >= max_attempts:
            raise
        print(f"Run failed ({error!r}); sockets may have been occupied. Waiting {retry_delay_seconds} seconds and starting again.")
        time.sleep(retry_delay_seconds)