# Random Action Agents

In [1]:
import csv
import pandas as pd
import os
import random

from animalai.envs.environment import AnimalAIEnvironment
from mlagents_envs.envs.unity_gym_env import UnityToGymWrapper

import sys
sys.path.append("..")

from aaisrc.aaiagents import RandomActionAgent
from aaisrc.aaiyaml import find_yaml_files
from aaisrc.aaiyaml import yaml_combinor

In [None]:
def _rollout_random_episode(env, agent):
    """Play one episode with ``agent`` in ``env`` and return the summed reward.

    The agent alternates between a burst of actions from ``get_num_steps`` and
    a single action from ``get_new_action``.  Both calls receive the most
    recently executed action as ``prev_step``.  The rollout stops as soon as
    ``env.step`` reports ``done``.
    """
    env.reset()

    # There is no previous step at the start of an episode, so the seed action
    # is drawn without a previous-step bias.  It is only used as ``prev_step``
    # for the first burst; it is never sent to the environment itself.
    agent.prev_step_bias = 0
    previous_action = agent.get_new_action(prev_step=0)

    episode_reward = 0
    done = False
    while not done:
        for action in agent.get_num_steps(prev_step=previous_action):
            _obs, reward, done, _info = env.step(int(action))
            episode_reward += reward
            env.render()
            previous_action = action
            if done:
                break

        if not done:
            # The burst ended without finishing the episode: take one freshly
            # sampled action before asking for the next burst.
            previous_action = agent.get_new_action(prev_step=previous_action)
            _obs, reward, done, _info = env.step(int(previous_action))
            episode_reward += reward
            env.render()

    return episode_reward


def evaluate_randomactionagent(
    env_path: str,
    configuration_file: str,
    save_path: str,
    watch: bool = False,
    num_runs: int = 100,
    verbose: bool = True,
) -> pd.DataFrame:
    """Run a random-action agent for ``num_runs`` episodes and save the rewards.

    Args:
        env_path: Passed to ``AnimalAIEnvironment`` as ``file_name``.
        configuration_file: Passed to ``AnimalAIEnvironment`` as
            ``arenas_configurations``.
        save_path: Destination of the CSV written with ``DataFrame.to_csv``.
        watch: Passed to ``AnimalAIEnvironment`` as ``inference``; set it to
            ``True`` when watching the agent.
        num_runs: Number of episodes to play.
        verbose: When true, print a progress line before each episode.  The
            final reward of each episode is printed regardless of this flag.

    Returns:
        A DataFrame with the columns ``EpisodeNumber`` (``0..num_runs-1``) and
        ``FinalReward``; the same table is written to ``save_path``.
    """
    agent = RandomActionAgent(
        max_step_length=10,
        step_length_distribution='normal',
        norm_mu=5,
        norm_sig=1,
    )

    # Use a random port to avoid problems if a previous version exits slowly.
    port = 4000 + random.randint(0, 1000)

    aai_env = AnimalAIEnvironment(
        inference=watch,  # Set true when watching the agent
        seed=2023,
        worker_id=random.randint(0, 65500),
        file_name=env_path,
        arenas_configurations=configuration_file,
        base_port=port,
        useCamera=False,
        useRayCasts=False,
        no_graphics=True,
        timescale=1,
    )

    episode_rewards = []
    env = None
    try:
        env = UnityToGymWrapper(
            aai_env,
            uint8_visual=False,
            allow_multiple_obs=True,
            flatten_branched=True,
        )

        # Seeded only after the port and worker id were drawn, so those two
        # stay random while the agent's action sampling is reproducible.
        random.seed(2023)

        for _episode in range(num_runs):
            if verbose:
                print(f"Running episode {_episode+1} of {num_runs}.")

            episode_reward = _rollout_random_episode(env, agent)

            print(F"Episode Reward: {episode_reward}")
            # NOTE(review): the environment is reset here and again at the
            # start of the next rollout; kept as in the original code.
            env.reset()
            episode_rewards.append(episode_reward)
    finally:
        # Always release the Unity process and its port, even when a rollout
        # raised.  Close through the wrapper when it exists, otherwise close
        # the raw environment directly.
        (env if env is not None else aai_env).close()

    results_dataframe = pd.DataFrame({
        "EpisodeNumber": list(range(num_runs)),
        "FinalReward": episode_rewards,
    })
    results_dataframe.to_csv(save_path, index=False)

    return results_dataframe

In [None]:
def _rollout_batch_episode(env, agent):
    """Play one episode with ``agent`` in ``env`` and return the summed reward.

    The agent alternates between a burst of actions from ``get_num_steps`` and
    a single action from ``get_new_action``.  Both calls receive the most
    recently executed action as ``prev_step``.  The rollout stops as soon as
    ``env.step`` reports ``done``.
    """
    env.reset()

    # There is no previous step at the start of an episode, so the seed action
    # is drawn without a previous-step bias.  It is only used as ``prev_step``
    # for the first burst; it is never sent to the environment itself.
    agent.prev_step_bias = 0
    previous_action = agent.get_new_action(prev_step=0)

    episode_reward = 0
    done = False
    while not done:
        for action in agent.get_num_steps(prev_step=previous_action):
            _obs, reward, done, _info = env.step(int(action))
            episode_reward += reward
            env.render()
            previous_action = action
            if done:
                break

        if not done:
            # The burst ended without finishing the episode: take one freshly
            # sampled action before asking for the next burst.
            previous_action = agent.get_new_action(prev_step=previous_action)
            _obs, reward, done, _info = env.step(int(previous_action))
            episode_reward += reward
            env.render()

    return episode_reward


def evaluate_randomactionagent_episode(
    env_path: str,
    configuration_folder: str,
    tmp_file_location: str,
    save_path_csv: str,
    watch: bool = False,
    seed: int = 2023,
    batch_size: int = 50,
    port_base: int = 6600,
) -> bool:
    """Evaluate a random-action agent on every YAML file found in a folder.

    The files returned by ``find_yaml_files`` are processed in batches of
    ``batch_size``.  Each batch is merged into one temporary configuration
    with ``yaml_combinor``, a fresh environment is launched on it, and one
    episode is played per file in the batch.  After every episode a row
    ``(name, reward)`` is appended to ``save_path_csv``; the header row
    ``episode,finalReward`` is written when the file does not exist yet.

    A batch that raises is reported and skipped (it is not retried); the port
    is incremented so the next launch does not reuse the failed one.

    Args:
        env_path: Passed to ``AnimalAIEnvironment`` as ``file_name``.
        configuration_folder: Folder handed to ``find_yaml_files``.
        tmp_file_location: Passed to ``yaml_combinor`` as
            ``temp_file_location``.
        save_path_csv: CSV file that episode results are appended to.
        watch: Passed to ``AnimalAIEnvironment`` as ``inference``.
        seed: Seed for the environment and for ``random`` (re-seeded at the
            start of every batch).
        batch_size: Number of configuration files combined per launch.
        port_base: Port used for the first environment launch.

    Returns:
        Always ``True``.
    """
    agent = RandomActionAgent(
        max_step_length=10,
        step_length_distribution='normal',
        norm_mu=5,
        norm_sig=1,
    )

    port = port_base
    paths, names = find_yaml_files(configuration_folder)
    total_files = len(paths)

    for yaml_index in range(0, total_files, batch_size):
        # The final batch may hold fewer than ``batch_size`` files.
        upper_bound = min(yaml_index + batch_size, total_files)
        batch_files = paths[yaml_index:upper_bound]
        batch_file_names = names[yaml_index:upper_bound]
        batch_number = yaml_index // batch_size + 1

        print(f"Running inferences on batch {batch_number} of {batch_size} files of total {total_files}. {total_files - upper_bound} instances to go.")

        batch_temp_file_name = f"TempConfig_RandomAction_{seed}_{yaml_index}.yml"

        # Reset per batch so the cleanup below never touches a handle that
        # belongs to an earlier batch.
        config_file_path = None
        aai_env = None
        env = None
        try:
            config_file_path = yaml_combinor(
                file_list=batch_files,
                temp_file_location=tmp_file_location,
                stored_file_name=batch_temp_file_name,
            )

            aai_env = AnimalAIEnvironment(
                inference=watch,  # Set true when watching the agent
                seed=seed,
                worker_id=random.randint(0, 65500),
                file_name=env_path,
                arenas_configurations=config_file_path,
                base_port=port,
                useCamera=False,
                useRayCasts=False,
                no_graphics=True,
                timescale=1,
            )

            env = UnityToGymWrapper(
                aai_env,
                uint8_visual=False,
                allow_multiple_obs=True,
                flatten_branched=True,
            )

            # Seeded only after the worker id was drawn, so the agent's
            # action sampling is reproducible for a given ``seed``.
            random.seed(seed)

            # One episode per file actually present in this batch; iterating
            # ``range(batch_size)`` overran ``names`` on a partial last batch.
            for _episode, episode_name in enumerate(batch_file_names):
                print(f"Running episode {_episode+1} of {len(batch_files)}.")

                episode_reward = _rollout_batch_episode(env, agent)

                print(F"Episode Reward: {episode_reward}")
                # NOTE(review): the environment is reset here and again at the
                # start of the next rollout; kept as in the original code.
                # Confirm this does not skip an arena when batch_size > 1.
                env.reset()

                file_exists = os.path.isfile(save_path_csv)
                with open(save_path_csv, 'a' if file_exists else 'w', newline='') as csv_file:
                    csv_write = csv.writer(csv_file)
                    if not file_exists:
                        csv_write.writerow(['episode', 'finalReward'])

                    csv_write.writerow([str(episode_name), str(episode_reward)])
                    print(f"Writing episode score {episode_reward} for episode {episode_name} to {save_path_csv}")
        except Exception as err:
            # ``Exception`` (not a bare ``except``) so Ctrl-C still stops the
            # run.  The batch is skipped, not retried; move to a new port in
            # case the failed launch left the old one occupied.
            port += 1
            print(f"Batch starting at file index {yaml_index} failed ({err!r}). Skipping it; the next batch will use port {port}.")
        finally:
            # Best-effort cleanup on both the success and the failure path.
            closable = env if env is not None else aai_env
            if closable is not None:
                try:
                    closable.close()
                except Exception as close_err:
                    print(f"Could not close the environment cleanly: {close_err!r}")
            if config_file_path is not None and os.path.isfile(config_file_path):
                try:
                    os.remove(config_file_path)
                except OSError as remove_err:
                    print(f"Could not remove temporary config {config_file_path}: {remove_err!r}")

    return True

In [None]:
# Random-action baseline on the foraging task: 100 episodes on the
# spawner-tree arena, with the rewards saved next to the analysis data.
RAA_results_foraging = evaluate_randomactionagent(
    env_path="../env/AnimalAI.exe",
    configuration_file="../configs/foragingTask/foragingTaskSpawnerTree.yml",
    save_path="../analysis/data/foraging/randomActionAgent100Foraging.csv",
    num_runs=100,
)

In [None]:
# Random-action baseline on the button-press task: 100 episodes on the
# green-button arena, with the rewards saved next to the analysis data.
RAA_results_button = evaluate_randomactionagent(
    env_path="../env/AnimalAI.exe",
    configuration_file="../configs/buttonPressTask/buttonPressGreen.yml",
    save_path="../analysis/data/buttonPress/randomActionAgent100Button.csv",
    num_runs=100,
)

In [None]:
# Evaluate the random-action agent on the competition testbed once per seed.
# Every seed writes to its own CSV file.
seeds = [9022, 8812, 1056, 9917, 1942]

for seed in seeds:
    save_path = f"../analysis/data/competitionAAITestbed/RandomActionAgent_{seed}.csv"
    # The folder-based evaluator is the one that accepts these keyword
    # arguments; ``evaluate_randomactionagent`` has no ``configuration_folder``,
    # ``tmp_file_location``, ``save_path_csv``, ``seed``, ``port_base`` or
    # ``batch_size`` parameter and would raise ``TypeError``.
    # The result is not bound to ``eval`` so the builtin stays usable.
    run_completed = evaluate_randomactionagent_episode(
        env_path="../env/AnimalAI.exe",
        configuration_folder="../configs/competition",
        tmp_file_location="../..",
        save_path_csv=save_path,
        seed=seed,
        # Spread the runs over different ports by offsetting from the seed.
        port_base=seed + random.randint(0, 1000),
        batch_size=1,
    )
