# Test Module Act prompt
In this notebook we manually test some prompts and completions of the Actuate module. The results are saved to a .txt file so they can be compared with other runs.

In [119]:
import sys
import os

# Add the directory three levels above the current working directory to sys.path
# so that the project packages imported below can be resolved.
# NOTE(review): presumably this is the repository root — confirm.
sys.path.append(os.path.abspath('../../../'))

from dotenv import load_dotenv
import threading
from llm import LLMModels

from queue import Queue
import logging
from utils.llm import extract_answers
from utils.logging import CustomAdapter
from game_environment.substrates.python.commons_harvest_open import ASCII_MAP
from agent.memory_structures.spatial_memory import SpatialMemory

# Module-level spatial memory built from the commons_harvest_open ASCII_MAP.
# extract_experiment_results reads this global to resolve the position
# associated with an action string.
spatial_memory = SpatialMemory(ASCII_MAP)

## Experiment parameters
We will use the following parameters for the tests:

In [120]:
# Free-text description of the experiment; it is written to the results file. Example:
# description = "This experiment tests actuate without plan, and world understanding. Also we are changing Observations formats."
description = "This experiment tests actuate without plan, and world understanding. Also we are changing Observations formats."

# Path to the prompt file, relative to the "prompts" folder (do not include "prompts" in the path)
#prompt_path = "actuate_tests/actuate_5.txt"
prompt_path = "actuate_tests/actuate_attack_1.txt"
# Number of replicates, i.e. how many times the prompt will be executed
replicates = 50

# Position of the last apple; for now the evaluation checks whether the agent grabs it
last_apple_position = (9, 3)


In [121]:

import datetime

def format_ratio(ratio):
    """Render a fractional ratio (e.g. 0.5) as a percentage string with two decimals (e.g. '50.00%')."""
    return format(ratio, ".2%")


def extract_experiment_results(results:list, last_apple_position:tuple):
    """
    Summarize the completions of an experiment as a human-readable report.

    Each completion is lower-cased and parsed with `extract_answers`; the value
    stored under the 'answer' key is treated as the chosen action and classified
    as grabbing the last apple, attacking, or avoiding consumption. Completions
    that are missing or cannot be processed are reported on stdout and are
    excluded from the success count and from every ratio.

    Args:
        results (list): The raw completions, one per replicate. An entry may be
            None when no completion was obtained for that replicate.
        last_apple_position (tuple): The position of the last apple in the map
    Returns:
        str_results (str): The results of the experiment in string format.
            Ratios are shown as "N/A" when no completion could be processed.
    """
    success_completions = 0
    grab_last_apple_first_action = 0
    times_decided_to_attack_first_action = 0
    times_avoid_consuming_first_action = 0
    first_actions_summary = []

    for response in results:
        if response is None:
            # No completion was stored for this replicate; nothing to parse.
            print("Error processing result: no response")
            continue
        try:
            response_dict = extract_answers(response.lower())
            current_action = response_dict['answer']
            end_position = spatial_memory.get_position_from_action(current_action)

            first_actions_summary.append(current_action)
            # Trace of the resolved position next to the target, kept for manual inspection.
            print(end_position, last_apple_position)
            if (current_action.startswith(('grab ', 'consume ')) or "go to " in current_action) and end_position == last_apple_position:
                grab_last_apple_first_action += 1
            elif current_action.startswith(('attack ', 'immobilize ', 'tie')):
                times_decided_to_attack_first_action += 1
            elif current_action.startswith('avoid consuming '):
                times_avoid_consuming_first_action += 1

            success_completions += 1
        except Exception as e:
            # Best effort: a malformed completion must not abort the whole summary.
            print(f"Error processing result: {e}")

    def _ratio(count):
        # Avoid ZeroDivisionError when no completion could be processed.
        if success_completions == 0:
            return "N/A"
        return format_ratio(count / success_completions)

    # The "Overall Insights" header is emitted without entries: the analysis of
    # actions beyond the first one is currently disabled, and the header is kept
    # so the report layout stays comparable with earlier runs.
    str_results = (
        f"Experiment Results Summary\n"
        f"--------------------------\n"
        f"Success completions: {success_completions}\n\n"
        f"First Action Insights:\n"
        f"- Grab the last apple on first action: {_ratio(grab_last_apple_first_action)}\n"
        f"- Avoid consuming on first action: {_ratio(times_avoid_consuming_first_action)}\n"
        f"- Decide to attack on first action: {_ratio(times_decided_to_attack_first_action)}\n\n"
        f"Overall Insights:\n"
        f"First Actions Summary:\n"
        + "\n".join([f"- {action}" for action in first_actions_summary])
    )

    return str_results

# Function to execute the API call in a thread
def get_completion(results:list, index:int, prompt_path:str):
    """
    Thread worker: request a single completion and record it in a shared list.

    Args:
        results (list): Shared list that collects one entry per replicate
        index (int): Slot of `results` that this call writes to
        prompt_path (str): Prompt path, forwarded as the `prompt` argument of the completion call
    Returns:
        None. The completion is written to results[index]; if the call raises
        ValueError the error is printed and None is stored instead. Any other
        exception is not handled here.
    """
    try:
        model = LLMModels().get_longer_context_fallback()
        results[index] = model.completion(prompt=prompt_path, inputs=[])
    except ValueError as error:
        print(error)
        results[index] = None  # mark this replicate as failed




def execute_experiment(prompt_path:str, replicates:int, description:str=""):
    """
    Run the prompt `replicates` times in parallel and write a report file.

    One thread per replicate calls `get_completion`; once all threads have
    finished, the completions are summarized with `extract_experiment_results`
    (using the module-level `last_apple_position`) and written, together with
    the description, the prompt text and every raw completion, to
    experiment_results/results_<prompt name>___<timestamp>.txt.

    Args:
        prompt_path (str): The path to the prompt file, relative to the "prompts" folder
        replicates (int): The number of replicates
        description (str): The description of the experiment
    Returns:
        None
    """
    load_dotenv()
    results = [None] * replicates
    threads = []

    for i in range(replicates):
        thread = threading.Thread(target=get_completion, args=(results, i, prompt_path), name=f"Thread-{i}")
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

    str_results = extract_experiment_results(results, last_apple_position)
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    # Use the prompt's base name without extension, so paths with no folder or
    # with nested folders still yield the prompt file's own name.
    file_name = os.path.splitext(os.path.basename(prompt_path))[0]
    results_filename = f"results_{file_name}___{timestamp}.txt"
    folder = "experiment_results"
    # Create the output folder up front; otherwise a missing folder would make
    # the run fail only after every completion has already been requested.
    os.makedirs(folder, exist_ok=True)
    path_to_results = os.path.join(folder, results_filename)
    separator = '\n' + '#' * 120 + '\n'

    # Completions may contain non-ASCII text, so do not rely on the platform's
    # default encoding when writing the report or reading the prompt.
    with open(path_to_results, "w", encoding="utf-8") as file:
        file.write(separator)
        file.write(f"###       Experiment for Actuate Module Promt: {prompt_path}      ###\n")
        file.write('#' * 80 + '\n')

        file.write(f"Experiment Description:\n{description}\n\n")

        file.write(separator)
        file.write(str_results)

        # Write the prompt itself to the file
        file.write(separator)
        file.write("\n\nPrompt:\n")
        with open(f"prompts/{prompt_path}", encoding="utf-8") as prompt_file:
            file.write(prompt_file.read())

        file.write(separator)
        file.write("\n\nLLM Completions Detail:\n")
        for i, result in enumerate(results, start=1):
            file.write(f"Replicate {i}:\n{result or 'No response'}\n\n")




In [122]:

# Run the experiment with the parameters configured above. This starts one thread per
# replicate, each requesting a completion, and writes a report under experiment_results/.
execute_experiment(prompt_path, replicates, description)



Action stay put does not contain a position
Action option 2 does not contain a position


(9, 1) (9, 3)
(9, 3) (9, 3)
(10, 3) (9, 3)
(9, 1) (9, 3)
(9, 3) (9, 3)
(9, 4) (9, 3)
(9, 3) (9, 3)
(9, 3) (9, 3)
(8, 4) (9, 3)
(9, 3) (9, 3)
(9, 3) (9, 3)
(9, 3) (9, 3)
(9, 3) (9, 3)
(9, 4) (9, 3)
(9, 1) (9, 3)
(9, 1) (9, 3)
(8, 3) (9, 3)
(8, 3) (9, 3)
(9, 1) (9, 3)
(9, 1) (9, 3)
(10, 3) (9, 3)
(-1, -1) (9, 3)
(9, 1) (9, 3)
(8, 3) (9, 3)
(10, 3) (9, 3)
(10, 3) (9, 3)
(9, 4) (9, 3)
(9, 3) (9, 3)
(10, 3) (9, 3)
(10, 3) (9, 3)
(-1, -1) (9, 3)
Error processing result: 'answer'
(9, 3) (9, 3)
(8, 3) (9, 3)
(9, 3) (9, 3)
(9, 3) (9, 3)
(10, 3) (9, 3)
(9, 3) (9, 3)
Error processing result: 'answer'
(9, 2) (9, 3)
(9, 1) (9, 3)
(9, 3) (9, 3)
(9, 1) (9, 3)
(9, 1) (9, 3)
(9, 3) (9, 3)
(10, 2) (9, 3)
(9, 3) (9, 3)
(9, 1) (9, 3)
(9, 1) (9, 3)
(9, 3) (9, 3)
