## Import required libraries and packages

In [1]:
import yaml
import os
import pathlib
import time
from Agent.xwAPI import xwBackend
from utils.construct_prompt import get_api_doc
from utils.ChatGPT import ChatGPT
from tqdm import tqdm

In [2]:
# YAML configuration file; its "Agent" section is read by set_agent_configuration.
CONFIG_FILE = "./config/config.yaml"
# Root directory from which response files are listed and under which prompts
# and GPT responses are written.
OUTPUT_PATH = "../output_dir"

## Helper functions

In [3]:
def set_agent_configuration(configuration_file_path):
    """Load the "Agent" section of a YAML config and attach the OpenAI API key.

    Args:
        configuration_file_path: Path of the YAML configuration file.

    Returns:
        The mapping stored under "Agent", with ["ChatGPT_1"]["api_keys"]
        replaced by a one-element list holding the OPENAI_API_KEY
        environment variable.

    Raises:
        KeyError: If the "Agent" / "ChatGPT_1" entries or the
            OPENAI_API_KEY environment variable are missing.

    Note:
        The file is parsed with yaml.Loader, which can construct arbitrary
        Python objects; only point this at a trusted configuration file.
    """
    with open(configuration_file_path, mode="r") as config_stream:
        full_config = yaml.load(config_stream, Loader=yaml.Loader)

    agent_settings = full_config["Agent"]
    openai_key = os.environ["OPENAI_API_KEY"]
    agent_settings["ChatGPT_1"]["api_keys"] = [openai_key]
    return agent_settings

In [4]:
def get_api_documentation(agent_config):
    """Parse the YAML API documentation referenced by the agent configuration.

    Args:
        agent_config: Mapping whose "api_doc_path" entry is the path of the
            YAML documentation file.

    Returns:
        The object produced by parsing that file with yaml.FullLoader.
    """
    doc_path = agent_config["api_doc_path"]
    with open(doc_path) as doc_stream:
        return yaml.load(doc_stream, Loader=yaml.FullLoader)

In [5]:
def get_excel_backend(agent_config, api_doc):
    """Create the spreadsheet backend selected by the agent configuration.

    Args:
        agent_config: Mapping with an "API_backend" entry naming the backend
            kind and an "APP_backend" entry passed to the backend constructor.
        api_doc: API documentation object passed to the backend constructor.

    Returns:
        An xwBackend instance when "API_backend" is "xw".

    Raises:
        ValueError: If "API_backend" names any other backend. Previously this
            case fell through to the return statement and failed with an
            UnboundLocalError that did not mention the offending value.
    """
    api_backend = agent_config["API_backend"]
    if api_backend != "xw":
        raise ValueError(
            f"Unsupported API_backend {api_backend!r}; only 'xw' is supported"
        )
    return xwBackend(agent_config["APP_backend"], api_doc)

In [6]:
def create_path_generator(file_path):
    """Return an iterator over every entry found recursively under file_path.

    Both files and directories are yielded (as pathlib.Path objects); the
    iterator can be consumed only once.
    """
    root = pathlib.Path(str(file_path))
    return root.glob("**/*")

In [7]:
def get_source_excel_filenames(path_generator):
    """Map response-log files to the names of the workbooks they refer to.

    For every regular file yielded by path_generator, the part of the file
    name after the last underscore is taken, its final five characters are
    removed, and ".xlsx" is appended.

    Args:
        path_generator: Iterable of pathlib.Path objects; entries that are
            not regular files are skipped.

    Returns:
        List of workbook file names, in the order the files were yielded.
    """
    excel_names = []
    for path in path_generator:
        if not path.is_file():
            continue
        # path.name yields the final path component on every platform;
        # splitting the string on "\\" only worked with Windows separators.
        log_name = path.name
        # Strips a five-character suffix, presumably ".yaml" -- confirm
        # against the files actually stored in the response directory.
        workbook_stem = log_name.split("_")[-1][:-5]
        excel_names.append(f"{workbook_stem}.xlsx")
    return excel_names

In [8]:
def get_sheet_state(file_path, backend):
    """Return the backend's sheet state, opening a workbook first if requested.

    Args:
        file_path: Workbook to open before reading the state, or None to
            read the state of whatever the backend currently has open.
        backend: Object providing OpenWorkbook(path) and GetSheetsState().

    Returns:
        Whatever backend.GetSheetsState() returns.
    """
    if file_path is None:
        return backend.GetSheetsState()

    # Brief pause before asking the backend to open the workbook.
    time.sleep(0.5)
    backend.OpenWorkbook(file_path)
    return backend.GetSheetsState()

In [9]:
def extract_docs_for_input_functions(input_file_path, agent_config, api_doc):
    """Collect the detailed API documentation for the functions used in a response file.

    Args:
        input_file_path: YAML file whose "refined_response" entry is a list
            of function-call strings such as "Name(args)".
        agent_config: Agent configuration; ["ChatGPT_1"]["prompt_format"] is
            forwarded to get_api_doc.
        api_doc: API documentation object forwarded to get_api_doc.

    Returns:
        List with one detailed-documentation entry per distinct function
        name, ordered by first appearance in the response file.

    Raises:
        KeyError: If a function name has no entry in the detailed
            documentation.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; keep
    # this restricted to response files produced by trusted tooling.
    with open(input_file_path) as file:
        input_functions = yaml.load(file, Loader=yaml.Loader)

    prompt_format = agent_config["ChatGPT_1"]["prompt_format"]
    # get_api_doc returns three values; only the per-function detail is used.
    _, _, api_detail_doc = get_api_doc(prompt_format, api_doc)

    # Keep the text before the first opening parenthesis. partition() leaves
    # a string without "(" intact, whereas slicing with find()'s -1 result
    # silently dropped its last character.
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # result is the same on every run (iteration order of a set of strings
    # is not).
    unique_function_names = dict.fromkeys(
        function.partition("(")[0]
        for function in input_functions["refined_response"]
    )
    return [api_detail_doc[name] for name in unique_function_names]

In [10]:
def get_input_functions(input_file_path):
    """Return the "refined_response" entry of a YAML file, re-serialized as YAML text."""
    with open(input_file_path) as response_stream:
        parsed = yaml.load(response_stream, Loader=yaml.Loader)
    refined = parsed["refined_response"]
    return yaml.dump(refined)

In [11]:
def get_correct_summarization(correct_file_path):
    """Return the "intermediate response" entry of a YAML file, re-serialized as YAML text."""
    with open(correct_file_path) as reference_stream:
        parsed = yaml.load(reference_stream, Loader=yaml.Loader)
    reference = parsed["intermediate response"]
    return yaml.dump(reference)

In [12]:
def create_path_if_non_existing(path):
    """Create the directory at path, including missing parents; do nothing if it already exists."""
    target_dir = pathlib.Path(path)
    target_dir.mkdir(parents=True, exist_ok=True)

In [13]:
def save_prompt(prompt, test_input_file_path, agent_config, few_shot_count):
    """Write a prompt to "<OUTPUT_PATH>/<model>/prompts/<n>_shot/<name>_prompt.txt".

    Args:
        prompt: Prompt text to store.
        test_input_file_path: Path (string) of the input file the prompt was
            built for; the part of its base name before the first "." names
            the output file.
        agent_config: Agent configuration; ["ChatGPT_1"]["model_name"] names
            the per-model output directory.
        few_shot_count: Number of few-shot examples, used in the directory name.
    """
    # Treat both "\\" and "/" as separators. Splitting on "\\" alone left the
    # whole path in place for "/"-separated paths, and for paths starting
    # with ".." the following split(".") then produced an empty name, so
    # every prompt was written to the same "_prompt.txt".
    base_name = test_input_file_path.replace("\\", "/").rsplit("/", 1)[-1]
    prompt_filename = base_name.split(".")[0] + "_prompt.txt"
    agent_name = agent_config["ChatGPT_1"]["model_name"]
    prompt_dir = f"{OUTPUT_PATH}/{agent_name}/prompts/{few_shot_count}_shot"
    create_path_if_non_existing(prompt_dir)
    # Explicit UTF-8 so non-ASCII text in the prompt cannot fail to encode
    # under a narrower locale default encoding.
    with open(f"{prompt_dir}/{prompt_filename}", "w", encoding="utf-8") as file:
        file.write(prompt)

In [14]:
def save_gpt_response(predicted_instructions, test_input_file_path, agent_config, few_shot_count):
    """Write predictions to "<OUTPUT_PATH>/<model>/gpt_responses/<n>_shot/<name>_gpt_response.yaml".

    Args:
        predicted_instructions: Value stored under the "gpt_response" key.
        test_input_file_path: Path (string) of the input file the response
            belongs to; the part of its base name before the first "." names
            the output file.
        agent_config: Agent configuration; ["ChatGPT_1"]["model_name"] names
            the per-model output directory.
        few_shot_count: Number of few-shot examples, used in the directory name.
    """
    # Treat both "\\" and "/" as separators. Splitting on "\\" alone left the
    # whole path in place for "/"-separated paths, and for paths starting
    # with ".." the following split(".") then produced an empty name, so
    # every response was written to the same "_gpt_response.yaml".
    base_name = test_input_file_path.replace("\\", "/").rsplit("/", 1)[-1]
    gpt_response_filename = base_name.split(".")[0] + "_gpt_response.yaml"
    gpt_response = {"gpt_response": predicted_instructions}
    agent_name = agent_config["ChatGPT_1"]["model_name"]
    response_dir = f"{OUTPUT_PATH}/{agent_name}/gpt_responses/{few_shot_count}_shot"
    create_path_if_non_existing(response_dir)
    with open(f"{response_dir}/{gpt_response_filename}", "w") as file:
        yaml.dump(gpt_response, file)

## Setup and Initializations

In [15]:
# Load the agent settings, then the API documentation they point to, and
# finally create the spreadsheet backend that is given both.
agent_config = set_agent_configuration(CONFIG_FILE)
api_doc = get_api_documentation(agent_config)
xw_backend = get_excel_backend(agent_config, api_doc)

In [16]:
path_generator = create_path_generator(file_path=f"{OUTPUT_PATH}/refined_responses/")
# Keep regular files only. The recursive listing also yields directories,
# and get_source_excel_filenames skips those, so an unfiltered list could
# fall out of step with excel_file_paths, which is indexed in parallel.
input_file_paths = [str(path) for path in path_generator if path.is_file()]
# input_file_paths

In [17]:
# List the refined-response files again (the earlier iterator is single-use)
# and turn each derived workbook name into a path inside the task-sheet dataset.
path_generator = create_path_generator(f"{OUTPUT_PATH}/refined_responses/")
excel_file_names = get_source_excel_filenames(path_generator)
excel_file_paths = [f"../dataset/task_sheets/{workbook}" for workbook in excel_file_names]
# excel_file_paths

## Examples for Few-shot Learning

In [18]:
# Number of few-shot examples built below; also passed to save_prompt and
# save_gpt_response, where it becomes part of the "<n>_shot" directory name.
no_of_examples = 5

In [19]:
# Template for one solved USER/ASSISTANT exchange. It is identical for every
# example, so it is defined once instead of on each iteration.
example_template = (
    "USER\n"
    "{input_example}\n"
    "Here is the supplementary documentation you can reference:\n"
    "{documentation_example}\n"
    "Here is the corresponding sheet state:\n"
    "{sheet_state_example}\n\n"
    "ASSISTANT\n"
    "{correct_example}\n"
)

# The listing of reference summaries does not depend on the example index,
# so the directory is walked once rather than once per example.
path_generator = create_path_generator(
    file_path=f"{OUTPUT_PATH}/intermediate_responses/"
)
correct_response_file_paths = [str(path) for path in path_generator]

few_shot_examples = []
for example_index in range(no_of_examples):
    # NOTE(review): the three lists are indexed in parallel, which assumes
    # the same index refers to the same task in each listing -- confirm.
    example_src_file_path = excel_file_paths[example_index]
    example_input_file_path = input_file_paths[example_index]
    example_correct_file_path = correct_response_file_paths[example_index]

    # Open the workbook and read its state once. The state used to be
    # fetched a second time inside format(), reopening the same workbook
    # while the first result went unused.
    example_sheet_state = get_sheet_state(
        file_path=example_src_file_path, backend=xw_backend
    )

    example = example_template.format(
        input_example=get_input_functions(example_input_file_path),
        documentation_example=extract_docs_for_input_functions(
            input_file_path=example_input_file_path,
            agent_config=agent_config,
            api_doc=api_doc,
        ),
        correct_example=get_correct_summarization(
            correct_file_path=example_correct_file_path
        ),
        sheet_state_example=example_sheet_state,
    )
    few_shot_examples.append(example)

## Test set (excluding the few-shot examples)

In [20]:
async def call_chat_gpt(prompt):
    """Send one prompt to a freshly created ChatGPT client and return its reply.

    Any exception raised while creating the client or awaiting the reply is
    printed and swallowed; in that case the coroutine returns None, so
    callers must be prepared for a missing response.
    """
    try:
        client = ChatGPT(agent_config["ChatGPT_1"], context=[], interaction_mode=True)
        reply = await client(prompt)
        return reply
    except Exception as error:
        print(f"error occurs when parsing response: {error}")
    return None

In [21]:
for test_index in tqdm(range(11, 21)):
    test_input_file_path = input_file_paths[test_index]
    test_src_file_path = excel_file_paths[test_index]

    # Create prompt
    prompt = (
        "SYSTEM\n"
        "Summarize the each sub-step of instructions into explanations in natural language. "
        "Be brief and do not provide verbose explanations."
        "Avoid redundant steps and provide minimal steps\n\n"
        "{few_shot_examples}\n"
        "USER\n"
        "{actual_input}\n"
        "Here is the supplementary documentation you can reference:\n"
        "{actual_documentation}\n"
        "Here is the corresponding sheet state:\n"
        "{actual_sheet_state}\n"
    )

    # Format the prompt
    prompt = prompt.format(
        few_shot_examples = "\n".join(few_shot_examples),
        actual_input=get_input_functions(test_input_file_path),
        actual_documentation=extract_docs_for_input_functions(
            input_file_path=test_input_file_path,
            agent_config=agent_config,
            api_doc=api_doc,
        ),
        actual_sheet_state=get_sheet_state(
            file_path=test_src_file_path, backend=xw_backend
        ),
    )

    save_prompt(prompt, test_input_file_path, agent_config, no_of_examples)

    # Get GPT response
    response = await call_chat_gpt(prompt)

    predicted_instructions = response.split("\n")
    predicted_instructions = [instruction[2:] for instruction in predicted_instructions]
    save_gpt_response(predicted_instructions, test_input_file_path, agent_config, no_of_examples)

100%|██████████| 10/10 [00:39<00:00,  3.92s/it]
