## Import required libraries and packages

In [6]:
import os
import pathlib
import time

import evaluate
import yaml

from Agent.xwAPI import xwBackend
from utils.ChatGPT import ChatGPT
from utils.construct_prompt import get_api_doc

## Helper functions

In [7]:
def set_agent_configuration(configuration_file_path):
    """Load the ``Agent`` section of a YAML configuration file.

    The OpenAI API key is taken from the ``OPENAI_API_KEY`` environment
    variable and written into the returned configuration.

    Args:
        configuration_file_path: Path of the YAML configuration file to read.

    Returns:
        The object stored under the top-level ``"Agent"`` key, with
        ``["ChatGPT_1"]["api_keys"]`` set to a one-element list containing the
        value of ``OPENAI_API_KEY``.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set, or if the ``"Agent"`` or
            ``"ChatGPT_1"`` keys are missing from the configuration.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects, so this
    # must only be pointed at configuration files that are trusted.
    with open(configuration_file_path, mode="r") as config_stream:
        agent_section = yaml.load(config_stream, Loader=yaml.Loader)["Agent"]

    openai_key = os.environ["OPENAI_API_KEY"]
    agent_section["ChatGPT_1"]["api_keys"] = [openai_key]
    return agent_section

In [8]:
def get_api_documentation(agent_config):
    """Read the API documentation file referenced by the agent configuration.

    Args:
        agent_config: Mapping that provides the documentation file path under
            the ``"api_doc_path"`` key.

    Returns:
        The parsed content of that YAML file (loaded with ``yaml.FullLoader``).
    """
    doc_path = agent_config["api_doc_path"]
    with open(doc_path) as doc_stream:
        return yaml.load(doc_stream, Loader=yaml.FullLoader)

In [4]:
def get_excel_backend(agent_config, api_doc):
    """Create the Excel backend selected by the agent configuration.

    Args:
        agent_config: Mapping with the keys ``"API_backend"`` (backend
            selector) and ``"APP_backend"`` (forwarded to ``xwBackend``).
        api_doc: API documentation object forwarded to ``xwBackend``.

    Returns:
        An ``xwBackend`` instance when ``API_backend`` is ``"xw"``.

    Raises:
        ValueError: If ``API_backend`` names a backend other than ``"xw"``.
            Previously this case failed with an ``UnboundLocalError`` because
            no backend had been assigned before the ``return``.
        KeyError: If a required configuration key is missing.
    """
    api_backend = agent_config["API_backend"]
    if api_backend != "xw":
        raise ValueError(
            f"Unsupported API_backend {api_backend!r}; only 'xw' is supported."
        )
    return xwBackend(agent_config["APP_backend"], api_doc)

In [5]:
def create_path_generator(file_path):
    """Return a one-shot iterator over every entry below ``file_path``.

    Entries (files and directories, recursively) are yielded in sorted order.
    ``Path.glob`` makes no ordering promise, and this notebook indexes lists
    built from separate directory listings with the same position, so a
    stable order is needed for those positions to be reproducible.

    Args:
        file_path: Directory to search; anything accepted by an f-string and
            ``pathlib.Path``.

    Returns:
        An iterator of ``pathlib.Path`` objects matching ``**/*``.
    """
    return iter(sorted(pathlib.Path(f"{file_path}").glob("**/*")))

In [6]:
def get_source_excel_filenames(path_generator):
    """Map response-log files to the names of their source workbooks.

    Args:
        path_generator: Iterable of ``pathlib.Path`` objects; entries that are
            not regular files are ignored.

    Returns:
        One ``"<stem>.xlsx"`` string per file, where ``<stem>`` is the file
        name without its first two and last five characters.
    """
    # Path.name is separator-agnostic; splitting on "\\" only isolated the
    # file name on Windows and returned the whole path elsewhere.
    log_file_names = [path.name for path in path_generator if path.is_file()]

    # NOTE(review): the [2:-5] slice presumably strips a two-character prefix
    # and a ".yaml" suffix from the log name - confirm against the naming
    # scheme of the response logs.
    return [f"{log_file_name[2:-5]}.xlsx" for log_file_name in log_file_names]

In [7]:
def get_sheet_state(file_path, backend):
    """Return the backend's sheet state, optionally opening a workbook first.

    Args:
        file_path: Workbook to open before reading the state, or ``None`` to
            read the state of whatever the backend currently has open.
        backend: Object exposing ``OpenWorkbook(path)`` and ``GetSheetsState()``.

    Returns:
        Whatever ``backend.GetSheetsState()`` returns.
    """
    if file_path is None:
        return backend.GetSheetsState()

    # NOTE(review): the half-second pause presumably gives the spreadsheet
    # application time to settle before the next workbook is opened - confirm.
    time.sleep(0.5)
    backend.OpenWorkbook(file_path)
    return backend.GetSheetsState()

In [8]:
def extract_docs_for_input_functions(input_file_path, agent_config, api_doc):
    """Collect the detailed documentation of every function used in a response.

    Args:
        input_file_path: YAML file whose ``"refined_response"`` entry is an
            iterable of function-call strings.
        agent_config: Agent configuration; ``["ChatGPT_1"]["prompt_format"]``
            is forwarded to ``get_api_doc``.
        api_doc: API documentation object forwarded to ``get_api_doc``.

    Returns:
        A list with one documentation entry per distinct function name, in
        the order in which the names first appear in the response.

    Raises:
        KeyError: If a function name has no entry in the detailed documentation.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
    # load response files that are trusted.
    with open(input_file_path) as file:
        input_functions = yaml.load(file, Loader=yaml.Loader)
    prompt_format = agent_config["ChatGPT_1"]["prompt_format"]
    # Only the third value returned by get_api_doc is needed here.
    _, _, api_detail_doc = get_api_doc(prompt_format, api_doc)

    # Keep the text before the first opening parenthesis. partition() leaves a
    # string without "(" intact, whereas slicing with find() == -1 silently
    # dropped its last character.
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # result no longer depends on per-process string hashing as a set did.
    unique_function_names = dict.fromkeys(
        function.partition("(")[0]
        for function in input_functions["refined_response"]
    )
    return [api_detail_doc[name] for name in unique_function_names]

In [9]:
def get_input_functions(input_file_path):
    """Return the ``refined_response`` entry of a response file as YAML text.

    Args:
        input_file_path: YAML file containing a ``"refined_response"`` key.

    Returns:
        The value of ``"refined_response"`` serialised with ``yaml.dump``.
    """
    with open(input_file_path) as response_stream:
        refined_steps = yaml.load(response_stream, Loader=yaml.Loader)[
            "refined_response"
        ]
    return yaml.dump(refined_steps)

In [None]:
def get_correct_summarization(correct_file_path):
    """Return the reference summary stored in a response file as YAML text.

    Args:
        correct_file_path: YAML file containing an ``"intermediate response"``
            key (note the space in the key name).

    Returns:
        The value of ``"intermediate response"`` serialised with ``yaml.dump``.
    """
    with open(correct_file_path) as reference_stream:
        reference_steps = yaml.load(reference_stream, Loader=yaml.Loader)[
            "intermediate response"
        ]
    return yaml.dump(reference_steps)

## Setup and Initializations

In [None]:
# Path of the agent's YAML configuration; being relative, it is resolved
# against the kernel's current working directory.
CONFIG_FILE = "./config/config.yaml"

In [None]:
# Build the shared objects used by the rest of the notebook, in dependency
# order: configuration -> API documentation -> Excel backend.
# Requires the OPENAI_API_KEY environment variable to be set.
agent_config = set_agent_configuration(configuration_file_path=CONFIG_FILE)
api_doc = get_api_documentation(agent_config=agent_config)
xw_backend = get_excel_backend(agent_config=agent_config, api_doc=api_doc)

In [None]:
# Collect the refined-response files. Directories are skipped so this list has
# exactly one entry per file, matching the is_file() filter applied when the
# source workbook names are derived from the same folder; otherwise a
# sub-directory would shift the positions the two lists share.
path_generator = create_path_generator(file_path="../output_dir/refined_responses/")
input_file_paths = [str(path) for path in path_generator if path.is_file()]

In [None]:
# A path iterator is single-use, so a fresh one is created for this second
# pass over the refined responses.
path_generator = create_path_generator(file_path="../output_dir/refined_responses/")
excel_file_names = get_source_excel_filenames(path_generator=path_generator)

# Source workbooks live in the task-sheet dataset folder.
excel_file_paths = [
    "../dataset/task_sheets/" + workbook_name for workbook_name in excel_file_names
]

## Example for One-shot Learning

In [None]:
# Position of the task used as the one-shot example. The same index selects
# the source workbook and the refined-response file, so both lists must be
# ordered consistently.
example_index = 0
example_src_file_path = excel_file_paths[example_index]
example_input_file_path = input_file_paths[example_index]
# example_src_file_path, example_input_file_path

In [16]:
# Collect the reference (intermediate-response) files. Directories are skipped
# so that positions in this list keep lining up with the refined-response and
# workbook lists, which are indexed with the same example/test indices.
path_generator = create_path_generator(
    file_path="../output_dir/intermediate_responses/"
)
correct_response_file_paths = [
    str(path) for path in path_generator if path.is_file()
]

In [None]:
# Reference summary file of the one-shot example (same index as its inputs).
example_correct_file_path = correct_response_file_paths[example_index]
# example_correct_file_path

In [18]:
# Open the example workbook in the backend and capture its sheet state.
# NOTE(review): this variable is not read by any later cell shown here; the
# prompt-formatting cell calls get_sheet_state() again for the same workbook.
example_sheet_state = get_sheet_state(
    file_path=example_src_file_path, backend=xw_backend
)
# example_sheet_state

## Test set (except one-shot example)

In [None]:
# Position of the task to summarise; it must differ from example_index so the
# one-shot example is not also used as the test case.
test_index = 2
test_input_file_path = input_file_paths[test_index]
test_src_file_path = excel_file_paths[test_index]
# test_input_file_path, test_src_file_path

In [20]:
# One-shot prompt template. The SYSTEM/USER/ASSISTANT sections carry a worked
# example (input, documentation, sheet state, reference answer) followed by the
# actual task; the {placeholders} are filled in by str.format in the next cell.
# NOTE(review): the next cell rebinds `prompt` to the formatted text, so this
# cell has to be re-run before formatting again.
prompt = """
SYSTEM
Summarize the each sub-step of instructions into explanations in natural language. Be brief and do not provide verbose explanations.

USER
{input_example}
Here is the supplementary documentation you can reference:
{documentation_example}
Here is the corresponding sheet state:
{sheet_state_example}

ASSISTANT
{correct_example}

USER
{actual_input}
Here is the supplementary documentation you can reference:
{actual_documentation}
Here is the corresponding sheet state:
{actual_sheet_state}
"""
prompt

'\nSYSTEM\nSummarize the each sub-step of instructions into explanations in natural language. Be brief and do not provide verbose explanations.\n\nUSER\n{input_example}\nHere is the supplementary documentation you can reference:\n{documentation_example}\nHere is the corresponding sheet state:\n{sheet_state_example}\n\nASSISTANT\n{correct_example}\n\nUSER\n{actual_input}\nHere is the supplementary documentation you can reference:\n{actual_documentation}\nHere is the corresponding sheet state:\n{actual_sheet_state}\n'

In [21]:
# Fill the template with the one-shot example and the actual test task.
# Keyword arguments are evaluated in the order written, so the backend opens
# the example workbook first and the test workbook last.
# This rebinds `prompt` from the template to the formatted text, so the cell is
# not idempotent: re-run the template cell before running this one again.
prompt = prompt.format(
    # --- one-shot example ---
    input_example=get_input_functions(example_input_file_path),
    documentation_example=extract_docs_for_input_functions(
        input_file_path=example_input_file_path,
        agent_config=agent_config,
        api_doc=api_doc,
    ),
    correct_example=get_correct_summarization(
        correct_file_path=example_correct_file_path
    ),
    sheet_state_example=get_sheet_state(
        file_path=example_src_file_path, backend=xw_backend
    ),
    # --- actual task to summarise ---
    actual_input=get_input_functions(test_input_file_path),
    actual_documentation=extract_docs_for_input_functions(
        input_file_path=test_input_file_path, agent_config=agent_config, api_doc=api_doc
    ),
    actual_sheet_state=get_sheet_state(
        file_path=test_src_file_path, backend=xw_backend
    ),
)
# print(prompt)

In [22]:
async def call_chat_gpt():
    """Send the notebook-level ``prompt`` to ChatGPT and return the reply.

    A new ``ChatGPT`` client is created from ``agent_config["ChatGPT_1"]``
    with an empty context on every call.

    Returns:
        The value produced by awaiting the client, or ``None`` when creating
        the client or awaiting the reply raises; in that case the error is
        printed instead of propagated.
    """
    try:
        chat_client = ChatGPT(
            agent_config["ChatGPT_1"], context=[], interaction_mode=True
        )
        reply = await chat_client(prompt)
    except Exception as error:
        # Best effort: report the failure and signal it to the caller as None.
        print(f"error occurs when parsing response: {error}")
        return None
    return reply

In [None]:
# Top-level await: this relies on the notebook kernel's support for awaiting
# directly in a cell. `response` is None if the call failed.
response = await call_chat_gpt()

In [24]:
# print() renders the reply's line breaks instead of showing the string repr.
print(response)

- Step 1. Write "Future Value" in cell F1 to label the new column.
- Step 2. Write a formula in cell F2 to calculate the future value based on the present value, interest rate, and compounding periods.
- Step 3. AutoFill the formula from F2 down to F5 to apply it to the other rows in the column.


In [25]:
# One predicted instruction per line of the reply. Blank (empty or
# whitespace-only) lines are dropped so that a trailing newline or spacing in
# the model output does not add an extra "prediction" and break the one-to-one
# pairing with the reference instructions during scoring.
predicted_instructions = [
    instruction for instruction in response.split("\n") if instruction.strip()
]
predicted_instructions

['- Step 1. Write "Future Value" in cell F1 to label the new column.',
 '- Step 2. Write a formula in cell F2 to calculate the future value based on the present value, interest rate, and compounding periods.',
 '- Step 3. AutoFill the formula from F2 down to F5 to apply it to the other rows in the column.']

In [26]:
# Reference summary file for the test task (same index as its inputs).
test_correct_file_path = correct_response_file_paths[test_index]

In [27]:
# Reference instructions: one entry per non-empty line of the YAML dump.
correct_instructions = [
    line
    for line in get_correct_summarization(
        correct_file_path=test_correct_file_path
    ).split("\n")
    if line
]
correct_instructions

['- Step 1. Create a new column F.',
 '- Step 2. Write the formula for future value calculation in the first data row.',
 '- Step 3. Fill the formula down to the other rows.']

In [29]:
# Score the predicted instructions against the references with the "bleu" metric.
bleu = evaluate.load("bleu")
bleu_results = bleu.compute(
    predictions=predicted_instructions,
    references=correct_instructions,
)
bleu_results

{'bleu': 0.18217511419048013,
 'precisions': [0.5098039215686274,
  0.2916666666666667,
  0.15555555555555556,
  0.047619047619047616],
 'brevity_penalty': 1.0,
 'length_ratio': 1.275,
 'translation_length': 51,
 'reference_length': 40}

In [30]:
# Same comparison, scored with the "google_bleu" metric.
google_bleu = evaluate.load("google_bleu")
google_bleu_results = google_bleu.compute(
    predictions=predicted_instructions,
    references=correct_instructions,
)
google_bleu_results

{'google_bleu': 0.25257731958762886}

In [31]:
# Same comparison, scored with the "rouge" metric.
rouge = evaluate.load("rouge")
rouge_results = rouge.compute(
    predictions=predicted_instructions,
    references=correct_instructions,
)
rouge_results