## Import required libraries and packages

In [6]:
import yaml
import os
import pathlib
import time
from Agent.xwAPI import xwBackend
from utils.construct_prompt import get_api_doc
from utils.ChatGPT import ChatGPT
from tqdm import tqdm

In [2]:
# YAML file whose "Agent" section is loaded by set_agent_configuration.
CONFIG_FILE = "./config/config.yaml"
# Root folder for pipeline artifacts: this notebook reads refined_responses/ and
# intermediate_responses/ under it and writes to prompts/ and gpt_responses/.
OUTPUT_PATH = "../output_dir"

## Helper functions

In [3]:
def set_agent_configuration(configuration_file_path):
    """Load the ``Agent`` section of a YAML config and inject the OpenAI API key.

    Args:
        configuration_file_path: Path of the YAML configuration file to read.

    Returns:
        The ``Agent`` mapping from the file, with ``["ChatGPT_1"]["api_keys"]``
        set to a one-element list holding the ``OPENAI_API_KEY`` environment
        variable.

    Raises:
        KeyError: If the ``Agent`` or ``ChatGPT_1`` entries are missing, or
            ``OPENAI_API_KEY`` is not set in the environment.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
    # point this at configuration files you trust.
    with open(configuration_file_path, mode="r") as config_stream:
        full_configuration = yaml.load(config_stream, Loader=yaml.Loader)

    agent_section = full_configuration["Agent"]
    # The key comes from the environment so it never has to live in the YAML file.
    openai_key = os.environ["OPENAI_API_KEY"]
    agent_section["ChatGPT_1"]["api_keys"] = [openai_key]
    return agent_section

In [4]:
def get_api_documentation(agent_config):
    """Read the API documentation YAML referenced by the agent configuration.

    Args:
        agent_config: Agent configuration mapping; its ``api_doc_path`` entry
            is the path of the YAML file to load.

    Returns:
        The parsed YAML content of that file.
    """
    documentation_path = agent_config["api_doc_path"]
    with open(documentation_path) as documentation_stream:
        return yaml.load(documentation_stream, Loader=yaml.FullLoader)

In [5]:
def get_excel_backend(agent_config, api_doc):
    """Create the Excel automation backend selected by the agent configuration.

    Args:
        agent_config: Agent configuration mapping. ``API_backend`` selects the
            backend type and ``APP_backend`` is forwarded to its constructor.
        api_doc: API documentation object forwarded to the backend constructor.

    Returns:
        An ``xwBackend`` instance.

    Raises:
        ValueError: If ``API_backend`` is anything other than ``"xw"``. The
            previous code fell through to an unbound local variable instead.
    """
    api_backend = agent_config["API_backend"]
    if api_backend != "xw":
        raise ValueError(
            f"Unsupported API_backend {api_backend!r}; only 'xw' is supported."
        )
    return xwBackend(agent_config["APP_backend"], api_doc)

In [6]:
def create_path_generator(file_path):
    """Return a lazy, recursive listing of everything below ``file_path``.

    Args:
        file_path: Directory to walk; it is converted to text before being
            wrapped in a ``pathlib.Path``.

    Returns:
        A one-shot generator of ``pathlib.Path`` objects for every file and
        directory below ``file_path``. It must be re-created after being
        consumed.
    """
    root_directory = pathlib.Path(f"{file_path}")
    # rglob("*") is the documented shorthand for glob("**/*").
    return root_directory.rglob("*")

In [7]:
def get_source_excel_filenames(path_generator):
    """Derive source workbook file names from response-log paths.

    Args:
        path_generator: Iterable of ``pathlib.Path`` objects; entries that are
            not regular files are ignored.

    Returns:
        One ``"<name>.xlsx"`` string per file, where ``<name>`` is the log's
        file name without its first two and last five characters.
    """
    # Path.name gives the final path component on every OS; splitting the
    # string on a backslash only worked on Windows.
    log_file_names = [path.name for path in path_generator if path.is_file()]
    # Presumably strips a two-character prefix such as "1_" and a ".yaml"
    # extension -- TODO confirm against the log naming scheme.
    return [f"{log_file_name[2:-5]}.xlsx" for log_file_name in log_file_names]

In [8]:
def get_sheet_state(file_path, backend):
    """Optionally open a workbook, then report the backend's sheet state.

    Args:
        file_path: Workbook to open first, or ``None`` to query the backend
            as it currently is.
        backend: Object exposing ``OpenWorkbook(path)`` and ``GetSheetsState()``.

    Returns:
        Whatever ``backend.GetSheetsState()`` returns.
    """
    should_open_workbook = file_path is not None
    if should_open_workbook:
        # Short pause before opening; presumably lets the application settle
        # between consecutive workbooks -- TODO confirm.
        time.sleep(0.5)
        backend.OpenWorkbook(file_path)

    sheet_state = backend.GetSheetsState()
    return sheet_state

In [9]:
def extract_docs_for_input_functions(input_file_path, agent_config, api_doc):
    """Collect the detailed API documentation for each function used in a response file.

    Args:
        input_file_path: YAML file whose ``refined_response`` entry is a list
            of function-call strings such as ``Name(arg=...)``.
        agent_config: Agent configuration mapping; the
            ``["ChatGPT_1"]["prompt_format"]`` entry is passed to ``get_api_doc``.
        api_doc: API documentation object passed to ``get_api_doc``.

    Returns:
        A list with one documentation entry per distinct function name, in
        order of first appearance in the file.

    Raises:
        KeyError: Presumably when a function name has no entry in the detail
            documentation -- depends on the type ``get_api_doc`` returns.
    """
    # NOTE(review): yaml.Loader can construct arbitrary Python objects; only
    # use it on files produced by this pipeline.
    with open(input_file_path) as file:
        input_functions = yaml.load(file, Loader=yaml.Loader)
    prompt_format = agent_config["ChatGPT_1"]["prompt_format"]
    # Only the per-function detail documentation is needed here.
    _, _, api_detail_doc = get_api_doc(prompt_format, api_doc)
    # Keep the text before the first open parenthesis. partition() leaves an
    # entry without "(" intact, whereas find() returned -1 and dropped its last
    # character. dict.fromkeys de-duplicates while preserving first-appearance
    # order, so the generated prompt is the same on every run.
    unique_function_names = dict.fromkeys(
        function.partition("(")[0]
        for function in input_functions["refined_response"]
    )
    return [api_detail_doc[name] for name in unique_function_names]

In [10]:
def get_input_functions(input_file_path):
    """Return the ``refined_response`` entry of a YAML file, re-serialized as YAML text.

    Args:
        input_file_path: Path of the YAML file to read.

    Returns:
        The ``yaml.dump`` string of the file's ``refined_response`` value.
    """
    with open(input_file_path) as response_stream:
        parsed_responses = yaml.load(response_stream, Loader=yaml.Loader)
    refined_response = parsed_responses["refined_response"]
    return yaml.dump(refined_response)

In [11]:
def get_correct_summarization(correct_file_path):
    """Return the ``intermediate response`` entry of a YAML file, re-serialized as YAML text.

    Args:
        correct_file_path: Path of the YAML file holding the reference summary.

    Returns:
        The ``yaml.dump`` string of the file's ``intermediate response`` value.
    """
    with open(correct_file_path) as summary_stream:
        parsed_summaries = yaml.load(summary_stream, Loader=yaml.Loader)
    reference_summary = parsed_summaries["intermediate response"]
    return yaml.dump(reference_summary)

In [12]:
def save_prompt(prompt, test_input_file_path):
    """Write a prompt to ``<OUTPUT_PATH>/prompts/<stem>_prompt.txt``.

    Args:
        prompt: Prompt text to store.
        test_input_file_path: Path of the input file the prompt was built
            from; the part of its file name before the first ``.`` becomes
            the output file's stem.
    """
    # PureWindowsPath treats both "\\" and "/" as separators on every OS.
    # Splitting on "\\" alone left forward-slash paths whole, so the stem was
    # cut at the first "." of the directory part instead of the file name.
    input_file_name = pathlib.PureWindowsPath(test_input_file_path).name
    prompt_filename = input_file_name.split('.')[0] + '_prompt.txt'
    # Explicit UTF-8 so non-ASCII prompt text does not depend on the platform
    # default encoding.
    with open(f"{OUTPUT_PATH}/prompts/{prompt_filename}", "w", encoding="utf-8") as file:
        file.write(prompt)

In [13]:
def save_gpt_response(predicted_instructions, test_input_file_path):
    """Write predicted instructions to ``<OUTPUT_PATH>/gpt_responses/<stem>_gpt_response.yaml``.

    Args:
        predicted_instructions: Value stored under the ``gpt_response`` key of
            the output YAML document.
        test_input_file_path: Path of the input file the response belongs to;
            the part of its file name before the first ``.`` becomes the
            output file's stem.
    """
    # PureWindowsPath treats both "\\" and "/" as separators on every OS.
    # Splitting on "\\" alone left forward-slash paths whole, so the stem was
    # cut at the first "." of the directory part instead of the file name.
    input_file_name = pathlib.PureWindowsPath(test_input_file_path).name
    gpt_response_filename = input_file_name.split('.')[0] + "_gpt_response.yaml"
    gpt_response = {"gpt_response": predicted_instructions}
    with open(f"{OUTPUT_PATH}/gpt_responses/{gpt_response_filename}", "w") as file:
        yaml.dump(gpt_response, file)

## Setup and Initializations

In [14]:
# Load the agent configuration, then the API documentation it points to, and
# finally create the Excel backend that later cells use to open workbooks.
agent_config = set_agent_configuration(configuration_file_path=CONFIG_FILE)
api_doc = get_api_documentation(agent_config=agent_config)
xw_backend = get_excel_backend(agent_config=agent_config, api_doc=api_doc)

In [15]:
# Collect every refined-response file. The recursive glob also yields
# directories; they are skipped so that indices line up with excel_file_names,
# which get_source_excel_filenames builds with the same is_file() filter.
path_generator = create_path_generator(file_path=f"{OUTPUT_PATH}/refined_responses/")
input_file_paths = [str(path) for path in path_generator if path.is_file()]
# input_file_paths

In [16]:
# The generator is one-shot, so a fresh one is created before deriving the
# source workbook names from the refined-response file names.
path_generator = create_path_generator(file_path=f"{OUTPUT_PATH}/refined_responses/")
excel_file_names = get_source_excel_filenames(path_generator=path_generator)
# Workbooks live in the task_sheets folder of the dataset.
excel_file_paths = ["../dataset/task_sheets/" + file_name for file_name in excel_file_names]
# excel_file_paths

## Example for One-shot Learning

In [17]:
# The first entry serves as the worked example embedded in every prompt.
example_index = 0
# Source workbook and refined-response file of that example (same index in both lists).
example_src_file_path = excel_file_paths[example_index]
example_input_file_path = input_file_paths[example_index]
# example_src_file_path, example_input_file_path

In [18]:
# Collect the reference-summary files. Directories yielded by the recursive
# glob are skipped so that only files are indexed.
# NOTE(review): indexing this list with the same index as input_file_paths
# assumes both folders list their files in the same order -- confirm.
path_generator = create_path_generator(
    file_path=f"{OUTPUT_PATH}/intermediate_responses/"
)
correct_response_file_paths = [str(path) for path in path_generator if path.is_file()]
# correct_response_file_paths

In [19]:
# Reference summary file for the one-shot example (same index as the example input).
example_correct_file_path = correct_response_file_paths[example_index]
# example_correct_file_path

In [20]:
# Open the example workbook on the backend and capture its sheet state.
# The prompt cell below calls get_sheet_state again for the same workbook
# rather than reusing this value.
example_sheet_state = get_sheet_state(
    file_path=example_src_file_path, backend=xw_backend
)
# example_sheet_state

## Test set (all cases except the one-shot example)

In [21]:
async def call_chat_gpt(prompt):
    """Send ``prompt`` to a freshly created ChatGPT client and return its reply.

    A new ``ChatGPT`` instance is built on every call from the ``ChatGPT_1``
    section of the module-level ``agent_config``, with an empty context and
    ``interaction_mode=True``.

    Args:
        prompt: Prompt passed to the chatbot.

    Returns:
        The awaited result of the chatbot call, or ``None`` if any exception
        was raised while creating or calling the client (the error is printed).
    """
    try:
        chatbot = ChatGPT(agent_config["ChatGPT_1"], context=[], interaction_mode=True)
        reply = await chatbot(prompt)
    except Exception as error:
        # Best effort: report the failure and let the caller handle the None.
        print(f"error occurs when parsing response: {error}")
        return None
    return reply

In [23]:
# Index of the test case to run; index 0 (example_index) is the one-shot example.
test_index = 1

In [25]:
# Build the one-shot prompt for the single test case selected by test_index.
# The loop header below is disabled, so only that one case is processed.
# for test_index in tqdm(range(1, len(input_file_paths))):
test_input_file_path = input_file_paths[test_index]
test_src_file_path = excel_file_paths[test_index]

# Create prompt
# Template layout: a SYSTEM instruction, one worked USER/ASSISTANT example, then
# the actual USER request. The {placeholders} are filled by str.format below.
prompt = (
    "SYSTEM\n"
    "Summarize the each sub-step of instructions into explanations in natural language. "
    "Be brief and do not provide verbose explanations.\n\n"
    "USER\n"
    "{input_example}\n"
    "Here is the supplementary documentation you can reference:\n"
    "{documentation_example}\n"
    "Here is the corresponding sheet state:\n"
    "{sheet_state_example}\n\n"
    "ASSISTANT\n"
    "{correct_example}\n\n"
    "USER\n"
    "{actual_input}\n"
    "Here is the supplementary documentation you can reference:\n"
    "{actual_documentation}\n"
    "Here is the corresponding sheet state:\n"
    "{actual_sheet_state}\n"
)

# Format the prompt
# Keyword arguments are evaluated in the order written, and each get_sheet_state
# call opens a workbook on the shared backend: the example workbook is opened
# first and the test workbook last. Keep this order when editing.
prompt = prompt.format(
    input_example=get_input_functions(example_input_file_path),
    documentation_example=extract_docs_for_input_functions(
        input_file_path=example_input_file_path,
        agent_config=agent_config,
        api_doc=api_doc,
    ),
    correct_example=get_correct_summarization(
        correct_file_path=example_correct_file_path
    ),
    sheet_state_example=get_sheet_state(
        file_path=example_src_file_path, backend=xw_backend
    ),
    actual_input=get_input_functions(test_input_file_path),
    actual_documentation=extract_docs_for_input_functions(
        input_file_path=test_input_file_path,
        agent_config=agent_config,
        api_doc=api_doc,
    ),
    actual_sheet_state=get_sheet_state(
        file_path=test_src_file_path, backend=xw_backend
    ),
)

# Saving the prompt, querying the model and saving its reply are disabled in
# this cell; the cells below run the query and post-processing step by step.
# save_prompt(prompt, test_input_file_path)

# # Get GPT response
# response = await call_chat_gpt(prompt)

# predicted_instructions = response.split("\n")
# save_gpt_response(predicted_instructions, test_input_file_path)

In [26]:
# Display the fully formatted prompt for inspection.
prompt

'SYSTEM\nSummarize the each sub-step of instructions into explanations in natural language. Be brief and do not provide verbose explanations.\n\nUSER\n- AutoFill(source="Sheet1!B2", destination="Sheet1!B2:B122")\n- CreateChart(source="Sheet1!A1:B122", destSheet="Sheet1", chartType="XYScatterSmoothNoMarkers",\n  chartName="Acceleration vs. Hanging Mass")\n- SetChartTitle(chartName="Acceleration vs. Hanging Mass", title="Acceleration vs.\n  Hanging Mass")\n- SetChartAxis(chartName="Acceleration vs. Hanging Mass", axis="x", title="Hanging\n  Mass (m2) (kg)")\n- SetChartAxis(chartName="Acceleration vs. Hanging Mass", axis="y", title="Acceleration\n  (m/s^2)")\n\nHere is the supplementary documentation you can reference:\n[\'CreateChart(source: str, destSheet: str, chartType: str, chartName: str, XField: int = None, YField: List[int] = [])\\nArgs explanation:\\nsource (string): The range that contains the data used to create the chart.\\ndestSheet (string): The name of the sheet where the c

In [None]:
response = await call_chat_gpt()




In [24]:
# print() renders the reply's line breaks; call_chat_gpt returns None on failure,
# in which case this prints "None".
print(response)

- Step 1. Write "Future Value" in cell F1 to label the new column.
- Step 2. Write a formula in cell F2 to calculate the future value based on the present value, interest rate, and compounding periods.
- Step 3. AutoFill the formula from F2 down to F5 to apply it to the other rows in the column.


In [25]:
# call_chat_gpt returns None when the request failed; stop with a clear message
# instead of an AttributeError on None.split.
if response is None:
    raise RuntimeError("No GPT response available; re-run the call_chat_gpt cell.")
# One instruction per line; blank lines are dropped, matching how
# correct_instructions is built, so the two lists are comparable.
predicted_instructions = [line for line in response.split("\n") if line != ""]
predicted_instructions

['- Step 1. Write "Future Value" in cell F1 to label the new column.',
 '- Step 2. Write a formula in cell F2 to calculate the future value based on the present value, interest rate, and compounding periods.',
 '- Step 3. AutoFill the formula from F2 down to F5 to apply it to the other rows in the column.']

In [26]:
# Reference summary file for the test case (same index as the test input).
test_correct_file_path = correct_response_file_paths[test_index]

In [27]:
# Reference instructions for the test case: one entry per line of the dumped
# YAML, with empty lines (such as the trailing one) removed.
correct_instructions = [
    instruction
    for instruction in get_correct_summarization(
        correct_file_path=test_correct_file_path
    ).split("\n")
    if instruction
]
correct_instructions

['- Step 1. Create a new column F.',
 '- Step 2. Write the formula for future value calculation in the first data row.',
 '- Step 3. Fill the formula down to the other rows.']