In [2]:
from utils.apps import SimpleApp
from langchain_openai import ChatOpenAI
from utils.db import get_database_schema_execute_all
from utils.helper import save_results
import os
from keys import OPENAI_KEY
# Publish the key imported from the local `keys` module as the OPENAI_API_KEY
# environment variable (presumably where ChatOpenAI looks it up - confirm
# against the langchain_openai documentation).
os.environ['OPENAI_API_KEY'] = OPENAI_KEY

# Load IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution, so edits to the local `utils` package are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Run every (database, prompt) test case: build the prompt, let the LLM app
# produce an SQL statement, store the outcome and print the resulting metrics.

# Collect all dbs and all prompts.
# os.listdir() returns entries in arbitrary order, so both lists are sorted:
# the positional slices used below (prompt_list[0:7] and the optional
# db_list[a:b] subsets) would otherwise pick different items on different
# machines / file systems.
db_list = sorted(x for x in os.listdir('testDBs') if '.' not in x)  # entries without a dot (assumed to be the db folders)
prompt_list = sorted(x for x in os.listdir('prompts') if x.endswith('.txt'))

print(db_list)
print(prompt_list)

# LLM model and parameters
llm_model = ChatOpenAI(model="gpt-4o", temperature=0)

# Iterate over all dbs and prompt combinations.
# The commented alternatives restrict the run to one db family; their index
# ranges assume the sorted order of db_list - verify against the print above.
for db in db_list: # all dbs
# for db in db_list[0:3]: # BPI2016
# for db in db_list[3:6]: # ERP
# for db in db_list[8:9]: # P2P
# for db in db_list[9:12]: # UWV

    # Per-database input and output locations.
    path_to_csv_files = f'testDBs/{db}/db/'
    path_to_csv_schema_file = f'testDBs/{db}/csv_schema.xlsx'
    path_to_groud_truth_eventlog = f'testDBs/{db}/ground-truth-eventlog.csv'
    db_output_dir = f'testDBs/{db}/example.db'
    result_output_dir = f'testDBs/{db}/results/'
    db_schema = get_database_schema_execute_all(
        path_to_csv_files=path_to_csv_files,
        path_to_csv_schema_file=path_to_csv_schema_file,
        db_output_dir=db_output_dir,
    )

    # Position of the prompt written for this db family; the family name is the
    # part of the folder name before the first '-'. list.index raises
    # ValueError when no such prompt file exists.
    db_specific_prompt = prompt_list.index('prompt-07-' + db.split('-')[0] + '.txt')

    # Iterate over the first seven prompts (assumed to be the db-independent
    # ones - confirm against the printed prompt_list) plus the db-specific one.
    for pr in prompt_list[0:7] + [prompt_list[db_specific_prompt]]:

        print('TESTCASE:', db, pr)

        # Context manager closes the file even if reading fails.
        with open('prompts/' + pr, "r") as prompt_file:
            file_txt = prompt_file.read()

        # Baseline prompt which is always part of the tested prompt.
        # NOTE: the fragments are joined without separators; the text is kept
        # exactly as it was so results stay comparable with earlier runs.
        prompt = (f"""Consider the following db schema: {db_schema}""" + file_txt
                  + """Write one sql statement that returns an event log from the selected tables with the following columns: case_id, activity_id, timestamp. """
                  + """Use quotes for identifiers."""
                  + """Make sure that all columns of the eventlog are interpreted as varchar values."""
                  + """Return only the complete SQL query, leave out any other comments in the response. Return the query in plain text without markdown syntax.""")

        AgentState = {"messages": [prompt]}
        a = SimpleApp(path_to_db=db_output_dir, path_to_groud_truth_eventlog=path_to_groud_truth_eventlog, llm_model=llm_model)
        result = a.invoke(AgentState)
        save_results(chain_response=result, output_dir=result_output_dir, prompt_file=pr)

        # print('\n EXECUTED SQL STATEMENT: \n', result['agent_response'])
        # print('\n RESULTING TABLE: \n', result['sqlexecuter'])

        # Best effort: report the metrics when the run produced a 'result'
        # entry, otherwise print a notice and continue with the next test case.
        # `except Exception` (instead of a bare except) keeps KeyboardInterrupt
        # and SystemExit working so a long run can still be aborted.
        try:
            print([result['result']])
        except Exception:
            print(["""Can't calculate Precision, Recall and F1. ERROR in SQL"""])


In [None]:
# Render one parallel-coordinates figure (pdf) for every combination of
# database family, metric and metric-name prefix.
from utils.plots import plot_parallel_coordinates

path_to_dbs = 'testDBs/'

db_options = ['P2P', 'ERP', 'UWV', 'BPI2016']
metric_options = ['precision', 'recall', 'f1']
relaxed_options = ['', 'relaxed_','textual_']

# Flattened form of the three nested loops; iteration order is unchanged
# (db outermost, prefix innermost).
for db, metric, relaxed in [
    (d, m, r)
    for d in db_options
    for m in metric_options
    for r in relaxed_options
]:
    plot_parallel_coordinates(path_to_dbs, db, metric, relaxed)

In [47]:
# Export the results as csv tables, one call per database / prefix pair.
from utils.plots import save_result_tables

path_to_dbs = 'testDBs/'

# NOTE(review): this list differs from the one used for plotting
# (['P2P', 'ERP', 'UWV', 'BPI2016']) - confirm that 'RunningExample' and
# 'paperj' are still valid database names.
db_options = ['RunningExample', 'paperj', 'UWV', 'BPI2016']
relaxed_options = ['', 'relaxed_','textual_']

# Same order as the nested loops: db outermost, prefix innermost.
for db, relaxed in [(d, r) for d in db_options for r in relaxed_options]:
    save_result_tables(path_to_dbs, db, relaxed)