In [9]:
import json
#from openai import OpenAI
import os
import re
#os.chdir("/home/god/Documents/Masters")

In [10]:
# key for sorting
def natural_keys(text):
    return [int(c) if c.isdigit() else c.lower() for c in re.split('([0-9]+)', text)]


In [4]:
#Creates the format for the jsonl file
def create_jsonl_format(subdomain, prompt, id, nl=False):
    jsonl = []
    for idx, (domain, prompt) in enumerate(zip(subdomain, prompt)):
        start_index = domain.find("(define")
        dom = domain[start_index:]
        if nl:
            ids = id+str(idx)+"_nl"
        else:
            ids = id+str(idx)
        jsonl.append({"custom_id": ids, "method":"POST", "url":"/v1/chat/completions", "body":{"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a PDDL domain creation expert, you modify domains based on requested changes."},{"role": "user", "content": f"{prompt}\n\n{dom}"}],"max_tokens": 3012, "temperature": 1, "top_p": 1}})
    return jsonl

In [5]:
# For passing directly no batching
def create_jsonl_format2(subdomain, prompt, id, nl=False):
    jsonl = []
    for idx, (domain, prompt) in enumerate(zip(subdomain, prompt)):
        start_index = domain.find("(define")
        dom = domain[start_index:]
        if nl:
            ids = id+str(idx)+"_nl"
        else:
            ids = id+str(idx)
        jsonl.append({"custom_id": ids, "messages": [{"role": "system", "content": "You are a PDDL domain creation expert, you modify domains based on requested changes."},{"role": "user", "content": f"{prompt}\n\n{dom}"}]})
    return jsonl

In [6]:
# Code and natural language
#Creates the format for the jsonl file
def create_jsonl_format_code(subdomain, code, id, nl=None):
    jsonl = []
    for idx, domain in enumerate(subdomain):
        start_index = domain.find("(define")
        dom = domain[start_index:]
        if nl is not None:
            ids = id+str(idx)+"_nl"
            jsonl.append({"custom_id": ids, "method":"POST", "url":"/v1/chat/completions", "body":{"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a PDDL domain creation expert, you modify provided domains to make them executable based on a given prompt and python wrapper"},{"role": "user", "content": f"{nl}\n\n{dom}\n\nThe following is the executable code for the domain:\n\n{code}"}],"max_tokens": 4000, "temperature": 1, "top_p": 1}})
        else:
            ids = id+str(idx)
            if id != 'Si' and id != 'Ti':
                jsonl.append({"custom_id": ids, "method":"POST", "url":"/v1/chat/completions", "body":{"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a PDDL domain creation expert, you modify provided domains to make them executable based on the codebase."},{"role": "user", "content": f"Modify the following domain based on the given code so that all the functions in the code match the actions of the domain taking into effect preconditions and effects.\n\n{dom}\n\nThe following is the executable code for the domain:\n\n{code}"}],"max_tokens": 4000, "temperature": 1, "top_p": 1}})
            else:
                jsonl.append({"custom_id": ids, "method":"POST", "url":"/v1/chat/completions", "body":{"model": "gpt-4o", "messages": [{"role": "system", "content": "You are a PDDL domain creation expert, you modify provided domains to make them executable based on the codebase."},{"role": "user", "content": f"Modify the following domain based on the given code so that all the functions in the code match the actions of the domain taking into effect preconditions and effects. All durative actions have a duration of 4\n\n{dom}\n\nThe following is the executable code for the domain:\n\n{code}"}],"max_tokens": 4000, "temperature": 1, "top_p": 1}})

    return jsonl

In [13]:
#Creates the Batch for passing to openai
def create_jsonl_batch(root, format_function, batch_type="prompt-only"):
    path = os.path.abspath(root)
    batch_json = []
    for domain in ["Depots", "DriverLog", "Rovers"]:
        for req in ["Numeric", "SimpleTime", "Strips", "Time"]:
            prompt_files = os.listdir(os.path.join(path,domain,req, "labels"))
            nl_prompts = sorted([prompts for prompts in prompt_files if prompts.endswith("nl.txt")], key=natural_keys)
            reg_prompts = sorted([prompts for prompts in prompt_files if not prompts.endswith("nl.txt")], key=natural_keys)
            
            subdomain_files = os.listdir(os.path.join(path,domain,req, "subdomains"))
            subdomains = [subdomain for subdomain in subdomain_files if "sub" in subdomain]
            subdomains = sorted(subdomains, key=natural_keys)
            
            if batch_type == "prompt-and-code":
                with open(os.path.join(path,domain,req, "signature.py"), "r") as f:
                    code = f.read()

            sub_prompt = []
            for subdomain in subdomains:
                with open(os.path.join(path,domain,req, "subdomains", subdomain), "r") as f:
                    sub_prompt.append(f.read())
            
            nl_pr = []
            for prompt in nl_prompts:
                with open(os.path.join(path,domain,req, "labels", prompt), "r") as f:
                    nl_pr.append(f.read())
                    
            reg_pr = []
            for prompt in reg_prompts:
                with open(os.path.join(path,domain,req, "labels", prompt), "r") as f:
                    reg_pr.append(f.read())

            print("creating jsonl file for ", domain, req)
            if batch_type == "prompt-only":
                nl_jsonl = format_function(nl_pr, f"{domain}_{req[:2]}", True)
                reg_jsonl = format_function(reg_pr, f"{domain}_{req[:2]}")
                
            elif batch_type == "prompt-and-code":
                nl_jsonl = format_function(sub_prompt, code, f"{domain}_{req[:2]}_code", nl_pr)
                reg_jsonl = format_function(sub_prompt, code, f"{domain}_{req[:2]}_code") 
                
            batch_json.extend(reg_jsonl)
            batch_json.extend(nl_jsonl)

    with open(f"results/batch-{batch_type}.jsonl", "w") as file:
        for line in batch_json:
            file.write(json.dumps(line))
            file.write("\n")

In [8]:
#Function to upload batch data to openai
def save_batch_info(name, data):
    with open("batch_info.json", "w") as file:
        json.dump({name: data}, file)
def upload_batch(client, jsonl_file):
    batch_file_input = client.files.create(
        file=open(jsonl_file, "rb"),
        purpose="batch"
    )
    #save_batch_info("openai_batch_info", batch_file_input)
    print("Batch file uploaded successfully")
    return batch_file_input 

In [9]:
#Function to start inference on the batch
def start_run(client, input_file_id, description = "Evaluation run with no code"):
    run = client.batches.create(
        input_file_id=input_file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={"description": description}    
    )
    try:
        save_batch_info("openai_run_info", run)
    except:
        print("Error saving run info")
    print("Run started successfully")
    return run

In [212]:
# Create Batch no code
path = "data/IPC3/Tests1"
create_jsonl_batch(path, create_jsonl_format, "prompt-only")

creating jsonl file for  Depots Numeric
creating jsonl file for  Depots SimpleTime
creating jsonl file for  Depots Strips
creating jsonl file for  Depots Time
creating jsonl file for  DriverLog Numeric
creating jsonl file for  DriverLog SimpleTime
creating jsonl file for  DriverLog Strips
creating jsonl file for  DriverLog Time
creating jsonl file for  Rovers Numeric
creating jsonl file for  Rovers SimpleTime
creating jsonl file for  Rovers Strips
creating jsonl file for  Rovers Time


In [29]:
# Batch with code
path = "data/IPC3/Tests1"
create_jsonl_batch(path, create_jsonl_format_code, "prompt-and-code")

creating jsonl file for  Depots Numeric
creating jsonl file for  Depots SimpleTime
creating jsonl file for  Depots Strips
creating jsonl file for  Depots Time
creating jsonl file for  DriverLog Numeric
creating jsonl file for  DriverLog SimpleTime
creating jsonl file for  DriverLog Strips
creating jsonl file for  DriverLog Time
creating jsonl file for  Rovers Numeric
creating jsonl file for  Rovers SimpleTime
creating jsonl file for  Rovers Strips
creating jsonl file for  Rovers Time


In [14]:
with open("EvaluationPipeline/OPENAI_KEY.json", 'r') as json_file:
        data = json.load(json_file)
        client = OpenAI(api_key=data['openai_key'])

In [213]:
with open("results/batch-prompt-only.jsonl", "r") as file:
    batch = file.readlines()
    batch = [json.loads(line) for line in batch]
for i in range(5):
    print(batch[i])

{'custom_id': 'Depots_Nu0_nl', 'method': 'POST', 'url': '/v1/chat/completions', 'body': {'model': 'gpt-4o', 'messages': [{'role': 'system', 'content': 'You are a PDDL domain creation expert, you modify domains based on requested changes.'}, {'role': 'user', 'content': 'Modify the following PDDL domain by \nadding a load_limit to the truck. \n\nThis will require adding 3 functions: \nload_limit per truck, current_load per truck and the weight per crate\n\nModify the load and unload actions accordingly\n\n\n(define (domain Depot)\n(:requirements :typing :fluents)\n(:types place locatable - object\n\tdepot distributor - place\n        truck hoist surface - locatable\n        pallet crate - surface)\n\n(:predicates (at ?x - locatable ?y - place) \n             (on ?x - crate ?y - surface)\n             (in ?x - crate ?y - truck)\n             (lifting ?x - hoist ?y - crate)\n             (available ?x - hoist)\n             (clear ?x - surface)\n)\n\n(:functions  \n\t(fuel-cost)\n)\n\t\n(:

In [30]:
with open("results/batch-prompt-and-code.jsonl", "r") as file:
    batch = file.readlines()
    batch = [json.loads(line) for line in batch]
for i in range(5):
    print(batch[i])

{'custom_id': 'Depots_Nu_code0', 'method': 'POST', 'url': '/v1/chat/completions', 'body': {'model': 'gpt-4o', 'messages': [{'role': 'system', 'content': 'You are a PDDL domain creation expert, you modify provided domains to make them executable based on the codebase.'}, {'role': 'user', 'content': 'Modify the following domain based on the given code so that all the functions in the code match the actions of the domain taking into effect preconditions and effects.\n\n(define (domain Depot)\n(:requirements :typing :fluents)\n(:types place locatable - object\n\tdepot distributor - place\n        truck hoist surface - locatable\n        pallet crate - surface)\n\n(:predicates (at ?x - locatable ?y - place) \n             (on ?x - crate ?y - surface)\n             (in ?x - crate ?y - truck)\n             (lifting ?x - hoist ?y - crate)\n             (available ?x - hoist)\n             (clear ?x - surface)\n)\n\n(:functions  \n\t(fuel-cost)\n)\n\t\n(:action Drive\n:parameters (?x - truck ?y

In [31]:
#Upload batch data to openai
#batch_file_input = upload_batch(client, "batch-prompt-only.jsonl")
batch_file_input = upload_batch(client, "results/batch-prompt-and-code.jsonl")

Batch file uploaded successfully


In [32]:
batch_file_input.id

'file-aZZJl14iAW4aqrDg54XrIAcN'

In [33]:
batch_info = start_run(client, batch_file_input.id)

Error saving run info
Run started successfully


In [34]:
# Check batch ID
#print(batch_info)
print(batch_info.id)
batch_id = batch_info.id

batch_jf27rkCxUiIyfUvpKzZwVvuV


In [15]:
# check current status of the run
status = client.batches.retrieve("batch_jf27rkCxUiIyfUvpKzZwVvuV")
print(f"Status: {status.status}\nOutput_id: {status.output_file_id}\nBatchRequestsCount: {status.request_counts}")

Status: completed
Output_id: file-EB4vdCapd3zcd8AP5c4ZWsZL
BatchRequestsCount: BatchRequestCounts(completed=274, failed=0, total=274)


In [16]:
#Getting results fromt the run
content = client.files.content(status.output_file_id)
file_contents = content.content
contents = file_contents.decode("utf-8")
json_content = []
for line in contents.split("\n"):
    if line:
        json_content.append(json.loads(line))

In [17]:
def save_results(json_content, name = "results.jsonl"):
    with open(f"results/{name}", "w") as file:
            for line in json_content:
                file.write(json.dumps(line))
                file.write("\n")

In [18]:
#save_results(json_content)
save_results(json_content, "results_code.jsonl")

In [21]:
#Extract domains
def extract_domain(jsonl_file):
    with open(f"results/{jsonl_file}", "r") as file:
        jsonl = file.readlines()
        jsonl = [json.loads(line) for line in jsonl]
    domains = {}
    for line in jsonl:
            domain = line["response"]["body"]["choices"][0]["message"]["content"].split("```")[1]
            domain = domain[domain.index("("):]
            tag = line["custom_id"]
            domains[tag] = domain
    return domains

In [22]:
#domains = extract_domain("results.jsonl")
domains = extract_domain("results_code.jsonl")

In [23]:
print(domains.keys())

dict_keys(['Depots_Nu_code0', 'Depots_Nu_code1', 'Depots_Nu_code2', 'Depots_Nu_code3', 'Depots_Nu_code4', 'Depots_Nu_code5', 'Depots_Nu_code6', 'Depots_Nu_code7', 'Depots_Nu_code8', 'Depots_Nu_code9', 'Depots_Nu_code10', 'Depots_Nu_code11', 'Depots_Nu_code0_nl', 'Depots_Nu_code1_nl', 'Depots_Nu_code2_nl', 'Depots_Nu_code3_nl', 'Depots_Nu_code4_nl', 'Depots_Nu_code5_nl', 'Depots_Nu_code6_nl', 'Depots_Nu_code7_nl', 'Depots_Nu_code8_nl', 'Depots_Nu_code9_nl', 'Depots_Nu_code10_nl', 'Depots_Nu_code11_nl', 'Depots_Si_code0', 'Depots_Si_code1', 'Depots_Si_code2', 'Depots_Si_code3', 'Depots_Si_code4', 'Depots_Si_code5', 'Depots_Si_code6', 'Depots_Si_code7', 'Depots_Si_code8', 'Depots_Si_code9', 'Depots_Si_code10', 'Depots_Si_code0_nl', 'Depots_Si_code1_nl', 'Depots_Si_code2_nl', 'Depots_Si_code3_nl', 'Depots_Si_code4_nl', 'Depots_Si_code5_nl', 'Depots_Si_code6_nl', 'Depots_Si_code7_nl', 'Depots_Si_code8_nl', 'Depots_Si_code9_nl', 'Depots_Si_code10_nl', 'Depots_St_code0', 'Depots_St_code1', 'D

In [24]:
#Save PDDL results
def save_pddl(domains, path, subfolder="AI_gen"):
    for key, value in domains.items():
        if key.startswith("Depots"):
            if "Nu" in key:
                with open(os.path.join(path, "Depots", "Numeric", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Si" in key:
                with open(os.path.join(path, "Depots", "SimpleTime", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "St" in key:
                with open(os.path.join(path, "Depots", "Strips", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Ti" in key:
                with open(os.path.join(path, "Depots", "Time", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
        elif key.startswith("DriverLog"):
            if "Nu" in key:
                with open(os.path.join(path, "DriverLog", "Numeric", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Si" in key:
                with open(os.path.join(path, "DriverLog", "SimpleTime", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "St" in key:
                with open(os.path.join(path, "DriverLog", "Strips", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Ti" in key:
                with open(os.path.join(path, "DriverLog", "Time", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
        elif key.startswith("Rovers"):
            if "Nu" in key:
                with open(os.path.join(path, "Rovers", "Numeric", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Si" in key:
                with open(os.path.join(path, "Rovers", "SimpleTime", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "St" in key:
                with open(os.path.join(path, "Rovers", "Strips", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
            elif "Ti" in key:
                with open(os.path.join(path, "Rovers", "Time", subfolder, f"{key}.pddl"), "w") as file:
                    file.write(value)
    print(f"PDDL files saved successfully in {path}")

In [25]:
path = "data/IPC3/Tests1"
path = os.path.abspath(path)
#save_pddl(domains, path)
save_pddl(domains, path, "AI_gen_code")

PDDL files saved successfully in /home/god/Documents/Masters/data/IPC3/Tests1


# Report file
## Syntax
- Domain
    - Requirement
        - Total syntax errors
        - Syntax errors natural language
        - Syntax errors regular
        - total success

## Plan
- Domain
    - Requirement
        - Failed plan generations natural language
        - Failed plan generations regular
        - Total failed plan generations
        - Total success

In [36]:
#Run in python to get outputs
import json
import os
import subprocess
#subprocess.os.chdir("/home/god/Documents/Masters")
subprocess.os.chdir("C:/Users/Nicho/OneDrive/Documents/Masters/")

#syntax_path = "results/syntax-AI_gen.jsonl"
syntax_path = "results/syntax-AI_gen_code.jsonl"

with open(syntax_path, "r") as file:
    syntax_results = file.readlines()
    syntax_results = [json.loads(line) for line in syntax_results]

print(syntax_results[0])



In [41]:
#Process results
Depots = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
DriverLog = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
Rovers = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
syntax_evaluation = {"Depots": Depots, "DriverLog": DriverLog, "Rovers": Rovers}
natural_language_errors = 0
invalid_syntax_files = []
warning_syntax_files = []
unknown_error_syntax_files = []
for result in syntax_results:
    if result["errors"] > 0:
        syntax_evaluation[result["domain"]][result["requirement"]] += 1
        invalid_syntax_files.append(result["file"])
        if "_nl" in result["file"]:
            natural_language_errors += 1
    elif result["warnings"] > 0:
        warning_syntax_files.append(result["file"])
        syntax_evaluation[result["domain"]][result["requirement"]] += 1
        if "_nl" in result["file"]:
            natural_language_errors += 1    
    elif result["errors"] == -1:
        unknown_error_syntax_files.append(result["file"])
        syntax_evaluation[result["domain"]][result["requirement"]] += 1
        if "_nl" in result["file"]:
            natural_language_errors += 1
total_numerics = sum([syntax_evaluation["Depots"]["Numeric"], syntax_evaluation["DriverLog"]["Numeric"], syntax_evaluation["Rovers"]["Numeric"]])
total_simpletimes = sum([syntax_evaluation["Depots"]["SimpleTime"], syntax_evaluation["DriverLog"]["SimpleTime"], syntax_evaluation["Rovers"]["SimpleTime"]])
total_strips = sum([syntax_evaluation["Depots"]["Strips"], syntax_evaluation["DriverLog"]["Strips"], syntax_evaluation["Rovers"]["Strips"]])
total_time = sum([syntax_evaluation["Depots"]["Time"], syntax_evaluation["DriverLog"]["Time"], syntax_evaluation["Rovers"]["Time"]])

Total_depot = sum([syntax_evaluation["Depots"]["Numeric"], syntax_evaluation["Depots"]["SimpleTime"], syntax_evaluation["Depots"]["Strips"], syntax_evaluation["Depots"]["Time"]])
Total_DriverLog = sum([syntax_evaluation["DriverLog"]["Numeric"], syntax_evaluation["DriverLog"]["SimpleTime"], syntax_evaluation["DriverLog"]["Strips"], syntax_evaluation["DriverLog"]["Time"]])
Total_Rovers = sum([syntax_evaluation["Rovers"]["Numeric"], syntax_evaluation["Rovers"]["SimpleTime"], syntax_evaluation["Rovers"]["Strips"], syntax_evaluation["Rovers"]["Time"]])

total = sum([Total_depot, Total_DriverLog, Total_Rovers])

percentage_errors = "{:.2f}".format((total/270)*100)

regular_expression_errors = total - natural_language_errors
regular_expression_percentage = "{:.2f}".format((regular_expression_errors/270*2)*100)
natural_language_error_percentage = "{:.2f}".format((natural_language_errors/270*2)*100)
        


In [42]:
print(f"Total errors: {total}")
print(f"Total errors percentage: {percentage_errors}%")
print(f"Successful modification without automated corrections: {'{:.2f}'.format(100-float(percentage_errors))}%\n")
print(f"Total numeric errors: {total_numerics}")
print(f"Total simpletime errors: {total_simpletimes}")
print(f"Total strips errors: {total_strips}")
print(f"Total time errors: {total_time}\n")

print(f"Total Depot errors: {Total_depot}")
print(f"Total DriverLog errors: {Total_DriverLog}")
print(f"Total Rovers errors: {Total_Rovers}\n")

print(f"Total regular expression errors: {regular_expression_errors}")
print(f"Regular expression errors percentage: {regular_expression_percentage}%")
print(f"Natural language errors: {natural_language_errors}")
print(f"Total natural language error percentage: {natural_language_error_percentage}%\n\n")

print("Files with invalid syntax", invalid_syntax_files)
print("Files with warnings", warning_syntax_files)
print("Files with unknown errors", unknown_error_syntax_files)

Total errors: 36
Total errors percentage: 13.33%
Successful modification without automated corrections: 86.67%

Total numeric errors: 9
Total simpletime errors: 5
Total strips errors: 9
Total time errors: 13

Total Depot errors: 9
Total DriverLog errors: 8
Total Rovers errors: 19

Total regular expression errors: 16
Regular expression errors percentage: 11.85%
Natural language errors: 20
Total natural language error percentage: 14.81%


Files with invalid syntax ['Depots_Ti_code4_nl.pddl', 'Depots_Ti_code3_nl.pddl', 'Depots_Ti_code8.pddl', 'DriverLog_Nu_code7.pddl', 'DriverLog_Ti_code11.pddl', 'Rovers_St_code3_nl.pddl', 'Rovers_Ti_code7.pddl', 'Rovers_Ti_code6_nl.pddl', 'Rovers_Ti_code11.pddl', 'Rovers_Ti_code12_nl.pddl']
Files with unknown errors ['Depots_Nu_code2.pddl', 'Depots_Nu_code5.pddl', 'Depots_Nu_code0.pddl', 'Depots_Si_code5.pddl', 'Depots_St_code6_nl.pddl', 'DriverLog_Si_code9_nl.pddl', 'DriverLog_St_code2_nl.pddl', 'DriverLog_St_code0_nl.pddl', 'DriverLog_Ti_code6_nl.pddl'

In [33]:
#Extract runs and link them with subdomains

subprocess.os.chdir("C:/Users/Nicho/OneDrive/Documents/Masters/")

path = "C:/Users/Nicho/OneDrive/Documents/Masters/"
data_path = "data/IPC3/Tests1"

#generated_domains_path = "AI_gen"
#plan_paths = "runs/AI_gen"
generated_domains_path = "AI_gen_code"
plan_paths = "runs/AI_gen_code"

run_counter = 0
Depots = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0, "Numeric_nl": 0, "SimpleTime_nl": 0, "Strips_nl": 0, "Time_nl": 0}
DriverLog = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0, "Numeric_nl": 0, "SimpleTime_nl": 0, "Strips_nl": 0, "Time_nl": 0}
Rovers = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0, "Numeric_nl": 0, "SimpleTime_nl": 0, "Strips_nl": 0, "Time_nl": 0}
plan_evaluation = {"Depots": Depots, "DriverLog": DriverLog, "Rovers": Rovers}
total = 0
natural_language_errors = 0
problem_domains = {}
timeout_domains = {}
training_errors = {}
run_type_counter = {"Depots":0, "DriverLog":0, "Rovers":0}
timeout_errors = 0
total_domain_count = 0

total_runs = len(os.listdir(os.path.join(path,plan_paths)))

for domain in ["Depots", "DriverLog", "Rovers"]:
    for req in ["Numeric", "SimpleTime", "Strips", "Time"]:
        subdomain_files = os.listdir(os.path.join(path,data_path,domain,req,generated_domains_path))
        subdomain_files = sorted(subdomain_files, key=natural_keys)
        num_problems = len(os.listdir(os.path.join(path,data_path,domain,req, "problems")))
        for subdomain in subdomain_files:
            if subdomain in invalid_syntax_files:
                continue
            total_domain_count += 1
            for _ in range(num_problems):
                run_counter += 1
                run_type_counter[domain] += 1
                if run_counter < total_runs+1:
                    with open(os.path.join(path,plan_paths,"run_"+str(run_counter)+".txt"), "r") as f:
                        plan = f.read()
                    if "Solution Found" not in plan:
                        if "Execution error" not in plan:
                            total += 1
                            if subdomain not in list(problem_domains.keys()):
                                problem_domains[subdomain] = [run_counter]
                            else:
                                problem_domains[subdomain].append(run_counter)
                            if "_nl" in subdomain:
                                plan_evaluation[domain][req+"_nl"] += 1
                                natural_language_errors += 1
                            else:
                                plan_evaluation[domain][req] += 1
                            training_errors[subdomain] = {run_counter: plan}
                        else:
                            timeout_errors += 1
                            if subdomain not in list(timeout_domains.keys()):
                                timeout_domains[subdomain] = [run_counter]
                            else:
                                timeout_domains[subdomain].append(run_counter)

                else:
                    print(f"Run counter exceeds number of runs by {str(run_counter-total_runs)}\n")

total_runs = total_runs - timeout_errors                

percentage_errors = "{:.2f}".format((total/total_runs)*100)
regular_expression_errors = total - natural_language_errors

regular_expression_percentage = "{:.2f}".format((regular_expression_errors/(total_runs/2))*100)
natural_language_errors_percentage = "{:.2f}".format((natural_language_errors/(total_runs/2))*100)

total_numerics = sum([plan_evaluation["Depots"]["Numeric"], plan_evaluation["DriverLog"]["Numeric"], plan_evaluation["Rovers"]["Numeric"], plan_evaluation["Depots"]["Numeric_nl"], plan_evaluation["DriverLog"]["Numeric_nl"], plan_evaluation["Rovers"]["Numeric_nl"]])
total_simpletimes = sum([plan_evaluation["Depots"]["SimpleTime"], plan_evaluation["DriverLog"]["SimpleTime"], plan_evaluation["Rovers"]["SimpleTime"], plan_evaluation["Depots"]["SimpleTime_nl"], plan_evaluation["DriverLog"]["SimpleTime_nl"], plan_evaluation["Rovers"]["SimpleTime_nl"]])
total_strips = sum([plan_evaluation["Depots"]["Strips"], plan_evaluation["DriverLog"]["Strips"], plan_evaluation["Rovers"]["Strips"], plan_evaluation["Depots"]["Strips_nl"], plan_evaluation["DriverLog"]["Strips_nl"], plan_evaluation["Rovers"]["Strips_nl"]])
total_time = sum([plan_evaluation["Depots"]["Time"], plan_evaluation["DriverLog"]["Time"], plan_evaluation["Rovers"]["Time"], plan_evaluation["Depots"]["Time_nl"], plan_evaluation["DriverLog"]["Time_nl"], plan_evaluation["Rovers"]["Time_nl"]])

Total_depot = sum([plan_evaluation["Depots"]["Numeric"], plan_evaluation["Depots"]["SimpleTime"], plan_evaluation["Depots"]["Strips"], plan_evaluation["Depots"]["Time"], plan_evaluation["Depots"]["Numeric_nl"], plan_evaluation["Depots"]["SimpleTime_nl"], plan_evaluation["Depots"]["Strips_nl"], plan_evaluation["Depots"]["Time_nl"]])
Total_DriverLog = sum([plan_evaluation["DriverLog"]["Numeric"], plan_evaluation["DriverLog"]["SimpleTime"], plan_evaluation["DriverLog"]["Strips"], plan_evaluation["DriverLog"]["Time"], plan_evaluation["DriverLog"]["Numeric_nl"], plan_evaluation["DriverLog"]["SimpleTime_nl"], plan_evaluation["DriverLog"]["Strips_nl"], plan_evaluation["DriverLog"]["Time_nl"]])
Total_Rovers = sum([plan_evaluation["Rovers"]["Numeric"], plan_evaluation["Rovers"]["SimpleTime"], plan_evaluation["Rovers"]["Strips"], plan_evaluation["Rovers"]["Time"], plan_evaluation["Rovers"]["Numeric_nl"], plan_evaluation["Rovers"]["SimpleTime_nl"], plan_evaluation["Rovers"]["Strips_nl"], plan_evaluation["Rovers"]["Time_nl"]])

precentage_error_depot = "{:.2f}".format((Total_depot/run_type_counter["Depots"])*100)
precentage_error_driverlog = "{:.2f}".format((Total_DriverLog/run_type_counter["DriverLog"])*100)
precentage_error_rovers = "{:.2f}".format((Total_Rovers/run_type_counter["Rovers"])*100)

total_numeric_depot = sum([plan_evaluation["Depots"]["Numeric"], plan_evaluation["Depots"]["Numeric_nl"]])
total_simpletime_depot = sum([plan_evaluation["Depots"]["SimpleTime"], plan_evaluation["Depots"]["SimpleTime_nl"]])
total_strips_depot = sum([plan_evaluation["Depots"]["Strips"], plan_evaluation["Depots"]["Strips_nl"]])
total_time_depot = sum([plan_evaluation["Depots"]["Time"], plan_evaluation["Depots"]["Time_nl"]])
                       
total_numeric_driverlog = sum([plan_evaluation["DriverLog"]["Numeric"], plan_evaluation["DriverLog"]["Numeric_nl"]])
total_simpletime_driverlog = sum([plan_evaluation["DriverLog"]["SimpleTime"], plan_evaluation["DriverLog"]["SimpleTime_nl"]])
total_strips_driverlog = sum([plan_evaluation["DriverLog"]["Strips"], plan_evaluation["DriverLog"]["Strips_nl"]])
total_time_driverlog = sum([plan_evaluation["DriverLog"]["Time"], plan_evaluation["DriverLog"]["Time_nl"]])

total_numeric_rovers = sum([plan_evaluation["Rovers"]["Numeric"], plan_evaluation["Rovers"]["Numeric_nl"]])
total_simpletime_rovers = sum([plan_evaluation["Rovers"]["SimpleTime"], plan_evaluation["Rovers"]["SimpleTime_nl"]])
total_strips_rovers = sum([plan_evaluation["Rovers"]["Strips"], plan_evaluation["Rovers"]["Strips_nl"]])
total_time_rovers = sum([plan_evaluation["Rovers"]["Time"], plan_evaluation["Rovers"]["Time_nl"]])

invalid_depot_domains = len([domain for domain in problem_domains.keys() if "Depots" in domain])
invalid_driverlog_domains = len([domain for domain in problem_domains.keys() if "DriverLog" in domain])
invalid_rovers_domains = len([domain for domain in problem_domains.keys() if "Rovers" in domain])
total_invalid_domains = invalid_depot_domains + invalid_driverlog_domains + invalid_rovers_domains

invalid_depot_nl_domains = len([domain for domain in problem_domains.keys() if "Depots" in domain and "_nl" in domain])
invalid_driverlog_nl_domains = len([domain for domain in problem_domains.keys() if "DriverLog" in domain and "_nl" in domain])
invalid_rovers_nl_domains = len([domain for domain in problem_domains.keys() if "Rovers" in domain and "_nl" in domain])
total_invalid_nl_domains = invalid_depot_nl_domains + invalid_driverlog_nl_domains + invalid_rovers_nl_domains

percentage_invalid_domains = "{:.2f}".format((total_invalid_domains/total_domain_count)*100)
percentage_invalid_nl_domains = "{:.2f}".format((total_invalid_nl_domains/(total_domain_count/2))*100)
percentage_invalid_reg_domains = "{:.2f}".format(((total_invalid_domains-total_invalid_nl_domains)/(total_domain_count/2))*100)

In [34]:
print(f"Total runs: {total_runs}")
print(f"Total errors: {total}")
print(f"Total timeout errors: {timeout_errors}")
print(f"Total errors percentage: {percentage_errors}%")
print(f"Successful modification without automated corrections: {'{:.2f}'.format(100-float(percentage_errors))}%\n")
print(f"Total numeric errors: {total_numerics}")
print(f"Total simpletime errors: {total_simpletimes}")
print(f"Total strips errors: {total_strips}")
print(f"Total time errors: {total_time}\n")

print(f"Total Depot errors: {Total_depot}")
print(f"Total DriverLog errors: {Total_DriverLog}")
print(f"Total Rovers errors: {Total_Rovers}\n")

print(f"Percentage errors in Depot: {precentage_error_depot}%")
print(f"Percentage errors in DriverLog: {precentage_error_driverlog}%")
print(f"Percentage errors in Rovers: {precentage_error_rovers}%\n")

print(f"Total regular expression errors: {regular_expression_errors}")
print(f"Regular expression success percentage: {'{:.2f}'.format(100-float(regular_expression_percentage))}%")
print(f"Natural language success percentage: {'{:.2f}'.format(100-float(natural_language_errors_percentage))}%")
print(f"Total natural language errors: {natural_language_errors}\n")

print(f"Total numeric errors in Depot: {total_numeric_depot}")
print(f"Total simpletime errors in Depot: {total_simpletime_depot}")
print(f"Total strips errors in Depot: {total_strips_depot}")
print(f"Total time errors in Depot: {total_time_depot}\n")

print(f"Total numeric errors in DriverLog: {total_numeric_driverlog}")
print(f"Total simpletime errors in DriverLog: {total_simpletime_driverlog}")
print(f"Total strips errors in DriverLog: {total_strips_driverlog}")
print(f"Total time errors in DriverLog: {total_time_driverlog}\n")

print(f"Total numeric errors in Rovers: {total_numeric_rovers}")
print(f"Total simpletime errors in Rovers: {total_simpletime_rovers}")
print(f"Total strips errors in Rovers: {total_strips_rovers}")
print(f"Total time errors in Rovers: {total_time_rovers}\n")

print(f"Total invalid Depots Domains: {invalid_depot_domains}")
print(f"Total invalid DriverLog Domains: {invalid_driverlog_domains}")
print(f"Total invalid Rovers Domains: {invalid_rovers_domains}")
print(f"Total invalid Domains: {total_invalid_domains}\n")

print(f"Percentage of successful domains: {'{:.2f}'.format(100-float(percentage_invalid_domains))}%")
print(f"Percentage of successful natural language domains: {'{:.2f}'.format(100-float(percentage_invalid_nl_domains))}%")
print(f"Percentage of successful regular expression domains: {'{:.2f}'.format(100-float(percentage_invalid_reg_domains))}%")
print(f"Percentage of invalid domains: {percentage_invalid_domains}%")

Total runs: 3249
Total errors: 1228
Total timeout errors: 241
Total errors percentage: 37.80%
Successful modification without automated corrections: 62.20%

Total numeric errors: 461
Total simpletime errors: 242
Total strips errors: 253
Total time errors: 272

Total Depot errors: 380
Total DriverLog errors: 270
Total Rovers errors: 578

Percentage errors in Depot: 44.19%
Percentage errors in DriverLog: 19.15%
Percentage errors in Rovers: 47.38%

Total regular expression errors: 497
Regular expression success percentage: 69.41%
Natural language success percentage: 55.00%
Total natural language errors: 731

Total numeric errors in Depot: 176
Total simpletime errors in Depot: 60
Total strips errors in Depot: 88
Total time errors in Depot: 56

Total numeric errors in DriverLog: 120
Total simpletime errors in DriverLog: 30
Total strips errors in DriverLog: 45
Total time errors in DriverLog: 75

Total numeric errors in Rovers: 165
Total simpletime errors in Rovers: 152
Total strips errors in

In [35]:
print(problem_domains)
print(timeout_domains)
with open("results/plan_errors.txt", "w") as file:
    for problem in problem_domains.keys():
        file.write(json.dumps(problem))
        file.write("\n")
    for timeout in timeout_domains:
        file.write(json.dumps(timeout))
        file.write("\n")

{'Depots_Nu_code0.pddl': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], 'Depots_Nu_code1.pddl': [23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33], 'Depots_Nu_code1_nl.pddl': [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], 'Depots_Nu_code2.pddl': [45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55], 'Depots_Nu_code2_nl.pddl': [56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66], 'Depots_Nu_code4.pddl': [89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], 'Depots_Nu_code4_nl.pddl': [100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110], 'Depots_Nu_code5.pddl': [111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121], 'Depots_Nu_code6.pddl': [133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143], 'Depots_Nu_code6_nl.pddl': [144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154], 'Depots_Nu_code7.pddl': [155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165], 'Depots_Nu_code7_nl.pddl': [166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176], 'Depots_Nu_code8_nl.pddl': [188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198], 'De

In [6]:
import subprocess
import concurrent.futures

pddl_path = "/home/god/Documents/Masters/data/IPC3/Tests1"
popf_path = "/home/god/.planutils/packages/popf"
domain_folder = "AI_gen_code"
generated = True

Depots = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
DriverLog = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
Rovers = {"Numeric": 0, "SimpleTime": 0, "Strips": 0, "Time": 0}
plan_evaluation = {"Depots": Depots, "DriverLog": DriverLog, "Rovers": Rovers}
natural_language_errors = 0
files_with_failed_plans = []
files_timeout = []

runs = []
subprocess.os.chdir(popf_path)
total = 0
success = 0
for domain in ["Depots", "DriverLog", "Rovers"]:
    for req in ["Numeric", "SimpleTime", "Strips", "Time"]:
        if generated:
            files = sorted(os.listdir(os.path.join(pddl_path, domain, req, domain_folder)), key=natural_keys)
            for file in files:
                if file.endswith(".pddl") and file not in invalid_syntax_files:
                    problem_files = sorted(os.listdir(os.path.join(pddl_path, domain, req, "problems")), key=natural_keys)
                    for problem in problem_files:
                        plan_command = f"./run /home/god/Documents/Masters/data/IPC3/Tests1/{domain}/{req}/{domain_folder}/{file} /home/god/Documents/Masters/data/IPC3/Tests1/{domain}/{req}/problems/{problem}"
                        runs.append(plan_command)
        else:
            files = sorted(os.listdir(os.path.join(pddl_path, domain, req)))
            for file in files:
                if file.endswith(".pddl"):
                    problem_files = sorted(os.listdir(os.path.join(pddl_path, domain, req, "HandCoded", "problems")), key=natural_keys) # Remove Handcoded for base problems
                    for problem in problem_files:
                        plan_command = f"./run /home/god/Documents/Masters/data/IPC3/Tests1/{domain}/{req}/{file} /home/god/Documents/Masters/data/IPC3/Tests1/{domain}/{req}/HandCoded/problems/{problem}" # Remove Handcoded for base problems
                        runs.append(plan_command)
                    
                    
                    # try:
                    #     plan = subprocess.run(plan_command, shell=True, capture_output=True, check=True, timeout=5)
                    #     out = plan.stdout
                    #     total += 1
                    #     if "Solution Found" in out.decode("utf-8"):
                    #         success += 1
                    #     else:
                    #         files_with_failed_plans.append(file)
                    #         plan_evaluation[domain][req] += 1
                    #         if "_nl" in file:
                    #             natural_language_errors += 1
                    # except subprocess.TimeoutExpired:
                    #     files_timeout.append(file)
                    #     print("Timeout error")
                    # except subprocess.CalledProcessError:
                    #     print("Error generating plan")
  

# Total_plan_depot = sum([plan_evaluation["Depots"]["Numeric"], plan_evaluation["Depots"]["SimpleTime"], plan_evaluation["Depots"]["Strips"], plan_evaluation["Depots"]["Time"]])
# Total_plan_DriverLog = sum([plan_evaluation["DriverLog"]["Numeric"], plan_evaluation["DriverLog"]["SimpleTime"], plan_evaluation["DriverLog"]["Strips"], plan_evaluation["DriverLog"]["Time"]])
# Total_plan_Rovers = sum([plan_evaluation["Rovers"]["Numeric"], plan_evaluation["Rovers"]["SimpleTime"], plan_evaluation["Rovers"]["Strips"], plan_evaluation["Rovers"]["Time"]])

# Total_plan = sum([Total_plan_depot, Total_plan_DriverLog, Total_plan_Rovers])

# Total_plan_numerics = sum([plan_evaluation["Depots"]["Numeric"], plan_evaluation["DriverLog"]["Numeric"], plan_evaluation["Rovers"]["Numeric"]])
# Total_plan_simpletimes = sum([plan_evaluation["Depots"]["SimpleTime"], plan_evaluation["DriverLog"]["SimpleTime"], plan_evaluation["Rovers"]["SimpleTime"]])
# Total_plan_strips = sum([plan_evaluation["Depots"]["Strips"], plan_evaluation["DriverLog"]["Strips"], plan_evaluation["Rovers"]["Strips"]])
# Total_plan_time = sum([plan_evaluation["Depots"]["Time"], plan_evaluation["DriverLog"]["Time"], plan_evaluation["Rovers"]["Time"]])

# percentage_errors = "{:.2f}".format((Total_plan/total)*100)
# percentage_success = "{:.2f}".format((success/total)*100)

# Total_plan_nl_errors = natural_language_errors
# Total_plan_regular_errors = Total_plan - natural_language_errors
# Total_plan_regular_errors_percentage = "{:.2f}".format((Total_plan_regular_errors/Total_plan)*100)
# Total_plan_nl_errors_percentage = "{:.2f}".format((Total_plan_nl_errors/Total_plan)*100)
if generated:
    subprocess.os.chdir("/home/god/Documents/Masters/runs")
    with open("run_commands.txt", "w") as file:
        for run in runs:
            file.write(run)
            file.write("\n")
else: 
    subprocess.os.chdir("/home/god/Documents/Masters/runs/original")
    with open("run_commands.txt", "w") as file:
        for run in runs:
            file.write(run)
            file.write("\n")

In [299]:
import os

for file in os.listdir("/home/god/Documents/Masters/runs/AI_gen"):
    if file.endswith(".txt"):
        with open(f"/home/god/Documents/Masters/runs/{file}", "r") as f:
            lines = f.readlines()
            print(len(lines))

            

NameError: name 'Total_plan' is not defined