In [11]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from itertools import product
from util.scrape_log import scrape_search_log
from dataset.goose_domain_info import GOOSE_DOMAINS as DOMAINS
from dataset.goose_domain_info import get_domain_instance_pddl_for_domain

In [12]:
# Directory the per-run search logs are read from when building the results table.
_LOG_DIR = "aaai24_logs/test"

# Render floats in displayed frames with thousands separators and two decimals.
pd.set_option("display.float_format", "{:,.2f}".format)

In [13]:
# Run collect_cluster1_logs.sh through `sh`. Judging by the listing it prints,
# it populates aaai24_logs/ (TODO confirm against the script itself).
# The exit status is the last expression, so it is shown as the cell output.
collect_logs_cmd = "sh collect_cluster1_logs.sh"
os.system(collect_logs_cmd)

aaai24_logs/
aaai24_logs/test/
aaai24_logs/test/n50-s1_dd_llg_visitsome_L4_H64_mean_p10.log
aaai24_logs/test/p-l25-c25-s3_dd_llg_ferry_L8_H64_max_p10.log
aaai24_logs/test/p-n10-b3-w4-s0_dd_llg_sokoban_L12_H64_mean_p10.log
aaai24_logs/test/blocks30-task04_dd_dlg_blocks_L4_H64_mean_p10.log
aaai24_logs/test/n20-s2_dd_slg_visitsome_L4_H64_max_p10.log
aaai24_logs/test/p-n9-b2-w3-s0_dd_dlg_sokoban_L8_H64_mean_p10.log
aaai24_logs/test/n20-s1_dd_llg_visitall_L16_H64_max_p10.log
aaai24_logs/test/p-n11-b2-w3-s1_dd_slg_sokoban_L12_H64_mean_p10.log
aaai24_logs/test/n15-s3_di_slg_visitsome_L4_H64_max_p20.log
aaai24_logs/test/n15-s4_dd_llg_visitsome_L4_H64_mean_p10.log
aaai24_logs/test/p-n10-b3-w3-s2_dd_llg_sokoban_L4_H64_mean_p10.log
aaai24_logs/test/p-n8-b2-w5-s1_dd_llg_sokoban_L4_H64_max_p10.log
aaai24_logs/test/blocks20-task01_dd_dlg_blocks_L4_H64_max_p10.log
aaai24_logs/test/p-l20-c20-s5_dd_dlg_ferry_L8_H64_mean_p10.log
aaai24_logs/test/p-l50-c50-s3_dd_llg_ferry_L4_H64_mean_p10.log
aaai24_logs/

0

In [14]:
# Example log path:
# ...goose-aaai24/learner/aaai24_logs/test/blocks15-task01_dd_llg_blocks_L4_H64_max_p10.log
# Every (training, graph, layers, aggr) combination; the last field varies fastest.
MODELS = [
    (training, graph, layers, aggr)
    for training in ("di", "dd")
    for graph in ("dlg", "slg", "flg", "llg")
    for layers in (4, 8, 12, 16)
    for aggr in ("mean", "max")
]
# Fix the domain order so tables and plots come out in a stable sequence.
DOMAINS = sorted(DOMAINS)

In [15]:
# Build WHOLE_DF: one row per (model configuration, domain, test problem),
# combining the configuration fields with the statistics scraped from that
# run's search log.

# Column-oriented accumulator; the key order here is the column order of WHOLE_DF.
data = {
    "training": [],
    "graph": [],
    "layers": [],
    "aggr": [],
    "domain": [],
    "problem": [],
    "solved": [],
    "expanded": [],
    "evaluated": [],
    "time": [],
    "cost": [],
    "first_h": [],
    "tried": [],
    "normaliser": [],
}

# Columns filled in from the loop variables; every remaining column of `data`
# must be supplied by scrape_search_log.
config_columns = ("training", "graph", "layers", "aggr", "domain", "problem", "normaliser")
stats_columns = set(data) - set(config_columns)

# Resolve each domain's test problem names once instead of once per model
# configuration; the listing does not depend on the model.
problem_names = {
    domain: [
        os.path.basename(pf).replace(".pddl", "")
        for _, _, pf in get_domain_instance_pddl_for_domain(domain, "test")
    ]
    for domain in DOMAINS
}

for (training, rep, layers, aggr), domain in product(MODELS, DOMAINS):
    # Log names end in p10 for "dd" runs and p20 for all other runs.
    log_suffix = "p10" if training == "dd" else "p20"
    # Per-problem weight used later for normalised coverage.
    # NOTE(review): 1/18 and 1/90 look like 1 / (number of test problems) for
    # gripper and for the other domains respectively -- confirm.
    normaliser = (1 / 90) if domain != "gripper" else (1 / 18)

    for i, problem_name in enumerate(problem_names[domain]):
        log_name = "_".join(
            [problem_name, training, rep, domain, f"L{layers}", "H64", aggr, log_suffix]
        )
        log_file = f"{_LOG_DIR}/{log_name}.log"
        stats = scrape_search_log(log_file)

        # Check the exact key set rather than only a count, so a renamed or
        # missing statistic fails here with a readable message instead of as a
        # KeyError or a length mismatch further down.
        assert set(stats) == stats_columns, (
            f"unexpected statistics for {log_file}: "
            f"got {sorted(stats)}, expected {sorted(stats_columns)}"
        )

        row = {
            "training": training,
            "graph": rep,
            "layers": layers,
            "aggr": aggr,
            "domain": domain,
            "problem": i,
            "normaliser": normaliser,
            **stats,
        }
        for column, value in row.items():
            data[column].append(value)

WHOLE_DF = pd.DataFrame(data=data)

### Selected layer and aggregation parameters

In [16]:
# Layer count (L) and aggregation (A) used to select rows for the
# per-configuration tables and plots.
L, A = 8, "mean"

In [17]:
def get_selected_param_df(layers=None, aggr=None, whole_df=None):
    """Return the result rows for a single layer/aggregation configuration.

    Rows whose ``layers`` and ``aggr`` columns match the requested values are
    kept, a ``model`` column of the form ``"<training>_<graph>"`` is appended,
    and the ``training``, ``graph``, ``layers`` and ``aggr`` columns are
    dropped. The input frame is not modified.

    Parameters
    ----------
    layers : optional
        Value to match in the ``layers`` column. Defaults to the notebook-level
        ``L``.
    aggr : optional
        Value to match in the ``aggr`` column. Defaults to the notebook-level
        ``A``.
    whole_df : pandas.DataFrame, optional
        Frame to select from. Defaults to the notebook-level ``WHOLE_DF``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original index labels of the selected rows. An
        empty selection yields an empty frame that still has the ``model``
        column.
    """
    # Fall back to the notebook globals only when an argument is omitted, so
    # existing zero-argument calls behave as before.
    if layers is None:
        layers = L
    if aggr is None:
        aggr = A
    if whole_df is None:
        whole_df = WHOLE_DF

    selected = whole_df[(whole_df["layers"] == layers) & (whole_df["aggr"] == aggr)]
    # Vectorised label instead of a row-wise apply, so an empty selection is
    # handled; assign() returns a new frame rather than writing into the slice.
    model = selected["training"].astype(str) + "_" + selected["graph"].astype(str)
    return selected.assign(model=model).drop(
        columns=["training", "graph", "layers", "aggr"]
    )

### Coverage for the selected layer and aggregation parameters

In [18]:
# Fixed left-to-right model order for the coverage table.
column_order = ["dd_dlg", "dd_slg", "dd_flg", "dd_llg", "di_dlg", "di_slg", "di_flg", "di_llg"]

# Total solved and tried counts per (model, domain) for the selected parameters.
counts = (
    get_selected_param_df()
    .groupby(["model", "domain"])
    .agg({"solved": "sum", "tried": "sum"})
    .reset_index()
)
# Render each cell as "solved/tried".
counts["solved"] = [
    f"{solved}/{tried}" for solved, tried in zip(counts["solved"], counts["tried"])
]

# One row per domain, one column per model.
df = (
    counts.drop(columns=["tried"])
    .pivot_table(index="domain", columns="model", values="solved", aggfunc="sum", fill_value=0)
    .loc[:, column_order]
    .reset_index()
)

df

model,domain,dd_dlg,dd_slg,dd_flg,dd_llg,di_dlg,di_slg,di_flg,di_llg
0,blocks,0/10,0/10,0/0,62/75,0/0,0/0,0/0,0/0
1,ferry,75/85,39/53,0/0,88/90,0/0,0/0,0/0,0/0
2,gripper,9/12,13/15,0/0,18/18,0/0,0/0,0/0,0/0
3,n-puzzle,10/20,0/10,0/0,0/10,0/0,0/0,0/0,0/0
4,sokoban,48/90,39/90,0/0,34/83,0/0,0/0,0/0,0/0
5,spanner,0/10,0/10,0/0,60/75,0/0,0/0,0/0,0/0
6,visitall,36/50,42/55,0/0,44/55,0/0,0/0,0/0,0/0
7,visitsome,77/90,80/90,0/0,65/75,0/0,0/0,0/0,0/0


### Normalised coverage over all parameter configurations

In [19]:
# Fixed model (column) and configuration (row) orders for the summary table.
column_order = ["dd_dlg", "dd_slg", "dd_flg", "dd_llg", "di_dlg", "di_slg", "di_flg", "di_llg"]
row_order = ["max_4", "max_8", "max_12", "max_16", "mean_4", "mean_8", "mean_12", "mean_16"]

# Weight every solved flag by its row's normaliser, then total the weighted
# coverage per (model, config) and lay it out as config rows x model columns.
df = (
    WHOLE_DF.assign(
        model=lambda frame: frame["training"] + "_" + frame["graph"],
        config=lambda frame: frame["aggr"] + "_" + frame["layers"].astype(str),
        solved=lambda frame: frame["solved"] * frame["normaliser"],
    )
    .groupby(["model", "config"])
    .agg({"solved": "sum"})
    .reset_index()
    .pivot_table(index="config", columns="model", values="solved", aggfunc="sum", fill_value=0)
    .loc[:, column_order]
    .reindex(row_order)
    .reset_index()
)

df

model,config,dd_dlg,dd_slg,dd_flg,dd_llg,di_dlg,di_slg,di_flg,di_llg
0,max_4,3.41,2.86,0.0,3.96,1.67,1.67,0.0,1.76
1,max_8,2.77,2.81,0.0,3.87,0.0,0.0,0.0,0.0
2,max_12,0.0,2.68,0.0,3.13,0.0,0.0,0.0,0.0
3,max_16,0.0,1.64,0.0,2.52,0.0,0.0,0.0,0.0
4,mean_4,3.8,3.03,0.0,4.41,0.0,0.0,0.0,1.41
5,mean_8,3.23,2.94,0.0,4.92,0.0,0.0,0.0,0.0
6,mean_12,2.61,2.44,0.0,3.59,0.0,0.0,0.0,0.0
7,mean_16,0.0,0.78,0.0,3.37,0.0,0.0,0.0,0.0


### Expanded

In [20]:
# Keep only solved runs for the selected parameters, then draw one scatter plot
# per domain: problem index against expanded count (log scale), coloured by model.
df = get_selected_param_df().loc[lambda frame: frame["solved"] == 1]
for domain in DOMAINS:
    domain_df = df.loc[df["domain"] == domain]
    fig = px.scatter(
        domain_df,
        x="problem",
        y="expanded",
        color="model",
        log_y=True,
        title=domain,
    )
    fig.show()