In [37]:
import os
import subprocess
from itertools import product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px

from util.scrape_log import scrape_search_log
from dataset.goose_domain_info import GOOSE_DOMAINS as DOMAINS
from dataset.goose_domain_info import get_domain_instance_pddl_for_domain

In [9]:
# Directory (relative to the working directory) that the per-problem search
# logs are read from when the results table is built.
_LOG_DIR = "aaai24_logs/test"

In [10]:
# Run the log-collection script (presumably it fetches/unpacks the cluster logs).
# `check=True` raises CalledProcessError when the script exits non-zero, so a
# failed collection stops the notebook instead of letting later cells analyse
# stale or missing logs. The exit status is still shown as the cell output.
subprocess.run(["sh", "collect_cluster1_logs.sh"], check=True).returncode

aaai24_logs/
aaai24_logs/test/
aaai24_logs/test/n50-s1_dd_llg_visitsome_L4_H64_mean_p10.log
aaai24_logs/test/n20-s2_dd_slg_visitsome_L4_H64_max_p10.log
aaai24_logs/test/n15-s4_dd_llg_visitsome_L4_H64_mean_p10.log
aaai24_logs/test/p-n10-b3-w3-s2_dd_llg_sokoban_L4_H64_mean_p10.log
aaai24_logs/test/p-n8-b2-w5-s1_dd_llg_sokoban_L4_H64_max_p10.log
aaai24_logs/test/blocks20-task01_dd_dlg_blocks_L4_H64_max_p10.log
aaai24_logs/test/p-l50-c50-s3_dd_llg_ferry_L4_H64_mean_p10.log
aaai24_logs/test/p-l70-c70-s5_dd_llg_ferry_L4_H64_mean_p10.log
aaai24_logs/test/n65-s4_dd_llg_visitall_L4_H64_max_p10.log
aaai24_logs/test/p-s30-n30-l20-seed4_dd_llg_spanner_L4_H64_max_p10.log
aaai24_logs/test/p-s20-n20-l13-seed2_dd_slg_spanner_L4_H64_max_p10.log
aaai24_logs/test/n40-s2_dd_llg_visitsome_L4_H64_max_p10.log
aaai24_logs/test/p-n8-b3-w3-s0_dd_llg_sokoban_L4_H64_max_p10.log
aaai24_logs/test/p-s70-n70-l46-seed3_dd_llg_spanner_L4_H64_max_p10.log
aaai24_logs/test/p-s60-n60-l40-seed5_dd_llg_spanner_L4_H64_max_p10

0

In [39]:
# Example log path:
# ...goose-aaai24/learner/aaai24_logs/test/blocks15-task01_dd_llg_blocks_L4_H64_max_p10.log

# Cartesian product of (training, graph representation, layers, aggregation);
# the right-most factor varies fastest.
MODELS = [
    (training, rep, layers, aggr)
    for training in ["di", "dd"]
    for rep in ["dlg", "slg", "flg", "llg"]
    for layers in [4, 8, 12, 16]
    for aggr in ["mean", "max"]
]

# Iterate over domains in a stable, alphabetical order.
DOMAINS = sorted(DOMAINS)

In [40]:
# Columns identifying a run (filled in from the loop variables) and the
# statistics expected back from `scrape_search_log` for every log file.
_META_COLUMNS = ["training", "graph", "layers", "aggr", "domain", "problem"]
_STAT_COLUMNS = ["solved", "expanded", "evaluated", "time", "cost", "first_h"]

# Column-oriented accumulator: one list per output column, in display order.
data = {column: [] for column in _META_COLUMNS + _STAT_COLUMNS}

for (training, rep, layers, aggr), domain in product(MODELS, DOMAINS):
    # Log names end in "p10" for "dd" models and "p20" for every other model.
    suffix = "p10" if training == "dd" else "p20"

    for i, (_, _, pf) in enumerate(get_domain_instance_pddl_for_domain(domain, "test")):
        problem_name = os.path.basename(pf).replace(".pddl", "")
        log_name = "_".join([problem_name, training, rep, domain, f"L{layers}", "H64", aggr, suffix])
        log_file = f"{_LOG_DIR}/{log_name}.log"
        stats = scrape_search_log(log_file)

        # Check the scraped keys themselves (not just how many there are), and
        # do it before appending anything so a failure never leaves the
        # columns with unequal lengths.
        assert set(stats) == set(_STAT_COLUMNS), (
            f"unexpected stats keys for {log_file}: {sorted(stats)}"
        )

        row = {
            "training": training,
            "graph": rep,
            "layers": layers,
            "aggr": aggr,
            "domain": domain,
            "problem": i,  # index of the problem within its domain, not its name
            **stats,
        }
        for column, value in row.items():
            data[column].append(value)

WHOLE_DF = pd.DataFrame(data=data)
WHOLE_DF

Unnamed: 0,training,graph,layers,aggr,domain,problem,solved,expanded,evaluated,time,cost,first_h
0,di,dlg,4,mean,blocks,0,0,-1,-1,-1.0,-1,-1
1,di,dlg,4,mean,blocks,1,0,-1,-1,-1.0,-1,-1
2,di,dlg,4,mean,blocks,2,0,-1,-1,-1.0,-1,-1
3,di,dlg,4,mean,blocks,3,0,-1,-1,-1.0,-1,-1
4,di,dlg,4,mean,blocks,4,0,-1,-1,-1.0,-1,-1
...,...,...,...,...,...,...,...,...,...,...,...,...
38907,dd,llg,16,max,visitsome,85,0,-1,-1,-1.0,-1,-1
38908,dd,llg,16,max,visitsome,86,0,-1,-1,-1.0,-1,-1
38909,dd,llg,16,max,visitsome,87,0,-1,-1,-1.0,-1,-1
38910,dd,llg,16,max,visitsome,88,0,-1,-1,-1.0,-1,-1


### Selected Layer and Aggr params

In [52]:
# Value of the "layers" column that the analyses below are restricted to.
L = 4
# Value of the "aggr" column that the analyses below are restricted to.
A = "max"

In [53]:
def get_selected_param_df(whole_df=None, layers=None, aggr=None):
    """Return the runs for one (layers, aggr) setting, labelled by model.

    Args:
        whole_df: DataFrame with at least the "training", "graph", "layers"
            and "aggr" columns. Defaults to the notebook-level ``WHOLE_DF``.
        layers: Value of the "layers" column to keep. Defaults to ``L``.
        aggr: Value of the "aggr" column to keep. Defaults to ``A``.

    Returns:
        A new DataFrame containing only the matching rows, with a "model"
        column of the form "<training>_<graph>" appended and the "training",
        "graph", "layers" and "aggr" columns removed. The input frame is
        left unmodified.
    """
    # Fall back to the notebook globals only when nothing is passed in, so
    # existing zero-argument calls keep working.
    if whole_df is None:
        whole_df = WHOLE_DF
    if layers is None:
        layers = L
    if aggr is None:
        aggr = A

    mask = (whole_df["layers"] == layers) & (whole_df["aggr"] == aggr)
    selected = whole_df.loc[mask]

    # Vectorised label construction: unlike a row-wise `apply`, this also
    # works when the selection is empty.
    model = selected["training"].astype(str) + "_" + selected["graph"].astype(str)

    # `assign` returns a new frame instead of writing into a filtered slice,
    # which avoids pandas' SettingWithCopyWarning.
    return selected.assign(model=model).drop(columns=["training", "graph", "layers", "aggr"])

### Coverage for selected Layer and Aggr params

In [54]:
# Coverage table: number of solved problems per domain (rows) and model (columns).
column_order = ["dd_dlg", "dd_flg", "dd_slg", "dd_llg", "di_dlg", "di_flg", "di_slg", "di_llg"]

df = (
    get_selected_param_df()
    .drop(columns=["expanded", "evaluated", "time", "cost", "first_h", "problem"])
    # Total "solved" per (model, domain) pair.
    .groupby(["model", "domain"], as_index=False)
    .agg({"solved": "sum"})
    # Reshape to one row per domain and one column per model.
    .pivot_table(index="domain", columns="model", values="solved", aggfunc="sum", fill_value=0)
    .loc[:, column_order]
    .reset_index()
)

df

model,domain,dd_dlg,dd_flg,dd_slg,dd_llg,di_dlg,di_flg,di_slg,di_llg
0,blocks,13,0,7,21,0,0,0,0
1,ferry,29,0,39,81,0,0,0,0
2,gripper,11,0,5,18,0,0,0,0
3,n-puzzle,16,0,17,0,0,0,0,0
4,sokoban,57,0,49,40,0,0,0,0
5,spanner,0,0,0,60,0,0,0,0
6,visitall,51,0,39,44,0,0,0,0
7,visitsome,86,0,81,20,0,0,0,0


### Expanded

In [61]:
# One scatter plot per domain of "expanded" against "problem", coloured by
# model and drawn on a log-scaled y axis. Only solved runs are plotted.
df = get_selected_param_df().loc[lambda frame: frame["solved"] == 1]

for domain in DOMAINS:
    domain_df = df.loc[df["domain"] == domain]
    fig = px.scatter(
        domain_df,
        x="problem",
        y="expanded",
        color="model",
        log_y=True,
        title=domain,
    )
    fig.show()