In [3]:
import matplotlib.pyplot as plt
import os
import pandas as pd
import numpy as np
import itertools
from util.save_load import load_kernel_model
from dataset.graphs_kernel import get_graph_data
from kernels.wrapper import MODELS, KernelModelWrapper
from dataset.ipc2023_learning_domain_info import IPC2023_LEARNING_DOMAINS, get_number_of_ipc2023_training_data
from itertools import product
from IPython.display import display, HTML

In [4]:
_LOG_DIR = "logs/train_kernel"

ITERATIONS = list(range(1, 10))

PRUNE_COEFS = list(range(10))

N_TRAINING_DATA = get_number_of_ipc2023_training_data()

### Train metrics

In [5]:
def get_data(domain):
  d = {
    "config": [],
    "mse": [],
    "f1": [],
    "nonzero_weights": [],
    "time": [],
  }

  CONFIGS = list(product(MODELS, ITERATIONS, PRUNE_COEFS))
  for config in CONFIGS:
    model, iterations, prune = config
    log_file = "_".join([model, "llg", "ipc2023-learning-"+domain, "wl", str(iterations), str(prune*iterations)])+".log"
    log_file = _LOG_DIR + "/" + log_file
    
    if not os.path.exists(log_file):
      continue

    stats = {
      "config": "_".join([model, "wl", str(iterations), str(prune*iterations)])
    }

    lines = list(open(log_file, 'r').readlines())
    for line in lines:
      toks = line.split()
      if "train_mse" in line:
        stats["mse"] = float(toks[-1])
      elif "train_f1_macro" in line:
        stats["f1"] = float(toks[-1])
      elif "zero_weights" in line:
        weights = int(toks[1].split('/')[1])
        zeros = int(toks[1].split('/')[0])
        stats["nonzero_weights"] = weights - zeros
      elif "Model training completed in " in line:
        stats["time"] = float(toks[-1].replace("s", ""))
    
    if "nonzero_weights" not in stats:
      stats["nonzero_weights"] = "na"

    if len(stats) != len(d):
      continue

    for key in stats:
      d[key].append(stats[key])

  return d

def get_df(domain):
  d = get_data(domain)
  return pd.DataFrame(d)

In [12]:
og_iterations = ITERATIONS
og_prune_coefs = PRUNE_COEFS

ITERATIONS = [1]
PRUNE_COEFS = [0]

max_times = []
for domain in IPC2023_LEARNING_DOMAINS:
  max_times.append(max(get_df(domain).to_numpy()[:,-1]))
  print(domain, N_TRAINING_DATA[domain])
  display(get_df(domain))
print("max time:", max(max_times))

ITERATIONS = og_iterations
PRUNE_COEFS = og_prune_coefs

blocksworld 4954


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,0.32,0.45,6143,6.22
1,ridge_wl_1_0,0.25,0.69,6218,9.94
2,lasso_wl_1_0,3.74,0.23,6,11.39
3,rbf-svr_wl_1_0,4.07,0.32,na,29.24
4,quadratic-svr_wl_1_0,11.01,0.26,na,31.21
5,cubic-svr_wl_1_0,21.03,0.2,na,14.08


childsnack 2148


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,0.19,0.86,212,2.91
1,ridge_wl_1_0,0.13,0.9,155,0.02
2,lasso_wl_1_0,1.46,0.38,6,0.02
3,rbf-svr_wl_1_0,0.65,0.49,na,0.25
4,quadratic-svr_wl_1_0,1.28,0.38,na,0.25
5,cubic-svr_wl_1_0,1.55,0.41,na,0.25


ferry 3662


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,0.43,0.59,551,3.81
1,ridge_wl_1_0,0.38,0.54,516,0.04
2,lasso_wl_1_0,0.64,0.3,6,0.21
3,rbf-svr_wl_1_0,0.94,0.32,na,1.74
4,quadratic-svr_wl_1_0,1.73,0.25,na,1.7
5,cubic-svr_wl_1_0,2.32,0.27,na,1.76


floortile 8351


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,6.66,0.18,416,9.42
1,ridge_wl_1_0,5.5,0.13,371,0.04
2,lasso_wl_1_0,11.09,0.1,7,0.43
3,rbf-svr_wl_1_0,13.97,0.1,na,9.71
4,quadratic-svr_wl_1_0,17.17,0.08,na,10.73
5,cubic-svr_wl_1_0,16.68,0.09,na,9.96


miconic 1630


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,0.76,0.42,141,1.05
1,ridge_wl_1_0,0.71,0.38,109,0.03
2,lasso_wl_1_0,1.21,0.28,5,0.01
3,rbf-svr_wl_1_0,1.02,0.34,na,0.13
4,quadratic-svr_wl_1_0,1.34,0.3,na,0.13
5,cubic-svr_wl_1_0,1.63,0.28,na,0.13


rovers 4623


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,12.56,0.15,732,7.61
1,ridge_wl_1_0,8.58,0.13,730,0.18
2,lasso_wl_1_0,50.11,0.03,11,0.17
3,rbf-svr_wl_1_0,78.2,0.04,na,5.11
4,quadratic-svr_wl_1_0,60.81,0.03,na,4.96
5,cubic-svr_wl_1_0,53.92,0.04,na,5.01


satellite 26919


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,7.08,0.23,159,0.25
1,ridge_wl_1_0,6.55,0.15,109,0.02
2,lasso_wl_1_0,24.04,0.05,10,0.01
3,rbf-svr_wl_1_0,852.36,0.0,na,0.02
4,quadratic-svr_wl_1_0,647.68,0.0,na,0.01
5,cubic-svr_wl_1_0,343.95,0.01,na,0.01


sokoban 2422


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,55.83,0.03,138,2.69
1,ridge_wl_1_0,36.93,0.04,97,0.03
2,lasso_wl_1_0,55.27,0.02,8,0.02
3,rbf-svr_wl_1_0,137.7,0.02,na,0.27
4,quadratic-svr_wl_1_0,132.98,0.02,na,0.26
5,cubic-svr_wl_1_0,117.77,0.02,na,0.26


spanner 1416


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,0.87,0.39,127,1.24
1,ridge_wl_1_0,0.81,0.42,87,0.01
2,lasso_wl_1_0,2.14,0.19,2,0.0
3,rbf-svr_wl_1_0,1.41,0.31,na,0.09
4,quadratic-svr_wl_1_0,2.28,0.25,na,0.09
5,cubic-svr_wl_1_0,3.41,0.18,na,0.09


transport 4316


Unnamed: 0,config,mse,f1,nonzero_weights,time
0,linear-svr_wl_1_0,39.04,0.13,481,3.57
1,ridge_wl_1_0,28.44,0.08,425,0.03
2,lasso_wl_1_0,48.14,0.03,7,0.13
3,rbf-svr_wl_1_0,52.21,0.06,na,2.32
4,quadratic-svr_wl_1_0,54.42,0.06,na,2.22
5,cubic-svr_wl_1_0,60.05,0.05,na,2.39


max time: 31.21


In [70]:
for domain in IPC2023_LEARNING_DOMAINS:
  for model in ["linear-svr", "lasso", "ridge"]:
    d = get_data(domain)

    map_scores = {}
    for k, config in enumerate(d['config']):
      if model not in config:
        continue
      toks = config.split("_")
      key = (int(toks[-2]), int(toks[-1]))
      map_scores[key] = {
        "mse": d['mse'][k],
        "f1": d['f1'][k],
        "nonzero_weights": d['nonzero_weights'][k],
      }

    fig, ax = plt.subplots(1, 3)
    for k, metric in enumerate(["f1", "mse", "nonzero_weights"]):
      heatmap = np.zeros((len(ITERATIONS), len(PRUNE_COEFS)))
      for i in ITERATIONS:
        for j in PRUNE_COEFS:
          try:
            heatmap[i-1,j] = map_scores[(i, j*i)][metric]
            if metric=='nonzero_weights':
              heatmap[i-1,j] = np.log10(heatmap[i-1,j])
          except:
            heatmap[i-1,j] = np.nan

      ax[k].set_xticks(range(len(PRUNE_COEFS)), PRUNE_COEFS)
      ax[k].set_xlabel("Prune Coefficient")
      ax[k].set_yticks(range(len(ITERATIONS)), ITERATIONS)
      ax[k].set_ylabel("WL iterations")
      ax[k].imshow(heatmap)
      # ax[k].colorbar()
      if metric == "nonzero_weights":
        title = (f"nonzero_weights log10")
      else:
        title = metric
      ax[k].set_title(title)
    plt.title(f"{model} {domain}")
    plt.savefig(f"plots/heatmap_train-stats_{model}_ipc2023-learning-{domain}.png")
    plt.clf()

  fig, ax = plt.subplots(1, 3)


<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

<Figure size 640x480 with 0 Axes>

2D scatter

In [82]:
import matplotlib as mpl
import matplotlib.cm as cm

cmap=cm.rainbow(np.array(ITERATIONS)/np.max(ITERATIONS)) 

for domain in IPC2023_LEARNING_DOMAINS:
  for model in ["linear-svr", "lasso", "ridge"]:
    d = get_data(domain)

    map_scores = {}
    for k, config in enumerate(d['config']):
      if model not in config:
        continue
      toks = config.split("_")
      key = (int(toks[-2]), int(toks[-1]))
      map_scores[key] = {
        "mse": d['mse'][k],
        "f1": d['f1'][k],
        "nonzero_weights": d['nonzero_weights'][k],
      }

    xs = {i: [] for i in ITERATIONS}
    ys = {i: [] for i in ITERATIONS}
    for metric in ["mse", "nonzero_weights"]:
      for i in ITERATIONS:
        for j in PRUNE_COEFS:
          try:
            if metric=="mse":
              ys[i].append(map_scores[(i, j*i)][metric])
            else:
              xs[i].append(map_scores[(i, j*i)][metric])
          except:
            pass
    
    lines = []
    for i in ITERATIONS:
      plt.scatter(xs[i], ys[i], label=i, c=cmap[i-1])
    plt.xscale("log")
    plt.xlabel("nonzero_weights (log)")
    plt.ylabel("mse")
    plt.legend()
    plt.ylim(bottom=0)
    plt.title(f"{model} {domain}")
    plt.savefig(f"plots/scatter_train-stats_{model}_ipc2023-learning-{domain}.png")
    plt.clf()

  plt.scatter(xs[i], ys[i], label=i, c=cmap[i-1])


<Figure size 640x480 with 0 Axes>

In [72]:
for domain in IPC2023_LEARNING_DOMAINS:
  print(domain, N_TRAINING_DATA[domain])
  df = get_df(domain)
  display(df)

blocksworld 4954


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,0.32,0.45,6150
1,linear-svr_wl_1_1,0.62,0.42,5268
2,linear-svr_wl_1_2,0.65,0.45,1887
3,linear-svr_wl_1_3,0.58,0.42,1644
4,linear-svr_wl_1_4,0.68,0.36,584
...,...,...,...,...
255,lasso_wl_9_45,3.74,0.23,7
256,lasso_wl_9_54,3.74,0.23,7
257,lasso_wl_9_63,3.74,0.23,7
258,lasso_wl_9_72,3.74,0.23,7


childsnack 2148


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,0.11,0.91,1113
1,linear-svr_wl_1_1,0.34,0.51,1057
2,linear-svr_wl_1_2,0.11,0.91,952
3,linear-svr_wl_1_3,0.13,0.91,909
4,linear-svr_wl_1_4,0.18,0.89,824
...,...,...,...,...
265,lasso_wl_9_45,1.46,0.38,8
266,lasso_wl_9_54,1.46,0.38,8
267,lasso_wl_9_63,1.46,0.38,8
268,lasso_wl_9_72,1.46,0.38,8


ferry 3662


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,0.47,0.52,821
1,linear-svr_wl_1_1,0.39,0.62,817
2,linear-svr_wl_1_2,0.41,0.59,744
3,linear-svr_wl_1_3,0.40,0.60,730
4,linear-svr_wl_1_4,0.43,0.59,613
...,...,...,...,...
260,lasso_wl_9_45,0.64,0.30,8
261,lasso_wl_9_54,0.64,0.30,8
262,lasso_wl_9_63,0.64,0.30,8
263,lasso_wl_9_72,0.64,0.30,8


floortile 8351


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,3.95,0.18,1226
1,linear-svr_wl_1_1,3.95,0.18,1226
2,linear-svr_wl_1_2,3.96,0.17,1200
3,linear-svr_wl_1_3,4.12,0.19,897
4,linear-svr_wl_1_4,4.20,0.19,891
...,...,...,...,...
261,lasso_wl_9_45,10.45,0.10,12
262,lasso_wl_9_54,10.45,0.10,12
263,lasso_wl_9_63,10.45,0.10,12
264,lasso_wl_9_72,10.45,0.10,12


miconic 1630


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,0.61,0.52,576
1,linear-svr_wl_1_1,0.59,0.52,545
2,linear-svr_wl_1_2,0.59,0.51,522
3,linear-svr_wl_1_3,0.60,0.52,492
4,linear-svr_wl_1_4,0.60,0.52,467
...,...,...,...,...
265,lasso_wl_9_45,1.21,0.28,5
266,lasso_wl_9_54,1.21,0.28,5
267,lasso_wl_9_63,1.21,0.28,5
268,lasso_wl_9_72,1.21,0.28,5


rovers 4623


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,1.61,0.23,1837
1,linear-svr_wl_1_1,1.00,0.37,1780
2,linear-svr_wl_1_2,1.01,0.35,1766
3,linear-svr_wl_1_3,1.89,0.22,1540
4,linear-svr_wl_1_4,1.80,0.26,1433
...,...,...,...,...
254,lasso_wl_9_45,25.49,0.06,21
255,lasso_wl_9_54,25.49,0.06,21
256,lasso_wl_9_63,25.49,0.06,21
257,lasso_wl_9_72,25.49,0.06,21


satellite 26919


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,7.19,0.26,231
1,linear-svr_wl_1_1,7.19,0.26,231
2,linear-svr_wl_1_2,7.24,0.26,213
3,linear-svr_wl_1_3,7.48,0.25,208
4,linear-svr_wl_1_4,7.72,0.23,197
...,...,...,...,...
265,lasso_wl_9_45,18.27,0.04,12
266,lasso_wl_9_54,18.27,0.04,12
267,lasso_wl_9_63,18.27,0.04,12
268,lasso_wl_9_72,18.27,0.04,12


sokoban 2422


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,42.05,0.03,201
1,linear-svr_wl_1_1,34.73,0.05,199
2,linear-svr_wl_1_2,34.73,0.05,199
3,linear-svr_wl_1_3,34.73,0.05,199
4,linear-svr_wl_1_4,35.39,0.05,198
...,...,...,...,...
265,lasso_wl_9_45,27.32,0.05,400
266,lasso_wl_9_54,27.32,0.05,400
267,lasso_wl_9_63,27.32,0.05,400
268,lasso_wl_9_72,27.32,0.05,400


spanner 1416


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,0.71,0.49,545
1,linear-svr_wl_1_1,0.71,0.48,519
2,linear-svr_wl_1_2,0.71,0.49,498
3,linear-svr_wl_1_3,0.72,0.49,464
4,linear-svr_wl_1_4,0.71,0.45,445
...,...,...,...,...
265,lasso_wl_9_45,1.83,0.24,4
266,lasso_wl_9_54,1.83,0.24,4
267,lasso_wl_9_63,1.83,0.24,4
268,lasso_wl_9_72,1.83,0.24,4


transport 4316


Unnamed: 0,config,mse,f1,nonzero_weights
0,linear-svr_wl_1_0,33.52,0.19,878
1,linear-svr_wl_1_1,33.46,0.18,855
2,linear-svr_wl_1_2,34.04,0.18,748
3,linear-svr_wl_1_3,33.75,0.17,689
4,linear-svr_wl_1_4,33.90,0.16,672
...,...,...,...,...
257,lasso_wl_9_45,43.53,0.04,13
258,lasso_wl_9_54,43.53,0.04,13
259,lasso_wl_9_63,43.53,0.04,13
260,lasso_wl_9_72,43.53,0.04,13


### WL metrics

In [73]:
def get_models(domain):
  d = {}

  for iterations, prune in itertools.product(ITERATIONS, PRUNE_COEFS):
    model = "linear-svr"
    log_file = "_".join([model, "llg", "ipc2023-learning-"+domain, "wl", str(iterations), str(prune*iterations)])+".log"
    log_file = _LOG_DIR + "/" + log_file
    
    if not os.path.exists(log_file):
      continue
      
    print(domain, iterations, prune)
    
    model_file = None
    for line in list(open(log_file, 'r').readlines()):
      toks = line.split()
      if "Model parameter file:" in line and len(toks)>3:
        model_file = line.split()[-1]
        break
    
    if model_file is None:
      continue
    model : KernelModelWrapper = load_kernel_model(model_file)[0]
    d[(iterations, prune)] = model

  return d

In [74]:
# load all models
model_domain = {}
for domain in IPC2023_LEARNING_DOMAINS:
  print(domain, N_TRAINING_DATA[domain])
  models = get_models(domain)
  model_domain[domain] = models

blocksworld 4954
blocksworld 1 0


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


blocksworld 1 1
blocksworld 1 2
blocksworld 1 3
blocksworld 1 4
blocksworld 1 5
blocksworld 1 6


FileNotFoundError: [Errno 2] No such file or directory: 'trained_models_kernel/linear-svr_llg_ipc2023-learning-blocksworld_wl_1_6.joblib'

In [None]:
domain = "blocksworld"
os.makedirs("plots", exist_ok=True)
for iterations, prune in itertools.product(ITERATIONS, PRUNE_COEFS):
  try:
    prune = iterations * prune
    m = model_domain[domain][(iterations, prune)]
    h = m.get_hash()
    n_colours = len(m.get_hash())
    hist = np.zeros(n_colours)
    for col, cnt in m._kernel._train_histogram.items():
      key = str(col)
      for symbol in [")", "(", " "]:
        key = key.replace(symbol, "")
      if key not in h:
        continue
      c = h[key]
      hist[c] += cnt
    hist = sorted(hist, reverse=True)
    bins = np.arange(len(hist)+1)
    plt.hist(bins[:-1], bins, weights=hist, log=True)
    plt.savefig(f"plots/wl_count_{domain}_{iterations}_{prune}_{n_colours}.png")
    plt.clf()
  except:
    pass

<Figure size 640x480 with 0 Axes>

In [None]:
domain="blocksworld"
iterations=1
prune=0
m = model_domain[domain][(iterations, prune)]
h1 = m.get_hash()

In [None]:
domain="blocksworld"
iterations=5
prune=5
m = model_domain[domain][(iterations, prune)]
h2 = m.get_hash()

In [None]:
common_colours = set(h1.keys()).intersection(set(h2.keys()))
print(len(common_colours), len(h1), len(h2))

200 6219 7743


In [None]:
model_domain[domain]

{(1, 0): <kernels.wrapper.KernelModelWrapper at 0x7fa3ab09c730>,
 (1, 1): <kernels.wrapper.KernelModelWrapper at 0x7fa3ab09c3d0>,
 (1, 2): <kernels.wrapper.KernelModelWrapper at 0x7fa3e44b9f60>,
 (1, 3): <kernels.wrapper.KernelModelWrapper at 0x7fa3ab09f0d0>,
 (1, 4): <kernels.wrapper.KernelModelWrapper at 0x7fa3a46109d0>,
 (1, 5): <kernels.wrapper.KernelModelWrapper at 0x7fa3a4611e70>,
 (3, 0): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ecd30>,
 (3, 1): <kernels.wrapper.KernelModelWrapper at 0x7fa3a46133d0>,
 (3, 2): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ee410>,
 (3, 3): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47eee00>,
 (3, 4): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ec6a0>,
 (3, 5): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ec9d0>,
 (5, 1): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ec0d0>,
 (5, 2): <kernels.wrapper.KernelModelWrapper at 0x7fa3a47ec100>,
 (5, 3): <kernels.wrapper.KernelModelWrapper at 0x7fa3beeee5c0>,
 (5, 4): <kernels.wrapper

In [None]:
from util.save_load import load_kernel_model_and_setup
m = load_kernel_model_and_setup("trained_models_kernel/linear-svr_llg_ipc2023-learning-blocksworld_wl_1_0", "../benchmarks/ipc2023-learning-benchmarks/blocksworld/domain.pddl", "../benchmarks/ipc2023-learning-benchmarks/blocksworld/training/easy/p01.pddl")
m._kernel._train_histogram

llg created!
time taken: 0.1975s
num nodes: 58
num edges: 100
graph density: 0.060496067755595885
123
