In [48]:
import polars as pl
import json
from Levenshtein import distance as levenshtein_distance
import sys

sys.path.append("../..")
from src.utils import *

In [49]:
# Apply the shared polars display settings; the trailing ';' suppresses the
# uninformative Config object repr that would otherwise be shown as cell output.
pl.Config.load_from_file("../../polars_cfg.json");

<polars.config.Config at 0x10aac1880>

In [50]:
# Load both error tables, keep only the rows whose task_type is "training",
# and stack them into a single frame using clean_errors' column order.
_is_training = pl.col("task_type") == "training"

clean_errors = pl.read_csv("../../data/clean_errors.csv").filter(_is_training)
clean_errors_incomplete = pl.read_csv(
    "../../data/clean_errors_incomplete.csv"
).filter(_is_training)

# Align the second table to the first table's columns before stacking vertically.
_incomplete_aligned = clean_errors_incomplete.select(clean_errors.columns)
errors = clean_errors.vstack(_incomplete_aligned)

In [51]:
# Load the intermediate states, indexed by task id below (each entry is read
# via its "states" key). JSON is decoded as UTF-8 explicitly so the result does
# not depend on the platform's default locale encoding.
with open("../../../re-arc/intermediate_states.json", "r", encoding="utf-8") as f:
    intermediate_states = json.load(f)

In [52]:
# For every error row, find the intermediate state of the same task whose string
# form has the smallest Levenshtein distance to the erroneous output grid.
# The two lists stay row-aligned with `errors` so they can be attached as columns.
closest_intermediate = {
    "intermediate_state": [],
    "edit_distance": [],
}
for row in errors.iter_rows(named=True):
    error = row["test_output_grid"]
    task_id = row["task_name"].split(".")[0]  # e.g. "017c7c7b.json" -> "017c7c7b"
    states = intermediate_states[task_id]["states"]
    if not states:
        # No intermediate states for this task: record sentinel values
        # ("" and -1) so the row count still matches `errors`.
        closest_intermediate["intermediate_state"].append("")
        closest_intermediate["edit_distance"].append(-1)
        continue
    # Stringify each state once; the same string is used for the distance
    # computation and for the stored result.
    state_strs = [grid2str(state) for state in states]
    # min() returns the first minimal element, so ties resolve to the
    # earliest state in the list.
    best_state, min_distance = min(
        ((state_str, levenshtein_distance(error, state_str)) for state_str in state_strs),
        key=lambda pair: pair[1],
    )
    closest_intermediate["intermediate_state"].append(best_state)
    closest_intermediate["edit_distance"].append(min_distance)

In [53]:
# Attach the closest intermediate state and its edit distance to each error row.
# with_columns appends the new columns on the first run and, unlike hstack,
# overwrites same-named columns instead of raising on duplicates, so the cell
# stays re-runnable once `errors` already carries them.
closest_intermediate = pl.DataFrame(closest_intermediate)
errors = errors.with_columns(closest_intermediate.get_columns())

In [54]:
# Errors whose output grid is identical to one of the task's intermediate states.
exact_correspondance = errors.filter(pl.col("edit_distance") == 0)
# Total occurrences of those errors, as a 1x1 frame with a "count" column.
# Only "count" is aggregated, rather than summing every column (including the
# string ones) and discarding the rest.
num_exact_correspondance = exact_correspondance.select(pl.col("count").sum())

In [55]:
# Show up to 15 exact matches, most frequent first.
(
    exact_correspondance
    .sort(by="count", descending=True)
    .head(n=15)
)

task_name,test_output_grid,hashed_output_grid,task_type,count,intermediate_state,edit_distance
str,str,str,str,i64,str,i64
"""017c7c7b.json""","""|000|000|000|000|000|000|000|000|000|""","""26341a0d34bff6a9ab39013f22a05325""","""training""",16,"""|000|000|000|000|000|000|000|000|000|""",0
"""f9012d9b.json""","""|8188188|1881881|8818818|8188188|1881881|8818…","""1c5fe1913239a349a52348493ed5c52e""","""training""",12,"""|8188188|1881881|8818818|8188188|1881881|8818…",0
"""f8c80d96.json""","""|4444440040|0000040040|0000040040|4440040040|…","""c60517fac73948ea0ee213e00736cc7a""","""training""",9,"""|4444440040|0000040040|0000040040|4440040040|…",0
"""ff805c23.json""","""|444040033300003330040444|4444043333033033334…","""8f1abb852eea0929f81d43f3fc75a6b9""","""training""",9,"""|444040033300003330040444|4444043333033033334…",0
"""264363fd.json""","""|444444444444444444444444444444|4444444444444…","""1c6a04dd58727e7d5dd5c4c05cae8b56""","""training""",7,"""|444444444444444444444444444444|4444444444444…",0
"""7468f01a.json""","""|11118111|11111881|11111111|88811111|11111111…","""f3d7bdc41a8cab1aac83c55bb6dc5c9a""","""training""",5,"""|11118111|11111881|11111111|88811111|11111111…",0
"""88a62173.json""","""|55|05|""","""898f7815af9f75a41c484bce19af5bdd""","""training""",5,"""|55|05|""",0
"""f8ff0b80.json""","""|3|1|6|""","""dadbb5c36ba570a524f1b59c2fc0d3b9""","""training""",4,"""|3|1|6|""",0
"""321b1fc6.json""","""|0000000000|0440004400|3433034330|0030000300|…","""36b5003d6e2e291120e445db2003873f""","""training""",4,"""|0000000000|0440004400|3433034330|0030000300|…",0
"""99b1bc43.json""","""|1011|0111|0010|1011|""","""c88224aa576407eaf37d0a0e075f7d2e""","""training""",4,"""|1011|0111|0010|1011|""",0


In [56]:
# Show up to 15 errors that are exactly one edit away from their closest
# intermediate state, most frequent first.
(
    errors
    .filter(pl.col("edit_distance") == 1)
    .sort(by="count", descending=True)
    .head(n=15)
)

task_name,test_output_grid,hashed_output_grid,task_type,count,intermediate_state,edit_distance
str,str,str,str,i64,str,i64
"""fafffa47.json""","""|000|200|000|""","""2341e2b6474b63fa0996505c6f1b3fc8""","""training""",3,"""|000|000|000|""",1
"""88a62173.json""","""|50|05|""","""49fd9e8ee1c663a18693a61a6941e536""","""training""",3,"""|55|05|""",1
"""a740d043.json""","""|02|23|""","""25b7475260c50aae46e876d17b26c112""","""training""",3,"""|12|23|""",1
"""ce602527.json""","""|1111|1131|1311|""","""7a257151d7df6e2675497bed8d20e975""","""training""",2,"""|1311|1131|1311|""",1
"""a9f96cdd.json""","""|00030|00000|00000|""","""a211c4518f099927172972c748804679""","""training""",2,"""|00000|00000|00000|""",1
"""a61f2674.json""","""|000000000|000000000|000000000|000000000|0000…","""92ebc8c6b21bb734c91ff7202a32bcda""","""training""",2,"""|000000000|000000000|000000000|000000000|0000…",1
"""a61f2674.json""","""|000000000|000000010|000000010|000000010|0000…","""44ff5a82e7fe44edff0273eeb2a626ce""","""training""",2,"""|000000000|000000010|000000010|000000010|0000…",1
"""834ec97d.json""","""|000003000000|000000000000|000000000000|00000…","""67f6aa228bf40853607accf591ae6ba6""","""training""",2,"""|000000000000|000000000000|000000000000|00000…",1
"""017c7c7b.json""","""|000|000|000|000|000|000|000|000|010|""","""62f4f94e52db16db7c6e127243e792bb""","""training""",2,"""|000|000|000|000|000|000|000|000|000|""",1
"""ff805c23.json""","""|444040033300003330040444|4444043333033033334…","""401424b3cf60b8fff8d745438df6ab29""","""training""",2,"""|444040033300003330040444|4444043333033033334…",1
