In [1]:
# Import libraries
import json
import numpy as np
from pathlib import Path
import pickle

In [2]:
# Function to parse ARC-AGI task
def load_arc_agi_task(raw):
    """Turn one decoded ARC-AGI task into lists of NumPy arrays.

    Args:
        raw: Mapping with "train" and "test" entries; each entry is an
            iterable of pairs that provide an "input" and an "output" grid.

    Returns:
        dict with the keys "train_inputs", "train_outputs", "test_inputs"
        and "test_outputs" (in that order). Each value is a list holding one
        ``np.array`` per pair, in the order the pairs appear in ``raw``.

    Raises:
        KeyError: if a split or one of its pair fields is missing.
    """

    def grids(split, field):
        # One array per pair of the requested split, original order kept.
        return [np.array(pair[field]) for pair in raw[split]]

    parsed = {}
    parsed["train_inputs"] = grids("train", "input")
    parsed["train_outputs"] = grids("train", "output")
    parsed["test_inputs"] = grids("test", "input")
    parsed["test_outputs"] = grids("test", "output")
    return parsed

In [3]:
# Function to load all tasks from directory
def load_directory(path: Path):
    """Load every ARC-AGI task JSON file found directly inside ``path``.

    Args:
        path: Directory whose ``*.json`` files (non-recursive) are parsed.

    Returns:
        dict mapping each file's stem (file name without the ``.json``
        suffix) to the value returned by ``load_arc_agi_task`` for that
        file. Entries are inserted in sorted path order; the dict is empty
        when no file matches.
    """

    tasks = {}

    # glob() yields entries in filesystem order, which varies between
    # machines; sorting keeps the dict (and anything pickled from it)
    # reproducible from run to run.
    for file in sorted(path.glob("*.json")):
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(file, "r", encoding="utf-8") as f:
            raw_json = json.load(f)
        tasks[file.stem] = load_arc_agi_task(raw_json)

    return tasks

In [4]:
# Locate the ARC-AGI-1 splits (paths are relative to the working directory)
DATA_DIR = Path("../../data")
ARC_DIR = DATA_DIR.joinpath("arc1")
TRAIN_DIR, EVAL_DIR = ARC_DIR.joinpath("training"), ARC_DIR.joinpath("evaluation")

# Parse every task file of each split into a dict keyed by file stem
training = load_directory(TRAIN_DIR)
evaluation = load_directory(EVAL_DIR)

# Report how many tasks each split produced
print("Loaded:")
print(" - training tasks:", len(training))
print(" - evaluation tasks:", len(evaluation))

Loaded:
 - training tasks: 400
 - evaluation tasks: 400


In [5]:
# Write each parsed task dictionary to its own pickle file under outputs/
Path("outputs").mkdir(exist_ok=True)

with Path("outputs/parsed_training.pkl").open("wb") as f:
    pickle.dump(training, f)

with Path("outputs/parsed_evaluation.pkl").open("wb") as f:
    pickle.dump(evaluation, f)

print("Saved parsed_training.pkl and parsed_evaluation.pkl")

Saved parsed_training.pkl and parsed_evaluation.pkl
