In [8]:
import json
import pickle
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

In [9]:
# Load the parsed evaluation tasks written by an earlier pipeline stage.
# NOTE(review): pickle can execute arbitrary code while loading; only open
# files that this project produced itself.
evaluation_path = Path("outputs/parsed_evaluation.pkl")
with evaluation_path.open("rb") as fh:
    evaluation = pickle.load(fh)

print("Loaded", len(evaluation), "evaluation tasks.")

Loaded 400 evaluation tasks.


In [10]:
def pad_grid(grid, target_size=30):
    """Embed a 2-D grid in the top-left corner of a square array padded with -1.

    Parameters
    ----------
    grid : array-like
        Two-dimensional grid (NumPy array or nested sequence). Values are
        cast to ``int`` when copied into the result.
    target_size : int, default 30
        Side length of the square output array.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(target_size, target_size)``; cells not
        covered by ``grid`` hold -1.

    Raises
    ------
    ValueError
        If ``grid`` is not two-dimensional, or if either of its dimensions
        exceeds ``target_size``.
    """
    # Accept nested lists as well as arrays.
    grid = np.asarray(grid)
    if grid.ndim != 2:
        raise ValueError(
            f"pad_grid expects a 2-D grid, got {grid.ndim} dimension(s)"
        )

    height, width = grid.shape
    # Fail with an explicit message instead of an opaque broadcast error.
    if height > target_size or width > target_size:
        raise ValueError(
            f"grid of shape {grid.shape} does not fit into "
            f"{target_size}x{target_size}"
        )

    padded = np.full((target_size, target_size), -1, dtype=int)
    padded[:height, :width] = grid
    return padded

def raw_to_vector(grid):
    """Return ``grid`` padded by ``pad_grid`` (default size) as a flat 1-D copy."""
    padded = pad_grid(grid)
    return padded.flatten()


# Gather every (test input, test output) grid pair, walking the tasks in the
# order `evaluation` yields them.
eval_pairs = [
    (test_in, test_out)
    for task in evaluation.values()
    for test_in, test_out in zip(task["test_inputs"], task["test_outputs"])
]

# Features: one flattened, padded input grid per row.
X_raw_eval = np.vstack([raw_to_vector(test_in) for test_in, _ in eval_pairs])
# Labels: sum of the output grid's cells, reduced modulo 10.
y_raw_eval = np.array([int(test_out.sum()) % 10 for _, test_out in eval_pairs])

print("Raw evaluation dataset:", X_raw_eval.shape, y_raw_eval.shape)

Raw evaluation dataset: (419, 900) (419,)


In [11]:
df_eval_features = pd.read_pickle("outputs/features_evaluation.pkl")

# Columns that identify one grid; everything numeric outside of them is
# treated as a feature.
# NOTE(review): this selection has to yield the same columns the feat_*
# models were trained on -- confirm against the training notebook.
numeric_cols = df_eval_features.select_dtypes(include=[int, float]).columns
group_cols = ["task_id", "grid_role", "grid_index"]
numeric_cols = [c for c in numeric_cols if c not in group_cols]

# Collapse to one row per grid by averaging its feature rows. groupby sorts
# by the group keys, so the row order here follows sorted keys rather than
# the iteration order of `evaluation`.
df_eval_grouped = (
    df_eval_features
    .groupby(group_cols)[numeric_cols]
    .mean()
    .reset_index()
)

df_eval_grids_test = df_eval_grouped[df_eval_grouped["grid_role"] == "test_input"]

X_feat_eval = df_eval_grids_test[numeric_cols].to_numpy()

# Look up each row's label from its own (task_id, grid_index) so labels stay
# aligned with X_feat_eval. Zipping the two key columns avoids building a
# full Series per row the way iterrows() does.
y_feat_eval = np.array([
    int(evaluation[tid]["test_outputs"][int(idx)].sum()) % 10
    for tid, idx in zip(
        df_eval_grids_test["task_id"], df_eval_grids_test["grid_index"]
    )
])

print("Feature evaluation dataset:", X_feat_eval.shape, y_feat_eval.shape)

Feature evaluation dataset: (419, 18) (419,)


In [12]:
model_dir = Path("outputs/models")

# One pickled estimator per (input representation, algorithm) pair, stored as
# "<representation>_<algorithm>.pkl". The representation prefix is used later
# to decide which evaluation matrix a model is scored on.
input_kinds = ("raw", "feat")
algorithms = (
    "LogReg",
    "LinearSVM",
    "SGD-Hinge",
    "RBF-SVM",
    "RandomForest",
    "ExtraTrees",
    "GradientBoosting",
    "AdaBoost",
    "XGBoost",
    "NeuralNet",
    "NeuralNet-Deep",
)

# Insertion order: all raw_* models first, then all feat_* models.
model_files = {
    f"{kind}_{algo}": model_dir / f"{kind}_{algo}.pkl"
    for kind in input_kinds
    for algo in algorithms
}

# Report every missing file at once instead of failing on the first one.
missing = [str(path) for path in model_files.values() if not path.is_file()]
if missing:
    raise FileNotFoundError(f"Missing model files: {missing}")

# NOTE(review): pickle can execute arbitrary code while loading; only load
# model files that this project produced itself.
models = {}
for name, path in model_files.items():
    with open(path, "rb") as f:
        models[name] = pickle.load(f)

print("Loaded models:", list(models.keys()))

Loaded models: ['raw_LogReg', 'raw_LinearSVM', 'raw_SGD-Hinge', 'raw_RBF-SVM', 'raw_RandomForest', 'raw_ExtraTrees', 'raw_GradientBoosting', 'raw_AdaBoost', 'raw_XGBoost', 'raw_NeuralNet', 'raw_NeuralNet-Deep', 'feat_LogReg', 'feat_LinearSVM', 'feat_SGD-Hinge', 'feat_RBF-SVM', 'feat_RandomForest', 'feat_ExtraTrees', 'feat_GradientBoosting', 'feat_AdaBoost', 'feat_XGBoost', 'feat_NeuralNet', 'feat_NeuralNet-Deep']


In [13]:
results = {}

# The part of a model name before the first "_" selects the (features, labels)
# pair that model is scored on.
eval_sets = {
    "raw": (X_raw_eval, y_raw_eval),
    "feat": (X_feat_eval, y_feat_eval),
}

for name, model in models.items():
    print(f"Evaluating: {name}")

    prefix, sep, _rest = name.partition("_")
    if not sep or prefix not in eval_sets:
        raise ValueError("Unknown model prefix.")

    X_eval, y_eval = eval_sets[prefix]
    # Store a plain float so the results can be serialised to JSON.
    results[name] = float(accuracy_score(y_eval, model.predict(X_eval)))

results

Evaluating: raw_LogReg
Evaluating: raw_LinearSVM
Evaluating: raw_SGD-Hinge
Evaluating: raw_RBF-SVM
Evaluating: raw_RandomForest
Evaluating: raw_ExtraTrees
Evaluating: raw_GradientBoosting
Evaluating: raw_AdaBoost
Evaluating: raw_XGBoost
Evaluating: raw_NeuralNet
Evaluating: raw_NeuralNet-Deep
Evaluating: feat_LogReg
Evaluating: feat_LinearSVM
Evaluating: feat_SGD-Hinge
Evaluating: feat_RBF-SVM
Evaluating: feat_RandomForest
Evaluating: feat_ExtraTrees
Evaluating: feat_GradientBoosting
Evaluating: feat_AdaBoost
Evaluating: feat_XGBoost
Evaluating: feat_NeuralNet
Evaluating: feat_NeuralNet-Deep


{'raw_LogReg': 0.11694510739856802,
 'raw_LinearSVM': 0.1026252983293556,
 'raw_SGD-Hinge': 0.11694510739856802,
 'raw_RBF-SVM': 0.1766109785202864,
 'raw_RandomForest': 0.1360381861575179,
 'raw_ExtraTrees': 0.1360381861575179,
 'raw_GradientBoosting': 0.11217183770883055,
 'raw_AdaBoost': 0.11217183770883055,
 'raw_XGBoost': 0.12171837708830549,
 'raw_NeuralNet': 0.11933174224343675,
 'raw_NeuralNet-Deep': 0.10978520286396182,
 'feat_LogReg': 0.11455847255369929,
 'feat_LinearSVM': 0.12410501193317422,
 'feat_SGD-Hinge': 0.11455847255369929,
 'feat_RBF-SVM': 0.1288782816229117,
 'feat_RandomForest': 0.12171837708830549,
 'feat_ExtraTrees': 0.11694510739856802,
 'feat_GradientBoosting': 0.11933174224343675,
 'feat_AdaBoost': 0.13365155131264916,
 'feat_XGBoost': 0.12649164677804295,
 'feat_NeuralNet': 0.10739856801909307,
 'feat_NeuralNet-Deep': 0.11455847255369929}

In [14]:
# Per-model accuracies plus the number of samples each dataset contained.
summary = {
    "classification_accuracy": results,
    "num_eval_samples_raw": int(len(y_raw_eval)),
    "num_eval_samples_feat": int(len(y_feat_eval)),
}

summary_path = Path("outputs/evaluation_summary.json")
summary_path.write_text(json.dumps(summary, indent=2))

print("Saved evaluation_summary.json")

Saved evaluation_summary.json
