# Local benchmark notebook

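This notebook runs the `abstract_and_reason` solver on the ARC-AGI test challenges, writes its predictions to `submission.json`, and then scores those predictions locally against a reference file.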

## Imports

In [1]:
import os
import json
import random
from tqdm import tqdm

# abstract_and_reason package import
from abstract_and_reason import solver_v1
from abstract_and_reason.assets import load_json

## JSON files loading

In [2]:
base_path = '../data/challenges/'

# Challenge / solution files. Each file is read exactly once.
training_challenges = load_json(base_path + 'arc-agi_training_challenges.json')
training_solutions = load_json(base_path + 'arc-agi_training_solutions.json')
evaluation_challenges = load_json(base_path + 'arc-agi_evaluation_challenges.json')
evaluation_solutions = load_json(base_path + 'arc-agi_evaluation_solutions.json')
test_challenges = load_json(base_path + 'arc-agi_test_challenges.json')

# Submission paths only. The sample submission itself is loaded by the next
# cell, which first checks that the file exists; loading it here as well
# would defeat that guard whenever the file is missing.
submission_file_path = './submission.json'
sample_submission_file_path = base_path + 'sample_submission.json'

In [3]:
# Start from the sample submission when that file exists, otherwise from an empty dict.
sample_submission = (
    load_json(sample_submission_file_path)
    if os.path.exists(sample_submission_file_path)
    else {}
)

# (Re)create the submission file with that initial content.
with open(submission_file_path, "w") as file:
    json.dump(sample_submission, file, indent=4)

## Let's define our Solver

In [4]:
# Solver instance used by the cells below (they call `.process_challenge` and `.predict` on it).
# NOTE(review): the variable shares its name with the `abstract_and_reason` package;
# the imports above only bind `solver_v1` and `load_json`, so nothing is overwritten here,
# but the name is easy to misread as the package itself.
# NOTE(review): `prod=False` presumably selects a local / non-production mode — confirm in solver_v1.
abstract_and_reason = solver_v1.Solver(prod=False)

# Submission

In [5]:
# Open for read+write: the existing content is loaded, then rewritten in place.
with open(submission_file_path, "r+") as outfile:
    submission_data = json.load(outfile)
    ids_test = list(test_challenges)

    for i, challenge_id in enumerate(tqdm(ids_test)):
        (
            puzzle_ins_train,
            puzzle_outs_train,
            puzzle_ins_test,
            puzzle_outs_test,
        ) = abstract_and_reason.process_challenge(challenge_id, test_challenges)

        # Two attempts per challenge, both produced by the same call with the same arguments.
        attempt1 = abstract_and_reason.predict(puzzle_ins_train, puzzle_outs_train, puzzle_ins_test)
        attempt2 = abstract_and_reason.predict(puzzle_ins_train, puzzle_outs_train, puzzle_ins_test)

        # One {'attempt_1', 'attempt_2'} record per entry returned by predict.
        result = [
            {
                'attempt_1': attempt1[j].tolist(),
                'attempt_2': attempt2[j].tolist()
            }
            for j in range(len(attempt1))
        ]
        submission_data[challenge_id] = result

        # Persist after every challenge: rewind, dump everything, drop leftover bytes.
        outfile.seek(0)
        json.dump(submission_data, outfile, indent=4)
        outfile.truncate()

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 142.05it/s]


# Validation

In [6]:
import numpy as np

def get_score(model_answers, real_answers):
    """
    Scores a prediction by exact match against the reference.

    The two sequences are compared entry by entry. The prediction scores 1 only
    when both sequences hold the same number of entries and every pair of
    entries has the same shape and identical values. Missing, surplus or
    differently shaped entries count as a mismatch instead of being skipped,
    so a truncated or malformed prediction cannot score 1.

    Args:
        model_answers (list of lists): Model-generated answer as nested lists.
        real_answers (list of lists): Reference answer as nested lists.

    Returns:
        int: 1 for an exact match, 0 otherwise (also 0 when both inputs are empty).
    """
    # Nothing to compare, or a different number of entries: cannot be an exact match.
    if len(model_answers) == 0 or len(model_answers) != len(real_answers):
        return 0

    for model_item, real_item in zip(model_answers, real_answers):
        model_answer = np.array(model_item)
        real_answer = np.array(real_item)

        # Check shapes first so that `==` never broadcasts mismatched entries.
        if model_answer.shape != real_answer.shape:
            return 0
        if not bool((model_answer == real_answer).all()):
            return 0

    return 1


def get_anwser(challenge_id, answers_file_path):
    """
    Loads an answers file and returns the first entry stored for a challenge.

    Args:
        challenge_id: Key of the challenge in the loaded content.
        answers_file_path: Path handed to `load_json`.

    Returns:
        The element at index 0 of the value stored under `challenge_id`.
    """
    return load_json(answers_file_path)[challenge_id][0]

In [8]:
total_score = 0

# Parse each JSON file once up front instead of re-reading both files for
# every challenge inside the loop.
submission_data = load_json(submission_file_path)
# NOTE(review): the reference comes from the sample submission file, which
# presumably holds placeholder predictions rather than real solutions —
# confirm this is the intended ground truth.
reference_data = load_json(sample_submission_file_path)

ids_test = list(test_challenges)

for challenge_id in ids_test:
    # Only the first entry (index 0) of each challenge is scored.
    ground_truth = reference_data[challenge_id][0]
    model_answer = submission_data[challenge_id][0]

    challenge_score = 0
    for attempt in ground_truth.keys():
        challenge_score += get_score(model_answer[attempt], ground_truth[attempt])

    # Average over the two attempts.
    # NOTE(review): a challenge with one correct attempt contributes 0.5 —
    # confirm this matches the metric you want to reproduce.
    total_score += challenge_score / 2

print(f"Final score: {total_score}")

Final score: 0.0
