In [None]:
!pip install --no-index --find-links=/kaggle/input/arc-prize-2025-arcsolver-4b-libraries/wheels vllm numpy scipy torch torchvision transformers

In [None]:
import vllm
import numpy
import scipy
import torch
import transformers

# Report the version of every freshly installed library, one line per package.
for label, module in (
    ('vLLM', vllm),
    ('Numpy', numpy),
    ('Scipy', scipy),
    ('PyTorch', torch),
    ('Transformers', transformers),
):
    print(f'{label} version: {module.__version__}')

In [None]:
import os
import sys
import json
import multiprocessing as mp

# Directory added to the import path; presumably where the `utils` and `wrapper`
# modules imported right after this live -- verify against the dataset layout.
SCRIPTS_PATH = '/kaggle/input/arc-prize-2025-arcsolver-4b-scripts/Code'
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if SCRIPTS_PATH not in sys.path:
    sys.path.append(SCRIPTS_PATH)

import utils
import wrapper

In [None]:
# --- Input / output locations ---
TEST_CHALLENGES_PATH = '/kaggle/input/arc-prize-2025/arc-agi_test_challenges.json'
MODEL_PATH = '/kaggle/input/gemma-3/transformers/gemma-3-4b-it/1'
SUBMISSION_PATH = 'submission.json'

# --- Run parameters ---
# Number of lowest-scoring tasks that are actually solved: 48 when the
# KAGGLE_IS_COMPETITION_RERUN environment variable is set (non-empty), 8 otherwise.
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    NUM_EASY_TASKS = 48
else:
    NUM_EASY_TASKS = 8
NUM_PROCESSES = 4  # upper bound on the number of worker processes launched
SEED = 42          # seed for this process's RNGs; also handed to each worker

In [None]:
import random
import numpy as np
import torch

def set_seed(seed_value):
    """Seed the `random`, NumPy and PyTorch generators of the current process.

    Args:
        seed_value: Seed passed unchanged to `random.seed`, `np.random.seed`
            and `torch.manual_seed`, in that order.
    """
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed_value)

# Seed the RNGs of this (parent) process only; SEED is separately included in
# the arguments given to each worker process when they are launched.
set_seed(SEED)

In [None]:
# Read every test challenge from disk (a dict keyed by task id).
with open(TEST_CHALLENGES_PATH, 'r') as challenges_file:
    test_challenges = json.load(challenges_file)

# One (score, task_id, task_data) tuple per task, in ascending order. The score
# is computed from the first test input only; ties fall back to the task id.
tasks_with_scores = sorted(
    (
        utils.calculate_grid_dot_product(task_data['test'][0]['input']),
        task_id,
        task_data,
    )
    for task_id, task_data in test_challenges.items()
)

# The NUM_EASY_TASKS lowest-scoring tasks are solved; the rest get blank entries.
tasks_to_process, tasks_to_skip = (
    tasks_with_scores[:NUM_EASY_TASKS],
    tasks_with_scores[NUM_EASY_TASKS:],
)

print(f'Selected {len(tasks_to_process)} tasks to solve.')
print(f'Will generate blank submissions for {len(tasks_to_skip)} tasks.')

In [None]:
if __name__ == '__main__':
    # Use the 'spawn' start method for the workers; force=True lets this cell be
    # re-run after a start method has already been set.
    mp.set_start_method('spawn', force=True)

    def _blank_predictions(task_data):
        """Return one empty two-attempt entry per test input of `task_data`."""
        return [{'attempt_1': [], 'attempt_2': []} for _ in task_data.get('test', [])]

    # Split the selected tasks into at most NUM_PROCESSES contiguous chunks.
    # max(1, ...) keeps the range() step valid when there is nothing to process.
    chunk_size = max(1, (len(tasks_to_process) + NUM_PROCESSES - 1) // NUM_PROCESSES)
    task_chunks = [tasks_to_process[i:i + chunk_size] for i in range(0, len(tasks_to_process), chunk_size)]

    # The manager is used as a context manager so that its server process is
    # shut down once the results have been copied out of the shared dict.
    with mp.Manager() as manager:
        results_dict = manager.dict()

        # Each worker receives a single tuple:
        # (task chunk, worker index, shared results dict, model path, seed).
        pool_args = [(task_chunks[i], i, results_dict, MODEL_PATH, SEED) for i in range(len(task_chunks))]

        processes = []
        for args in pool_args:
            p = mp.Process(target=wrapper.subprocess_wrapper, args=(args,))
            processes.append(p)
            p.start()

        for worker_index, p in enumerate(processes):
            p.join()
            # Best effort: report a crashed worker instead of aborting the run;
            # any task it did not deliver is filled with a blank entry below.
            if p.exitcode != 0:
                print(f'Warning: worker {worker_index} exited with code {p.exitcode}.')

        # Copy into a plain dict while the manager is still alive.
        submission_json = dict(results_dict)

    # Tasks that were deliberately skipped get blank predictions.
    for _, task_id, task_data in tasks_to_skip:
        submission_json[task_id] = _blank_predictions(task_data)

    # Safety net: any task still missing (e.g. not returned by a worker) is
    # also submitted blank so that every task id appears in the file.
    for task_id, task_data in test_challenges.items():
        if task_id not in submission_json:
            submission_json[task_id] = _blank_predictions(task_data)

    with open(SUBMISSION_PATH, 'w') as f:
        json.dump(submission_json, f)

    print(f'Submission file created at: {SUBMISSION_PATH}')

    # Show the first three entries as a quick sanity check.
    print('\n--- Submission Sample ---')
    for i, (task_id, preds) in enumerate(submission_json.items()):
        if i >= 3:
            break
        print(f'\'{task_id}\': {preds}')