In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/arc-prize-2024/arc-agi_training_solutions.json
/kaggle/input/arc-prize-2024/arc-agi_evaluation_solutions.json
/kaggle/input/arc-prize-2024/arc-agi_evaluation_challenges.json
/kaggle/input/arc-prize-2024/sample_submission.json
/kaggle/input/arc-prize-2024/arc-agi_training_challenges.json
/kaggle/input/arc-prize-2024/arc-agi_test_challenges.json


In [2]:
import json

training_solutions_path = '/kaggle/input/arc-prize-2024/arc-agi_training_solutions.json'
evaluation_solutions_path = '/kaggle/input/arc-prize-2024/arc-agi_evaluation_solutions.json'
evaluation_challenges_path = '/kaggle/input/arc-prize-2024/arc-agi_evaluation_challenges.json'
sample_submission_path = '/kaggle/input/arc-prize-2024/sample_submission.json'
training_challenges_path = '/kaggle/input/arc-prize-2024/arc-agi_training_challenges.json'
test_challenges_path = '/kaggle/input/arc-prize-2024/arc-agi_test_challenges.json'

#function to load JSON data
def load_json_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

#load each dataset
training_solutions = load_json_data(training_solutions_path)
evaluation_solutions = load_json_data(evaluation_solutions_path)
evaluation_challenges = load_json_data(evaluation_challenges_path)
sample_submission = load_json_data(sample_submission_path)
training_challenges = load_json_data(training_challenges_path)
test_challenges = load_json_data(test_challenges_path)

In [3]:
def inspect_dataset_structure(dataset, dataset_name):
    print(f"{dataset_name} Structure Overview:")
    if isinstance(dataset, dict):
        print(f"- Type: Dictionary")
        print(f"- Number of Keys: {len(dataset.keys())}")
    elif isinstance(dataset, list):
        print(f"- Type: List")
        print(f"- Number of Elements: {len(dataset)}")
        if dataset:
            first_item = dataset[0]
            if isinstance(first_item, dict):
                print(f"  First Element Structure:")
                print(f"  - Keys: {list(first_item.keys())}")
            elif isinstance(first_item, (np.ndarray, torch.Tensor)):
                print(f"  First Element Shape: {first_item.shape}")
                print(f"  First Element Type: {type(first_item)}")
            else:
                print(f"  First Element Type: {type(first_item)}")
    else:
        print(f"- Unknown Type or Empty")

inspect_dataset_structure(training_solutions, "Training Solutions")
inspect_dataset_structure(evaluation_solutions, "Evaluation Solutions")
inspect_dataset_structure(evaluation_challenges, "Evaluation Challenges")
inspect_dataset_structure(sample_submission, "Sample Submission")
inspect_dataset_structure(training_challenges, "Training Challenges")
inspect_dataset_structure(test_challenges, "Test Challenges")

Training Solutions Structure Overview:
- Type: Dictionary
- Number of Keys: 400
Evaluation Solutions Structure Overview:
- Type: Dictionary
- Number of Keys: 400
Evaluation Challenges Structure Overview:
- Type: Dictionary
- Number of Keys: 400
Sample Submission Structure Overview:
- Type: Dictionary
- Number of Keys: 100
Training Challenges Structure Overview:
- Type: Dictionary
- Number of Keys: 400
Test Challenges Structure Overview:
- Type: Dictionary
- Number of Keys: 100


In [4]:
#define functions for preprocessing each dataset

def preprocess_training_solutions(training_solutions):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(training_solutions, str):
        training_solutions = json.loads(training_solutions)
    
    #print the structure and length of the dataset
    print("Training Solutions Structure Overview:")
    print("- Type:", type(training_solutions))
    print("- Number of Keys:", len(training_solutions))
    
    return training_solutions

def preprocess_evaluation_solutions(evaluation_solutions):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(evaluation_solutions, str):
        evaluation_solutions = json.loads(evaluation_solutions)
    
    #print the structure and length of the dataset
    print("Evaluation Solutions Structure Overview:")
    print("- Type:", type(evaluation_solutions))
    print("- Number of Keys:", len(evaluation_solutions))
    
    return evaluation_solutions

def preprocess_evaluation_challenges(evaluation_challenges):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(evaluation_challenges, str):
        evaluation_challenges = json.loads(evaluation_challenges)
    
    #print the structure and length of the dataset
    print("Evaluation Challenges Structure Overview:")
    print("- Type:", type(evaluation_challenges))
    print("- Number of Keys:", len(evaluation_challenges))
    
    return evaluation_challenges

def preprocess_sample_submission(sample_submission):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(sample_submission, str):
        sample_submission = json.loads(sample_submission)
    
    #print the structure and length of the dataset
    print("Sample Submission Structure Overview:")
    print("- Type:", type(sample_submission))
    print("- Number of Keys:", len(sample_submission))
    
    return sample_submission

def preprocess_training_challenges(training_challenges):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(training_challenges, str):
        training_challenges = json.loads(training_challenges)
    
    #print the structure and length of the dataset
    print("Training Challenges Structure Overview:")
    print("- Type:", type(training_challenges))
    print("- Number of Keys:", len(training_challenges))
    
    return training_challenges

def preprocess_test_challenges(test_challenges):
    #convert JSON objects to Python dictionaries if needed
    if isinstance(test_challenges, str):
        test_challenges = json.loads(test_challenges)
    
    #print the structure and length of the dataset
    print("Test Challenges Structure Overview:")
    print("- Type:", type(test_challenges))
    print("- Number of Keys:", len(test_challenges))
    
    return test_challenges

#preprocess each dataset
preprocessed_training_solutions = preprocess_training_solutions(training_solutions)
preprocessed_evaluation_solutions = preprocess_evaluation_solutions(evaluation_solutions)
preprocessed_evaluation_challenges = preprocess_evaluation_challenges(evaluation_challenges)
preprocessed_sample_submission = preprocess_sample_submission(sample_submission)
preprocessed_training_challenges = preprocess_training_challenges(training_challenges)
preprocessed_test_challenges = preprocess_test_challenges(test_challenges)

print("Training Solutions Preprocessed:", preprocessed_training_solutions)
print("Evaluation Solutions Preprocessed:", preprocessed_evaluation_solutions)
print("Evaluation Challenges Preprocessed:", preprocessed_evaluation_challenges)
print("Sample Submission Preprocessed:", preprocessed_sample_submission)
print("Training Challenges Preprocessed:", preprocessed_training_challenges)
print("Test Challenges Preprocessed:", preprocessed_test_challenges)

Training Solutions Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 400
Evaluation Solutions Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 400
Evaluation Challenges Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 400
Sample Submission Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 100
Training Challenges Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 400
Test Challenges Structure Overview:
- Type: <class 'dict'>
- Number of Keys: 100
Training Solutions Preprocessed: {'007bbfb7': [[[7, 0, 7, 0, 0, 0, 7, 0, 7], [7, 0, 7, 0, 0, 0, 7, 0, 7], [7, 7, 0, 0, 0, 0, 7, 7, 0], [7, 0, 7, 0, 0, 0, 7, 0, 7], [7, 0, 7, 0, 0, 0, 7, 0, 7], [7, 7, 0, 0, 0, 0, 7, 7, 0], [7, 0, 7, 7, 0, 7, 0, 0, 0], [7, 0, 7, 7, 0, 7, 0, 0, 0], [7, 7, 0, 7, 7, 0, 0, 0, 0]]], '00d62c1b': [[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 3, 4, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,