In [1]:
# Import necessary libraries
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set Seaborn style for better visualizations
sns.set(style="whitegrid")

# Function to load a JSON file
def load_json(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)
    return data

# Function to check for symmetry
def check_symmetry(matrix):
    vertical_symmetry = np.array_equal(matrix, np.flipud(matrix))
    horizontal_symmetry = np.array_equal(matrix, np.fliplr(matrix))
    return vertical_symmetry, horizontal_symmetry

# Function to count unique objects in the matrix
def count_objects(matrix):
    unique_elements, counts = np.unique(matrix, return_counts=True)
    return dict(zip(unique_elements, counts))

# Function to check for repeating patterns
def check_repeating_patterns(matrix):
    patterns = {}
    for row in matrix:
        for size in range(1, len(row) // 2 + 1):
            for start in range(len(row) - size + 1):
                pattern = tuple(row[start:start + size])
                if pattern in patterns:
                    patterns[pattern] += 1
                else:
                    patterns[pattern] = 1
    return patterns

# Function to extract features from a single puzzle
def extract_features(puzzle):
    input_matrix = np.array(puzzle['input'])
    output_matrix = np.array(puzzle['output'])
    
    features = {}
    features['input_symmetry'] = check_symmetry(input_matrix)
    features['output_symmetry'] = check_symmetry(output_matrix)
    features['input_object_counts'] = count_objects(input_matrix)
    features['output_object_counts'] = count_objects(output_matrix)
    features['input_repeating_patterns'] = check_repeating_patterns(input_matrix)
    features['output_repeating_patterns'] = check_repeating_patterns(output_matrix)
    
    return features

# Directory containing the training data
data_dir = '../data/training'

# Collect features from all JSON files
all_features = []

for file_name in os.listdir(data_dir):
    if file_name.endswith('.json'):
        file_path = os.path.join(data_dir, file_name)
        data = load_json(file_path)
        for puzzle in data['train'] + data['test']:
            features = extract_features(puzzle)
            all_features.append(features)

# Displaying some of the extracted features for the first few puzzles
for i, features in enumerate(all_features[:5]):
    print(f"Puzzle {i + 1}:")
    print("Input Symmetry (Vertical, Horizontal):", features['input_symmetry'])
    print("Output Symmetry (Vertical, Horizontal):", features['output_symmetry'])
    print("Input Object Counts:", features['input_object_counts'])
    print("Output Object Counts:", features['output_object_counts'])
    print("Input Repeating Patterns:", features['input_repeating_patterns'])
    print("Output Repeating Patterns:", features['output_repeating_patterns'])
    print()


Puzzle 1:
Input Symmetry (Vertical, Horizontal): (False, False)
Output Symmetry (Vertical, Horizontal): (False, False)
Input Object Counts: {np.int64(1): np.int64(1), np.int64(2): np.int64(131), np.int64(4): np.int64(1), np.int64(8): np.int64(95)}
Output Object Counts: {np.int64(1): np.int64(15), np.int64(2): np.int64(115), np.int64(4): np.int64(17), np.int64(8): np.int64(81)}
Input Repeating Patterns: {(np.int64(8),): 95, (np.int64(2),): 131, (np.int64(8), np.int64(8)): 70, (np.int64(8), np.int64(2)): 12, (np.int64(2), np.int64(2)): 111, (np.int64(2), np.int64(8)): 12, (np.int64(8), np.int64(8), np.int64(8)): 47, (np.int64(8), np.int64(8), np.int64(2)): 11, (np.int64(8), np.int64(2), np.int64(2)): 11, (np.int64(2), np.int64(2), np.int64(2)): 92, (np.int64(2), np.int64(2), np.int64(8)): 12, (np.int64(2), np.int64(8), np.int64(8)): 12, (np.int64(8), np.int64(8), np.int64(8), np.int64(2)): 11, (np.int64(8), np.int64(8), np.int64(2), np.int64(2)): 10, (np.int64(8), np.int64(2), np.int64(2