In [1]:
# Add parent directory to path to import py222
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))

In [3]:
# test py222
import py222

s = py222.scramble(20)
print(s)
solutions = py222.solveCube(s)
print(solutions)
for sol in solutions:
    print(py222.doMoves(s, sol))

[3 5 2 2 0 1 1 0 3 1 0 5 2 0 3 5 2 4 4 4 3 1 4 5]
[[3, 2, 8, 1, 6, 1, 3, 2, 4, 8], [5, 0, 4, 0, 3, 1, 5, 8, 3, 2], [5, 0, 4, 0, 4, 8, 5, 0, 4, 2], [5, 2, 8, 0, 8, 0, 7, 3, 0, 7], [7, 3, 1, 3, 7, 0, 4, 6, 5, 8]]
[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]
[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]
[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]
[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]
[0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]


In [4]:
import numpy as np
import pandas as pd
import json
from pathlib import Path

DATA_DIR = Path.cwd().parent / "data" / "cube-2-by-2-all-solutions"

records = []
for split in ["train", "test", "val"]:
    split_dir = DATA_DIR / split
    if not split_dir.exists():
        continue

    inputs = np.load(split_dir / f"{split}__inputs.npy")
    labels = np.load(split_dir / f"{split}__labels.npy")
    group_indices = np.load(split_dir / f"{split}__group_indices.npy")
    puzzle_indices = np.load(split_dir / f"{split}__puzzle_indices.npy")

    with open(split_dir / "dataset.json") as f:
        meta = json.load(f)

    num_groups = meta["total_groups"]
    num_examples = len(inputs)
    num_solutions_per_example = (labels.sum(axis=-1) > 0).sum(axis=1)

    # Get solution lengths
    solution_lengths = []
    for gid in range(num_groups):
        first_puzzle_idx = group_indices[gid]
        first_solution = labels[first_puzzle_idx, 0]
        sol_len = (first_solution > 0).sum()
        if sol_len > 0:
            solution_lengths.append(sol_len)

    records.append({
        "split": split,
        "num_examples": num_examples,
        "num_groups": num_groups,
        "mean_examples_per_group": meta["mean_puzzle_examples"],
        "labels_shape": str(labels.shape),
        "max_solutions_per_state": labels.shape[1],
        "mean_solutions_per_state": num_solutions_per_example.mean(),
        "median_solutions_per_state": np.median(num_solutions_per_example),
        "min_solutions_per_state": num_solutions_per_example.min(),
        "max_solutions_per_state_actual": num_solutions_per_example.max(),
        "mean_solution_length": np.mean(solution_lengths),
        "median_solution_length": np.median(solution_lengths),
        "min_solution_length": min(solution_lengths),
        "max_solution_length": max(solution_lengths),
        "vocab_size": meta["vocab_size"],
        "seq_len": meta["seq_len"],
    })

df = pd.DataFrame(records)
display(df.T)

Unnamed: 0,0,1,2
split,train,test,val
num_examples,724406,90427,90555
num_groups,32292,4036,4037
mean_examples_per_group,22.432986,22.405104,22.431261
labels_shape,"(724406, 132, 11)","(90427, 132, 11)","(90555, 132, 11)"
max_solutions_per_state,132,132,132
mean_solutions_per_state,3.593902,3.593562,3.590183
median_solutions_per_state,2.0,2.0,2.0
min_solutions_per_state,1,1,1
max_solutions_per_state_actual,132,77,42


In [6]:
# Load train dataset
train_dir = DATA_DIR / "train"
inputs = np.load(train_dir / "train__inputs.npy")
labels = np.load(train_dir / "train__labels.npy")
group_indices = np.load(train_dir / "train__group_indices.npy")

print(f"Total groups in dataset: {len(group_indices) - 1}")

# Pick a random group
group_id = np.random.randint(0, len(group_indices) - 1)
group_start = group_indices[group_id]
group_end = group_indices[group_id + 1]

print(f"Selected group {group_id} with {group_end - group_start} orientations")

# Extract and verify all orientations in the group
orientation_results = []

for puzzle_idx in range(group_start, group_end):
    state_encoded = inputs[puzzle_idx]
    state = state_encoded - 1
    solutions = labels[puzzle_idx]

    num_solutions = 0
    solution_lengths = []
    for sol_idx in range(solutions.shape[0]):
        sol = solutions[sol_idx]
        sol_len = (sol > 0).sum()
        if sol_len > 0:
            num_solutions += 1
            solution_lengths.append(sol_len)

    # Verify each solution actually solves the state
    all_valid = True
    for sol_idx in range(solutions.shape[0]):
        sol = solutions[sol_idx]
        sol_len = (sol > 0).sum()
        if sol_len > 0:
            moves = (sol[:sol_len] - 1).astype(int)
            # Check if all moves are valid
            if not all(0 <= m <= 17 for m in moves):
                all_valid = False
                break
            result = py222.doMoves(state.copy(), moves)
            if not py222.isSolved(result):
                all_valid = False
                break

    orientation_results.append({
        "puzzle_idx": puzzle_idx,
        "state_first_6": state,
        "num_solutions": num_solutions,
        "optimal_depth": solution_lengths[0] if solution_lengths else 0,
        "all_valid": all_valid,
    })

# Display results
results_df = pd.DataFrame(orientation_results)
print(f"\nOrientations in group: {len(results_df)}")
print(f"All solutions verified: {results_df['all_valid'].all()}")
display(results_df)

first_puzzle_idx = group_start
state_encoded = inputs[first_puzzle_idx]
state = state_encoded - 1
solutions = labels[first_puzzle_idx]

# All moves (U, U', U2, R, R', R2, F, F', F2, D, D', D2, L, L', L2, B, B', B2)
move_names = {0: "U", 1: "U'", 2: "U2", 3: "R", 4: "R'", 5: "R2", 6: "F", 7: "F'", 8: "F2",
              9: "D", 10: "D'", 11: "D2", 12: "L", 13: "L'", 14: "L2", 15: "B", 16: "B'", 17: "B2"}
print(f"\nAll solutions for first orientation (puzzle {first_puzzle_idx}):")
for sol_idx in range(solutions.shape[0]):
    sol = solutions[sol_idx]
    sol_len = (sol > 0).sum()
    if sol_len > 0:
        moves = (sol[:sol_len] - 1).astype(int)
        valid_moves = [m for m in moves if 0 <= m <= 17]
        if len(valid_moves) == len(moves):
            move_str = " ".join(move_names[m] for m in valid_moves)
            print(f"  [{sol_idx+1}] {move_str}")
        else:
            print(f"  [{sol_idx+1}] Invalid moves (raw values: {list(sol[:sol_len])})")

Total groups in dataset: 32292
Selected group 3351 with 23 orientations

Orientations in group: 23
All solutions verified: True


Unnamed: 0,puzzle_idx,state_first_6,num_solutions,optimal_depth,all_valid
0,73899,"[4, 2, 4, 1, 3, 1, 3, 5, 0, 2, 1, 2, 0, 4, 3, ...",2,8,True
1,73900,"[1, 1, 4, 2, 3, 3, 0, 5, 0, 4, 0, 5, 1, 4, 3, ...",2,8,True
2,73901,"[0, 3, 5, 3, 2, 5, 3, 2, 1, 1, 4, 2, 0, 4, 3, ...",2,8,True
3,73902,"[2, 5, 0, 2, 3, 4, 5, 5, 1, 4, 3, 0, 2, 1, 3, ...",2,8,True
4,73903,"[1, 3, 3, 0, 1, 1, 4, 0, 4, 2, 0, 0, 5, 5, 3, ...",2,8,True
5,73904,"[1, 5, 4, 1, 0, 1, 0, 3, 5, 5, 2, 4, 3, 2, 3, ...",2,8,True
6,73905,"[5, 2, 2, 1, 3, 0, 4, 1, 0, 2, 0, 3, 4, 2, 3, ...",2,8,True
7,73906,"[1, 4, 0, 3, 2, 0, 2, 4, 5, 1, 0, 1, 2, 0, 3, ...",2,8,True
8,73907,"[4, 2, 1, 5, 0, 1, 3, 0, 3, 4, 3, 5, 4, 1, 3, ...",2,8,True
9,73908,"[0, 3, 2, 5, 1, 5, 2, 2, 1, 0, 4, 1, 0, 0, 3, ...",2,8,True



All solutions for first orientation (puzzle 73899):
  [1] F L' F U2 L' F' L F'
  [2] F L' F2 U F' L' U2 F2
