# Crossword grid detection
- Aim is to detect crossword grid structure.

# Detect grids

In [8]:
import os
import json
import numpy as np
from glob import glob
from grid_detect import extract_crossword_array


SOLUTIONS_DIR = "../dataset/solutions"
IMAGES_DIR = "../dataset/images"
MAX_FILES = 100  # set this to None to process all


def load_solution_grid(json_path):
    """Load the solution JSON and return the grid as numpy array."""
    with open(json_path, "r") as f:
        data = json.load(f)
    grid = np.array(data["grid"], dtype=int)
    return grid


def compare_grids(grid1, grid2):
    """Compare two grids and return True if identical, False otherwise."""
    if grid1.shape != grid2.shape:
        return False
    return np.array_equal(grid1, grid2)


def main(max_files=None):
    solution_files = glob(os.path.join(SOLUTIONS_DIR, "*.json"))

    if max_files is not None:
        solution_files = solution_files[:max_files]

    total = 0
    matching = 0
    non_matching = 0

    for sol_path in solution_files:
        base = os.path.basename(sol_path).replace("_solution.json", "")
        img_path = os.path.join(IMAGES_DIR, f"{base}.png")

        if not os.path.exists(img_path):
            print(f"⚠️ No image found for {sol_path}")
            continue

        try:
            sol_grid = load_solution_grid(sol_path)
            det_grid = extract_crossword_array(img_path, debug=False)

            if compare_grids(sol_grid, det_grid):
                print(f"✅ Match: {base}")
                matching += 1
            else:
                print(f"❌ Mismatch: {base} (solution {sol_grid.shape}, detected {det_grid.shape})")
                non_matching += 1

            total += 1

        except Exception as e:
            print(f"⚠️ Error processing {base}: {e}")
            non_matching += 1
            total += 1

    print("\n==== SUMMARY ====")
    print(f"Total puzzles:   {total}")
    print(f"Matching:        {matching}")
    print(f"Non-matching:    {non_matching}")


if __name__ == "__main__":
    main(max_files=MAX_FILES)


❌ Mismatch: daily-2012-07-Jul1412 (solution (15, 15), detected (16, 16))
❌ Mismatch: daily-2014-11-Nov0814 (solution (15, 15), detected (15, 15))
❌ Mismatch: daily-2002-03-Mar3002 (solution (15, 15), detected (16, 16))
❌ Mismatch: daily-2002-10-Oct1502 (solution (15, 15), detected (16, 16))
❌ Mismatch: daily-2008-02-Feb0408 (solution (15, 15), detected (15, 15))
⚠️ Error processing daily-2014-10-Oct1314: index 0 is out of bounds for axis 0 with size 0
❌ Mismatch: daily-2008-06-Jun0108 (solution (21, 21), detected (20, 20))
❌ Mismatch: daily-2008-12-Dec0808 (solution (15, 15), detected (17, 17))
❌ Mismatch: daily-2008-03-Mar0508 (solution (15, 15), detected (16, 16))
⚠️ Error processing daily-2019-02-Feb2419: index 2 is out of bounds for axis 0 with size 2
❌ Mismatch: daily-2004-04-Apr2504 (solution (23, 23), detected (24, 24))
❌ Mismatch: daily-1996-05-May0896 (solution (15, 15), detected (14, 14))
❌ Mismatch: daily-2000-07-Jul2200 (solution (15, 15), detected (16, 16))
❌ Mismatch: dai