# Crossword grid detection
- Aim is to detect crossword grid structure.

Failures
- Big bang approach to finding grid lines, had to break this into multiple steps

# Detect grids

In [1]:
import os
import json
import numpy as np
from glob import glob
from grid_detect import get_crossword_grid_array


SOLUTIONS_DIR = "../dataset/solutions"
IMAGES_DIR = "../dataset/images"
MAX_FILES = 100  # set this to None to process all


def load_solution_grid(json_path):
    """Load the solution JSON and return the grid as numpy array."""
    with open(json_path, "r") as f:
        data = json.load(f)
    grid = np.array(data["grid"], dtype=int)
    return grid


def compare_grids(grid1, grid2):
    """Compare two grids and return True if identical, False otherwise."""
    if grid1.shape != grid2.shape:
        return False
    return np.array_equal(grid1, grid2)


def main(max_files=None):
    solution_files = glob(os.path.join(SOLUTIONS_DIR, "*.json"))

    if max_files is not None:
        solution_files = solution_files[:max_files]

    total = 0
    matching = 0
    non_matching = 0

    for sol_path in solution_files:
        base = os.path.basename(sol_path).replace("_solution.json", "")
        img_path = os.path.join(IMAGES_DIR, f"{base}.png")

        if not os.path.exists(img_path):
            print(f"⚠️ No image found for {sol_path}")
            continue

        try:
            sol_grid = load_solution_grid(sol_path)
            det_grid = get_crossword_grid_array(img_path)

            if compare_grids(sol_grid, det_grid):
                print(f"✅ Match: {base}")
                matching += 1
            else:
                print(f"❌ Mismatch: {base} (solution {sol_grid.shape}, detected {det_grid.shape})")
                non_matching += 1

            total += 1

        except Exception as e:
            print(f"⚠️ Error processing {base}: {e}")
            non_matching += 1
            total += 1

    print("\n==== SUMMARY ====")
    print(f"Total puzzles:   {total}")
    print(f"Matching:        {matching}")
    print(f"Non-matching:    {non_matching}")


if __name__ == "__main__":
    main(max_files=MAX_FILES)


Error: Could not load image from dataset/images/daily-1994-02-Feb0494.png
✅ Match: daily-2009-10-Oct1609


[ WARN:0@0.023] global loadsave.cpp:275 findDecoder imread_('dataset/images/daily-1994-02-Feb0494.png'): can't open/read file: check file path/integrity


❌ Mismatch: daily-2005-04-Apr0305 (solution (21, 21), detected (11, 21))
✅ Match: daily-1995-08-Aug3195
✅ Match: daily-2001-04-Apr1401
✅ Match: daily-2014-04-Apr2414
✅ Match: daily-1996-10-Oct1596
✅ Match: daily-2005-11-Nov0105
✅ Match: daily-1995-07-Jul1895
✅ Match: variety-2009-Feb0809.2
✅ Match: daily-2009-06-Jun2209
✅ Match: daily-2007-05-May0407
✅ Match: daily-2003-10-Oct2403
✅ Match: daily-2008-09-Sep2908
✅ Match: daily-2003-08-Aug1603
✅ Match: daily-2000-05-May1900
✅ Match: daily-2000-11-Nov2400
✅ Match: daily-1994-09-Sep1494
✅ Match: daily-2006-06-Jun2606
✅ Match: daily-2006-12-Dec0106
✅ Match: daily-1994-02-Feb0494
✅ Match: daily-2007-09-Sep0607
✅ Match: variety-2007-Aug1207.2
✅ Match: daily-1997-08-Aug1897
✅ Match: daily-1994-07-Jul2094
✅ Match: daily-1997-05-May2997
✅ Match: daily-2006-08-Aug2006
✅ Match: daily-2007-11-Nov0807
✅ Match: daily-1999-03-Mar2099
✅ Match: daily-1999-12-Dec2399
✅ Match: daily-2015-02-Feb1515
✅ Match: daily-2008-07-Jul1808
✅ Match: daily-2002-08-Aug