## Setup


In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from rich import print as rprint
from loguru import logger as lg

from snap_fit.puzzle.sheet import Sheet
from snap_fit.puzzle.sheet_manager import SheetManager
from snap_fit.puzzle.piece_matcher import PieceMatcher
from snap_fit.data_models import SheetRecord, PieceRecord, MatchResult, PieceId

In [None]:
# Paths
# Inputs live in the repository's sample-data folder (two directory levels
# above the working directory); all artifacts are written to ./output.
_here = Path.cwd()
REPO_ROOT = _here.parent.parent
DATA_ROOT = REPO_ROOT.joinpath("data", "sample")
OUTPUT_DIR = _here.joinpath("output")
OUTPUT_DIR.mkdir(exist_ok=True)

# Artifact locations used by the save/load cells below.
METADATA_PATH = OUTPUT_DIR.joinpath("metadata.json")
CONTOUR_CACHE_DIR = OUTPUT_DIR.joinpath("contour_cache")
MATCHES_PATH = OUTPUT_DIR.joinpath("matches.json")

rprint(f"Data root: {DATA_ROOT}")
rprint(f"Output dir: {OUTPUT_DIR}")

## Phase 1: Load Sheets from Images


In [None]:
# Check available sample images
# JPEGs come first, then PNGs; each group is sorted on its own.
sample_images = [
    image_file
    for pattern in ("*.jpg", "*.png")
    for image_file in sorted(DATA_ROOT.glob(pattern))
]
rprint(f"Found {len(sample_images)} sample images:")
# Only preview the first five names to keep the output short.
for preview in sample_images[:5]:
    rprint(f"  - {preview.name}")

In [None]:
# Load sheets
# Manager instance that the loading loop below fills via ``add_sheet``.
manager = SheetManager()


def load_sheet(path: Path, *, min_area: int = 50_000) -> Sheet:
    """Load a sheet from an image file.

    Args:
        path: Image file the sheet is built from.
        min_area: Value forwarded unchanged as ``Sheet(min_area=...)``.
            Presumably the minimum region area for something to count as a
            piece -- TODO confirm against ``Sheet``. Defaults to the 50_000
            this notebook has always used.

    Returns:
        The ``Sheet`` constructed from ``path``.
    """
    return Sheet(img_fp=path, min_area=min_area)


# Load first few images for testing
quick_subset = sample_images[:3]  # Limit for quick test
for img_path in quick_subset:
    sheet = load_sheet(img_path)
    # Sheets are registered under the image's file name without extension.
    manager.add_sheet(sheet, img_path.stem)

sheet_count = len(manager.sheets)
rprint(f"Loaded {sheet_count} sheets")
piece_count = len(manager.get_pieces_ls())
rprint(f"Total pieces: {piece_count}")

## Phase 2: Save Metadata + Contour Cache


In [None]:
# Save metadata JSON
manager.save_metadata(METADATA_PATH, data_root=DATA_ROOT)
rprint(f"Metadata saved to: {METADATA_PATH}")

# Report the size of the written file in kilobytes.
metadata_kb = METADATA_PATH.stat().st_size / 1024
rprint(f"File size: {metadata_kb:.1f} KB")

In [None]:
# Save contour cache
manager.save_contour_cache(CONTOUR_CACHE_DIR)
rprint(f"Contour cache saved to: {CONTOUR_CACHE_DIR}")

# List cache files in name order. Directory globbing has no guaranteed order,
# so sort to keep the output stable across runs and platforms (the sample-image
# listing earlier in this notebook is sorted as well).
cache_files = sorted(CONTOUR_CACHE_DIR.glob("*"))
for f in cache_files:
    rprint(f"  - {f.name}: {f.stat().st_size / 1024:.1f} KB")

In [None]:
# Inspect metadata structure
import json

metadata = json.loads(METADATA_PATH.read_text())
rprint(f"Metadata keys: {list(metadata.keys())}")
rprint(f"Sheets: {len(metadata['sheets'])}")
rprint(f"Pieces: {len(metadata['pieces'])}")

# Show first sheet record.
# Guarded: when no sample images were found the list is empty, and indexing
# [0] would abort the cell with an IndexError.
rprint("\nFirst sheet record:")
rprint(metadata["sheets"][0] if metadata["sheets"] else "(none)")

# Show first piece record (same empty-list guard as above).
rprint("\nFirst piece record:")
rprint(metadata["pieces"][0] if metadata["pieces"] else "(none)")

## Phase 3: Run Matching and Save Results


In [None]:
# Run matching
matcher = PieceMatcher(manager)
matcher.match_all()

match_total = len(matcher.results)
rprint(f"Total matches: {match_total}")
rprint("Top 5 matches:")
best_matches = matcher.get_top_matches(5)
for best in best_matches:
    # One line per match: both segment ids and the similarity score.
    rprint(f"  {best.seg_id1} <-> {best.seg_id2}: {best.similarity:.4f}")

In [None]:
# Save matches
matcher.save_matches_json(MATCHES_PATH)
rprint(f"Matches saved to: {MATCHES_PATH}")

# Report the size of the written file in kilobytes.
matches_kb = MATCHES_PATH.stat().st_size / 1024
rprint(f"File size: {matches_kb:.1f} KB")

## Phase 4: Reload and Verify


In [None]:
# Reload metadata (records only, no full objects)
loaded_metadata = SheetManager.load_metadata(METADATA_PATH)

raw_sheets = loaded_metadata["sheets"]
rprint(f"Loaded {len(raw_sheets)} sheet records")
raw_pieces = loaded_metadata["pieces"]
rprint(f"Loaded {len(raw_pieces)} piece records")

# Validate as Pydantic models; each entry goes through ``model_validate``.
sheet_records = list(map(SheetRecord.model_validate, raw_sheets))
piece_records = list(map(PieceRecord.model_validate, raw_pieces))

rprint(f"\nValidated {len(sheet_records)} SheetRecords")
rprint(f"Validated {len(piece_records)} PieceRecords")

In [None]:
# Test contour cache loading
# Skipped entirely when there are no piece records to look up.
if piece_records:
    first_record = piece_records[0]
    test_piece_id = first_record.piece_id
    loaded_pair = SheetManager.load_contour_for_piece(test_piece_id, CONTOUR_CACHE_DIR)
    contour, corners = loaded_pair

    rprint(f"Loaded contour for {test_piece_id}:")
    contour_shape = contour.shape
    rprint(f"  Shape: {contour_shape}")
    point_count = len(contour)
    rprint(f"  Points: {point_count}")
    rprint(f"  Corner indices: {corners}")

In [None]:
# Reload matches into a fresh matcher
matcher2 = PieceMatcher(manager)
matcher2.load_matches_json(MATCHES_PATH)

rprint(f"Loaded {len(matcher2.results)} matches")
rprint(f"Lookup cache size: {len(matcher2._lookup)}")

# Verify data integrity
assert len(matcher2.results) == len(matcher.results), "Match count mismatch!"

# Compare top matches
orig_top = matcher.get_top_matches(5)
loaded_top = matcher2.get_top_matches(5)

rprint("\nTop 5 matches comparison:")
# Accumulate the per-row verdicts so the final message reflects the actual
# outcome instead of always claiming success.
all_ok = True
for orig, loaded in zip(orig_top, loaded_top):
    # A match is identified by both of its segment ids, so compare the pair,
    # not just the first id.
    match_ok = (
        orig.seg_id1 == loaded.seg_id1
        and orig.seg_id2 == loaded.seg_id2
        and orig.similarity == loaded.similarity
    )
    all_ok = all_ok and match_ok
    status = "✅" if match_ok else "❌"
    rprint(f"  {status} {orig.similarity:.4f} vs {loaded.similarity:.4f}")

# Fail the cell, like the count check above, if any row differed.
assert all_ok, "Top matches differ after reload!"
rprint("\n✅ Round-trip validation passed!")

## Phase 5: Test Incremental Matching


In [None]:
# Simulate adding a new sheet
# The loading cell registers sample_images[:3], so index 3 is the first image
# that has not been added to the manager yet.
if len(sample_images) <= 3:
    rprint("Not enough sample images for incremental test")
else:
    new_img = sample_images[3]
    new_sheet = load_sheet(new_img)
    manager.add_sheet(new_sheet, new_img.stem)

    # Collect the ids of the pieces that belong to the new sheet
    new_piece_ids = [piece.piece_id for piece in new_sheet.pieces]
    rprint(f"Added new sheet: {new_img.stem}")
    rprint(f"New pieces: {len(new_piece_ids)}")

    # Match only the new pieces, recording the result count before and after
    matches_before = len(matcher2.results)
    new_matches = matcher2.match_incremental(new_piece_ids)
    matches_after = len(matcher2.results)

    rprint(f"\nMatches before: {matches_before}")
    rprint(f"New matches added: {new_matches}")
    rprint(f"Matches after: {matches_after}")

## Summary


In [None]:
# Summary stats
banner = "=" * 50
rprint(banner)
rprint("PERSISTENCE SUMMARY")
rprint(banner)
rprint(f"Sheets loaded: {len(manager.sheets)}")
rprint(f"Total pieces: {len(manager.get_pieces_ls())}")
rprint(f"Total matches: {len(matcher2.results)}")
rprint("\nFile sizes:")
metadata_kb = METADATA_PATH.stat().st_size / 1024
rprint(f"  Metadata JSON: {metadata_kb:.1f} KB")
matches_kb = MATCHES_PATH.stat().st_size / 1024
rprint(f"  Matches JSON: {matches_kb:.1f} KB")

# Contour cache size is the sum over every entry directly inside the cache dir.
total_cache = sum(entry.stat().st_size for entry in CONTOUR_CACHE_DIR.glob("*"))
rprint(f"  Contour cache: {total_cache / 1024:.1f} KB")
rprint("\n✅ All persistence methods working correctly!")