In [1]:
from pathlib import Path

# Scan the training grayscale folder once and reuse the listing, so the
# reported count and the example paths always describe the same snapshot
# (the directory was previously globbed twice).
gray_dir = Path("../data/processed/train/grayscale")
gray_images = list(gray_dir.glob("*.jpg"))  # non-recursive: direct children only

print("Grayscale dir:", gray_dir.resolve())
print("Number of grayscale images:", len(gray_images))
print("Example files:", gray_images[:5])


Grayscale dir: C:\Users\ASUS\Jupyter\image colourisation\Image-Colourisation\data\processed\train\grayscale
Number of grayscale images: 4995
Example files: [WindowsPath('../data/processed/train/grayscale/00000001.jpg'), WindowsPath('../data/processed/train/grayscale/00000002.jpg'), WindowsPath('../data/processed/train/grayscale/00000003.jpg'), WindowsPath('../data/processed/train/grayscale/00000004.jpg'), WindowsPath('../data/processed/train/grayscale/00000005.jpg')]


In [8]:
# Ensure all stored array values are normalized: flag anything outside [-1, 1]
import numpy as np
from pathlib import Path

def _report_range(tag, label, arr, low, high):
    """Print the min/max of ``arr`` and flag it when outside ``[low, high]``."""
    if arr.size == 0:
        # min()/max() raise on zero-size arrays; report the situation instead.
        print(f"[{tag}] {label} → empty array, nothing to check")
        return

    min_v, max_v = arr.min(), arr.max()
    print(f"[{tag}] {label} → min={min_v:.4f}, max={max_v:.4f}")
    # Written as a negated "inside" test so NaN (for which every comparison
    # is False) is reported as out of range instead of slipping through.
    if not (min_v >= low and max_v <= high):
        print("OUT OF RANGE!")


def check_npz_range(root_dir, low=-1.0, high=1.0):
    """Print the value range of every ``.npz``/``.npy`` array under ``root_dir``.

    The directory is searched recursively. For each array the minimum and
    maximum are printed, followed by ``OUT OF RANGE!`` when any value lies
    outside ``[low, high]`` (NaN counts as out of range).

    Args:
        root_dir: Directory (``str`` or ``Path``) to search.
        low: Smallest accepted value (inclusive). Defaults to ``-1.0``.
        high: Largest accepted value (inclusive). Defaults to ``1.0``.
            Pass ``low=0.0`` for data expected to lie in ``[0, 1]``.

    Returns:
        None. All results are printed.
    """
    root = Path(root_dir)
    files = list(root.rglob("*.npz")) + list(root.rglob("*.npy"))

    if not files:
        print("No .npz or .npy files found.")
        return

    print(f"Found {len(files)} files\n")

    for f in files:
        try:
            # Compare case-insensitively: on case-insensitive file systems the
            # glob may also return names such as "X.NPZ".
            if f.suffix.lower() == ".npz":
                # NpzFile keeps the archive open; the context manager closes it
                # so handles do not accumulate across thousands of files.
                with np.load(f) as data:
                    for key in data.files:
                        _report_range("NPZ", f"{f.name} ({key})", data[key], low, high)
            else:  # .npy
                _report_range("NPY", f.name, np.load(f), low, high)

        except Exception as e:
            # Best effort: one unreadable file (corrupt, pickled objects,
            # non-numeric dtype) must not abort the scan of the others.
            print(f"Error reading {f}: {e}")

In [274]:

from pathlib import Path
# Recursively collect every JPEG below the processed-data root and report
# how many were found.
jpg_dir = Path("../data/processed/")
jpg_files = [jpg_path for jpg_path in jpg_dir.rglob("*.jpg")]

print(f"Total .jpg files: {len(jpg_files)}")


Total .jpg files: 5000


In [None]:

from pathlib import Path

# Default roots compared by sanity_check().
# NOTE(review): an earlier cell reads grayscale images from
# ../data/processed/train/grayscale — confirm these roots match the layout
# on disk; sanity_check() reports a root that does not exist.
GRAY_DIR = Path("../data/processed/grayscale")
AB_DIR = Path("../data/processed/color")


def _collect(root, pattern):
    """Return the sorted files matching ``pattern`` under ``root`` (recursive)
    and the set of their root-relative paths with the extension removed."""
    files = sorted(root.rglob(pattern))
    keys = {str(p.relative_to(root).with_suffix("")) for p in files}
    return files, keys


def sanity_check(gray_dir=GRAY_DIR, ab_dir=AB_DIR):
    """Check that ``.jpg`` files and ``.npy`` files pair up one-to-one.

    Files are paired by their path relative to their root directory with the
    extension removed, so ``gray_dir/sub/a.jpg`` pairs with
    ``ab_dir/sub/a.npy``. The result is printed as a report.

    Args:
        gray_dir: Root directory (``str`` or ``Path``) searched recursively
            for ``.jpg`` files.
        ab_dir: Root directory (``str`` or ``Path``) searched recursively
            for ``.npy`` files.

    Returns:
        None. All results are printed.
    """
    gray_dir, ab_dir = Path(gray_dir), Path(ab_dir)

    gray_files, gray_set = _collect(gray_dir, "*.jpg")
    ab_files, ab_set = _collect(ab_dir, "*.npy")

    # Compute mismatches
    missing_ab = gray_set - ab_set         # JPG exists but no corresponding NPY
    extra_ab = ab_set - gray_set           # NPY exists but no corresponding JPG

    print("=== SANITY CHECK RESULTS ===")

    # rglob() on a missing directory silently yields nothing, which would make
    # two empty sets "match"; surface that case instead of reporting success.
    for root in (gray_dir, ab_dir):
        if not root.is_dir():
            print(f"❌ Directory not found: {root}")

    if not gray_files and not ab_files:
        print("❌ No .jpg or .npy files found - nothing was compared.")
    elif not missing_ab and not extra_ab:
        print("✔ All grayscale .jpg files have matching .npy files.")
        print("✔ No extra .npy files found.")
    else:
        if missing_ab:
            print("\n❌ Missing .npy for the following .jpg files:")
            for p in sorted(missing_ab):
                print("  -", p)

        if extra_ab:
            print("\n❌ Unmatched .npy files found (no corresponding .jpg):")
            for p in sorted(extra_ab):
                print("  -", p)

    print("\nSummary:")
    print(f"Total JPG: {len(gray_files)}")
    print(f"Total NPY: {len(ab_files)}")
    print(f"Missing NPY: {len(missing_ab)}")
    print(f"Extra NPY: {len(extra_ab)}")


if __name__ == "__main__":
    sanity_check()


=== SANITY CHECK RESULTS ===
✔ All grayscale .jpg files have matching .npy files.
✔ No extra .npy files found.

Summary:


AttributeError: 'set' object has no attribute 'len'