In [None]:
from pathlib import Path

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

In [None]:
def _digits_to_int(path):
    """Join every digit character of ``path.name`` and return it as one int."""
    return int("".join(ch for ch in path.name if ch.isdigit()))


# Toy check: numeric ordering of file names must survive a random shuffle,
# i.e. image_10 has to come after image_3 rather than after image_1.
files = [
    Path(name)
    for name in (
        "image_1.png",
        "image_2.png",
        "image_3.png",
        "image_10.png",
        "image_11.png",
        "image_12.png",
    )
]
# Scramble the order of the files (in place)
np.random.shuffle(files)
print(files)
# Restore the numeric order and show the result
files = sorted(files, key=_digits_to_int)
print(files)

In [None]:
mask_dir = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/dna_only/masks_256/")


def numeric_fields(path):
    """Return each run of digits in ``path.name`` as a tuple of ints.

    Comparing these tuples orders names field by field, e.g.
    ``task-9-annotation-100-...`` -> ``(9, 100, ...)`` sorts before
    ``task-10-annotation-5-...`` -> ``(10, 5, ...)``. Gluing all digits into a
    single integer (910010 vs 10510) would put them the other way round.
    A name without any digits yields an empty tuple and therefore sorts first.
    """
    spaced = "".join(ch if ch.isdecimal() else " " for ch in path.name)
    return tuple(int(token) for token in spaced.split())


# Rename all the mask files to mask_i.npy where i is the index of the corresponding image file,
# sorted numerically field by field (intended to match how macOS Finder lists them).
# Files have names like task-377-annotation-314-by-1-tag-mask-0 (only *.npy files are matched here).
masks = sorted(mask_dir.glob("*.npy"), key=numeric_fields)

# Work out every (source, target) pair before touching the file system.
rename_plan = [(mask, mask_dir / f"mask_{i}.npy") for i, mask in enumerate(masks)]

# Path.rename silently replaces an existing target on POSIX, which would destroy
# a mask. Refuse to start if any target already exists as a different file.
conflicts = [target for source, target in rename_plan if target != source and target.exists()]
if conflicts:
    raise FileExistsError(
        f"Refusing to rename masks: {len(conflicts)} target file(s) already exist, "
        f"e.g. {conflicts[0]}"
    )

for source, target in rename_plan:
    # Files that already carry their final name are left alone, so re-running
    # this cell after a successful rename is a no-op.
    if target != source:
        source.rename(target)

In [None]:
original_file_dir = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/dna_only/images/")
file_dir_256 = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/dna_only/images_256_FIXED/")

# Visual sanity check: show each original PNG next to the PNG found at the
# same sorted position in the 256 directory.
original_image_files = list(original_file_dir.glob("*.png"))
image_files_256 = list(file_dir_256.glob("*.png"))

print(len(original_image_files))
print(len(image_files_256))

# Files are paired purely by position after sorting, and zip() stops at the
# shorter list, so a count mismatch means some files are silently left out.
if len(original_image_files) != len(image_files_256):
    print("WARNING: file counts differ; only the first pairs (by sorted position) are shown")

# Sort them based on the number in the filename
original_image_files.sort(key=lambda f: int("".join(filter(str.isdigit, f.name))))
image_files_256.sort(key=lambda f: int("".join(filter(str.isdigit, f.name))))

for original_image_file, image_file in zip(original_image_files, image_files_256):
    fig, ax = plt.subplots(1, 2)
    # Image.open is lazy and keeps the file open; draw inside the context
    # manager so the pixel data is read and the handles are then released.
    with Image.open(original_image_file) as original_image, Image.open(image_file) as image:
        ax[0].imshow(original_image)
        ax[1].imshow(image)
    ax[0].set_title(original_image_file.name)
    ax[1].set_title(image_file.name)
    plt.show()
    # Release the figure explicitly so a long loop does not accumulate figures.
    plt.close(fig)

In [None]:
file_dir = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/dna_only/")

image_dir = file_dir / "images_256/"
print(f"image_dir: {image_dir}")
label_dir = file_dir / "masks_256/"
print(f"label_dir: {label_dir}")

# Get all the .npy files and sort them by the number in their filename, eg: image_1.npy
image_paths = sorted(image_dir.glob("*.npy"), key=lambda f: int("".join(filter(str.isdigit, f.name))))
label_paths = sorted(label_dir.glob("*.npy"), key=lambda f: int("".join(filter(str.isdigit, f.name))))

print(f"Number of images: {len(image_paths)}")
print(f"Number of labels: {len(label_paths)}")

# Images and labels are paired purely by sorted position, and zip() stops at
# the shorter list, so a count mismatch means some files are silently skipped.
if len(image_paths) != len(label_paths):
    print("WARNING: image and label counts differ; only the first pairs (by sorted position) are shown")

for image_path, label_path in zip(image_paths, label_paths):
    image = np.load(image_path)
    # Load the png image file (same stem, .png suffix), convert to numpy array
    image_png_path = image_path.with_suffix(".png")
    with Image.open(image_png_path) as image_png_file:
        image_png = np.array(image_png_file)

    label = np.load(label_path)
    # Load the png label file (same stem, .png suffix), convert to numpy array
    label_png_path = label_path.with_suffix(".png")
    with Image.open(label_png_path) as label_png_file:
        label_png = np.array(label_png_file)

    # Plot the four arrays side by side and then an overlay
    fig, axes = plt.subplots(1, 5, figsize=(12, 4))
    axes[0].imshow(image)
    axes[0].set_title("Image")
    axes[1].imshow(image_png)
    axes[1].set_title("Image PNG")
    axes[2].imshow(label)
    axes[2].set_title("Label")
    axes[3].imshow(label_png)
    axes[3].set_title("Label PNG")
    # NOTE(review): the image is drawn on top of the label at alpha=0.9, so the
    # label only shows through faintly; confirm this draw order is intended.
    axes[4].imshow(label)
    axes[4].imshow(image, alpha=0.9)
    axes[4].set_title("Overlay")
    # Set title to the name of the file
    fig.suptitle(f"{image_path.name} {label_path.name}")
    plt.show()
    # Release the figure explicitly so a long loop does not accumulate figures.
    plt.close(fig)