In [None]:
from pathlib import Path
import re

import matplotlib.pyplot as plt
import numpy as np

from topostats.plottingfuncs import Colormap

# Colormap obtained from topostats' plotting helpers; passed as `cmap` to the
# imshow/imsave calls in the cells below.
colormap = Colormap().get_cmap()

In [None]:
# Every working directory lives under one project root, so the root is spelled
# out once and the individual folders are derived from it.
_PROJECT_ROOT = Path("/Users/sylvi/topo_data/hariborings")

DATA_DIR = _PROJECT_ROOT / "data"
OFF_TARGET_DATA_DIR = _PROJECT_ROOT / "off_target"
TRAINING_DATA_DIR = _PROJECT_ROOT / "training_data"
FLATTENED_IMAGES_DIR = _PROJECT_ROOT / "flattened_images"

In [None]:
# Show every .npy array found under DATA_DIR/<sample_type> and save a PNG copy of
# it under FLATTENED_IMAGES_DIR/<sample_type>, using the same colour limits for both.
for sample_type in ["on_target", "off_target"]:
    output_dir = FLATTENED_IMAGES_DIR / sample_type
    # Create the output folder up front so the save below does not fail on a
    # machine where it has not been made by hand yet.
    output_dir.mkdir(parents=True, exist_ok=True)
    # Sorted so the figures appear in the same order on every run.
    for sample in sorted((DATA_DIR / sample_type).glob("*.npy")):
        data = np.load(sample)
        plt.imshow(data, vmin=-8, vmax=8, cmap=colormap)
        plt.title(f"{sample_type} {sample.stem}")
        plt.show()
        plt.imsave(output_dir / f"{sample.stem}.png", data, vmin=-8, vmax=8, cmap=colormap)

In [None]:
# Print all data files, then load one of them as the scan to crop molecules from.
# NOTE(review): the names say "on_target" but the path points at the off_target
# folder - confirm which sample set is intended.
DATA_ON_TARGET = Path("/Users/sylvi/topo_data/hariborings/data/off_target")
TRAINING_DATA_CROPPED_DIR = TRAINING_DATA_DIR / "cropped" / "images"
# Sort the listing: glob order is not guaranteed, and the file is picked by index
# below, so an unsorted list could select a different scan from run to run.
files_on_target = sorted(DATA_ON_TARGET.glob("*.npy"))
for file in files_on_target:
    print(file.stem)

# The index refers to the sorted listing printed above.
file = files_on_target[14]
image = np.load(file)

plt.imshow(image, cmap=colormap, vmin=-8, vmax=8)
plt.show()

In [None]:
# Cut a square window containing a molecule of interest out of the loaded scan.
# `y` indexes the first array axis and `x` the second; `width` is the side length.
x, y = 245, 450
width = 60
row_window = slice(y, y + width)
column_window = slice(x, x + width)
cropped_molecule = image[row_window, column_window]
# Display the crop with the same colour limits used for the full scan.
plt.imshow(cropped_molecule, cmap=colormap, vmin=-8, vmax=8)
plt.show()

In [None]:
# Save the molecule
# Set i to the next free index among the image_<n>.npy files saved so far.
# Counting the files instead would hand out an index that is already in use (and
# overwrite that crop) as soon as the numbering has a gap, e.g. after a deletion.
TRAINING_DATA_CROPPED_DIR.mkdir(parents=True, exist_ok=True)
used_indices = []
for saved_crop in TRAINING_DATA_CROPPED_DIR.glob("image_*.npy"):
    index_match = re.fullmatch(r"image_(\d+)", saved_crop.stem)
    if index_match:
        used_indices.append(int(index_match.group(1)))
i = max(used_indices, default=-1) + 1
np.save(TRAINING_DATA_CROPPED_DIR / f"image_{i}.npy", cropped_molecule)
# The PNG copy is scaled to the value range of the full source image, not of the crop.
plt.imsave(TRAINING_DATA_CROPPED_DIR / f"image_{i}.png", cropped_molecule, vmin=image.min(), vmax=image.max())

In [None]:
# Rename masks
# Renumber every .npy file in the masks folder to mask_0.npy, mask_1.npy, ...
# following the natural (number-aware) order of the current file names.


def natural_sort_key(name):
    """Return a sort key for `name` that compares embedded digit runs by value.

    With this key "mask_2.npy" sorts before "mask_10.npy", whereas plain string
    ordering puts "mask_10.npy" first.
    """
    # Splitting on a captured digit run yields text chunks at even positions and
    # digit chunks at odd positions, so all keys share the same str/int layout
    # and can always be compared with each other.
    chunks = re.split(r"(\d+)", name)
    return [int(chunk) if position % 2 else chunk for position, chunk in enumerate(chunks)]


path = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/masks/")

files = sorted(path.glob("*.npy"), key=lambda mask_path: natural_sort_key(mask_path.name))
for i, mask_path in enumerate(files):
    print(mask_path.name)
    new_filename = path / f"mask_{i}.npy"
    if new_filename == mask_path:
        # Already carries its final name, e.g. when this cell is run a second time.
        continue
    if new_filename.exists():
        # Renaming onto an existing file can silently replace it, which would
        # destroy a mask; stop instead so nothing is lost.
        raise FileExistsError(f"{new_filename} already exists; refusing to overwrite it with {mask_path.name}")
    mask_path.rename(new_filename)

In [None]:
# Sanity check: load the first file (in sorted order) from the cropped masks
# folder, show it and print the distinct values it contains.
# The cell uses its own variable names so that the scan held in `image`, which
# the crop and save cells read, is not replaced by a mask when this cell runs.
cropped_mask_files = sorted((TRAINING_DATA_DIR / "cropped" / "masks").glob("*.npy"))
print(cropped_mask_files[0])
first_mask = np.load(cropped_mask_files[0])
plt.imshow(first_mask)
print(np.unique(first_mask))

In [None]:
# Merge multiclass masks into one mask
MULTICLASS_MASK_DIR = Path("/Users/sylvi/topo_data/hariborings/training_data/cropped/multiclass_masks/")

mask_files = sorted(MULTICLASS_MASK_DIR.glob("*.npy"))


def find_task_mask(task, label):
    """Return the first file in `mask_files` named for `task` that contains `label`.

    Parameters
    ----------
    task : int
        Task id; the file name must contain "task-<task>" not followed by a digit.
    label : str
        Substring identifying the mask kind, e.g. "ring" or "gem".

    Raises
    ------
    FileNotFoundError
        If no file in `mask_files` matches both the task id and the label.
    """
    # The negative lookahead stops "task-252" from also matching "task-2520".
    task_pattern = re.compile(rf"task-{task}(?!\d)")
    for candidate in mask_files:
        if task_pattern.search(candidate.name) and label in candidate.name:
            return candidate
    raise FileNotFoundError(f"no '{label}' mask found for task-{task} in {MULTICLASS_MASK_DIR}")


# Files with the same task id are the same molecule and need to be merged.
# Output files are numbered from 0 in task order.
for i, task in enumerate(range(252, 302)):
    print(f"task: {task}")
    # Both masks have "task-{task}" in the name; "ring" / "gem" tells them apart.
    ring_mask = np.load(find_task_mask(task, "ring")).astype(bool)
    gem_mask = np.load(find_task_mask(task, "gem")).astype(bool)
    if ring_mask.shape != gem_mask.shape:
        raise ValueError(
            f"task-{task}: ring mask shape {ring_mask.shape} differs from gem mask shape {gem_mask.shape}"
        )
    # Label values: 0 where neither mask is set, 1 for ring, 2 for gem.
    # Gem is written last, so it takes precedence where the two masks overlap.
    combined_mask = np.zeros(ring_mask.shape, dtype=int)
    combined_mask[ring_mask] = 1
    combined_mask[gem_mask] = 2
    plt.imsave(MULTICLASS_MASK_DIR / f"mask_{i}.png", combined_mask)
    np.save(MULTICLASS_MASK_DIR / f"mask_{i}.npy", combined_mask)