In [None]:
import pickle
from datetime import datetime
from pathlib import Path

import h5py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from topostats.io import LoadScans

# Minute-resolution timestamp taken once when this cell runs; it is embedded
# in the pickle filename written further down so repeated runs do not
# overwrite each other.
today = f"{datetime.today():D-%Y-%m-%d-T-%H-%M}"
print(today)

In [None]:
# Locate the dataset on disk. The path is machine-specific; the assert fails
# fast if the data directory is missing.
base_dir = Path("/Users/sylvi/topo_data/beaks")
data_dir = base_dir.joinpath("data")
assert data_dir.exists()

# Collect every .topostats file directly inside the data directory.
# NOTE(review): glob() makes no ordering guarantee, yet later cells refer to
# images by their position in this list -- confirm the order is stable on the
# machine where the category indices were assigned before changing it.
topo_files = [path for path in data_dir.glob("*.topostats")]
print(f"num files: {len(topo_files)}")

# Load all files (a copy of the full list is used as the sample set).
# NOTE(review): channel="dummy" presumably is ignored for .topostats input --
# confirm against the topostats LoadScans documentation.
sample_set = list(topo_files)
loadscans = LoadScans(sample_set, channel="dummy")
loadscans.get_data()
img_dict = loadscans.img_dict

In [None]:
# Re-key the loaded images by their position (0..N-1) in img_dict instead of
# by filename, so individual images can be addressed with a short integer.
image_dict = dict(enumerate(img_dict.values()))

print(image_dict.keys())
print(f"num: {len(image_dict)}")

# Persist a timestamped pickle of the index-keyed dictionary under base_dir.
images_before_processing_dir = base_dir / "images-topostats-processed"
images_before_processing_dir.mkdir(exist_ok=True)
pickle_path = images_before_processing_dir / f"image-dict-{today}.pkl"
with pickle_path.open("wb") as f:
    pickle.dump(image_dict, f)

In [None]:
# Quick-look plot of a single image: the image itself, a semi-transparent
# overlay of the "above" grain mask, and every ordered trace drawn in red.
image_index = 14
image_data = image_dict[image_index]
first_image_image = image_data["image"]
first_image_mask = image_data["grain_masks"]["above"]

print(f"p2nm: {image_data['pixel_to_nm_scaling']}")
print(f"image index: {image_index}")

fig, ax = plt.subplots()
ax.imshow(first_image_image)
# Only index 1 of the mask's last axis is overlaid.
# NOTE(review): presumably that is the grain class channel -- confirm.
ax.imshow(first_image_mask[:, :, 1], alpha=0.2)

# Not every image necessarily carries traces (the category grid cell guards
# the same key), so skip the overlay instead of raising KeyError.
if "ordered_traces" in image_data:
    for grain_data in image_data["ordered_traces"]["above"].values():
        for molecule_data in grain_data.values():
            bbox = molecule_data["bbox"]
            ordered_coords = molecule_data["ordered_coords"]
            # Column 1 is plotted as x and column 0 as y, each shifted by the
            # matching bbox entry.
            # NOTE(review): presumably coords are (row, col) relative to the
            # grain's bounding box -- confirm.
            ax.plot(ordered_coords[:, 1] + bbox[1], ordered_coords[:, 0] + bbox[0], color="red")

ax.set_title(f"image index: {image_index} | p2nm: {image_data['pixel_to_nm_scaling']:.2f}")
plt.show()

In [None]:
# Second labelling scheme: category name -> list of image indices
# (keys of image_dict).
new_categories = {
    "magpie": [6, 7, 35, 60, 63, 84, 106, 110, 113, 115, 132, 51],
    "hummingbird": [29, 30, 36, 67, 72, 73, 85, 96, 105, 109, 112, 118, 120, 124, 17, 14],
}

# Write one "<category>: <list>" line per category to categories.txt in the
# base directory, overwriting any previous file.
categories_path = base_dir / "categories.txt"
with open(categories_path, "w") as f:
    f.writelines(f"{category}: {indices}\n" for category, indices in new_categories.items())

In [None]:
# Manual labelling of the loaded images: category name -> list of image
# indices (keys of image_dict). Insertion order of the categories is the
# order in which they are plotted below.
#
# NOTE(review): some indices are listed in more than one category (e.g. 14 is
# in both "non-beaks" and "good-beak-but-bad-trace"; 12 and 15 are in both
# "non-beaks" and "good-but-unsure-if-beak") -- confirm whether the overlap
# is intentional.
# fmt: off
categories = {
    "bad": [
        0, 2, 3, 4, 13, 18, 19, 20, 22, 24,
        25, 27, 34, 37, 38, 39, 42, 45, 47, 48,
        49, 50, 52, 55, 56, 58, 59, 62, 64, 66,
        68, 71, 75, 76, 78, 79, 82, 89, 90, 93,
        94, 95, 99, 104, 108, 111, 116, 122, 126, 127,
        130,
    ],
    "non-beaks": [
        1, 5, 8, 10, 11, 12, 14, 15, 16, 21,
        23, 26, 28, 29, 31, 33, 40, 41, 53, 54,
        57, 69, 77, 81, 83, 86, 87, 88, 91, 97,
        98, 101, 102, 103, 106, 107, 109, 110, 112, 113,
        114, 115, 117, 119, 123, 124, 125, 129, 131, 133,
    ],
    "good-beak-but-bad-trace": [
        14, 17, 29, 30, 36, 51, 61, 65, 67, 72,
        73, 74, 80, 84, 85, 91, 96, 105, 106, 109,
        110, 112, 113, 115, 118, 120, 121, 124, 132,
    ],
    "good-but-unsure-if-beak": [9, 12, 15, 32, 43, 44, 46, 70, 100, 128],
    "good-beaks": [6, 7, 35, 60, 63],
}
# fmt: on

In [None]:
# One figure per category: a grid of panels, each showing an image, a
# semi-transparent overlay of its "above" grain mask and (when present) its
# ordered traces in red.
n_cols = 3
for category, indices in categories.items():
    # Ceiling division: an exact multiple of n_cols must not add an empty
    # row. At least one row is kept so an empty category still produces a
    # valid (blank) figure rather than a zero-row grid.
    n_rows = max(1, -(-len(indices) // n_cols))
    # squeeze=False keeps axs two-dimensional whatever the grid shape is.
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(10 * n_cols, 10 * n_rows), squeeze=False)
    flat_axes = axs.flatten()

    for ax, image_index in zip(flat_axes, indices):
        image_data = image_dict[image_index]
        ax.imshow(image_data["image"])
        # Only index 1 of the mask's last axis is overlaid.
        # NOTE(review): presumably that is the grain class channel -- confirm.
        ax.imshow(image_data["grain_masks"]["above"][:, :, 1], alpha=0.2)
        # Images without an "ordered_traces" entry are shown without traces.
        if "ordered_traces" in image_data:
            for grain_data in image_data["ordered_traces"]["above"].values():
                for molecule_data in grain_data.values():
                    bbox = molecule_data["bbox"]
                    ordered_coords = molecule_data["ordered_coords"]
                    # Column 1 is plotted as x and column 0 as y, each shifted
                    # by the matching bbox entry.
                    # NOTE(review): presumably coords are (row, col) relative
                    # to the grain's bounding box -- confirm.
                    ax.plot(ordered_coords[:, 1] + bbox[1], ordered_coords[:, 0] + bbox[0], color="red")
        ax.set_title(f"image index: {image_index} | p2nm: {image_data['pixel_to_nm_scaling']:.2f}")

    # Hide the panels of the last row that have no image assigned.
    for unused_ax in flat_axes[len(indices):]:
        unused_ax.axis("off")

    fig.suptitle(f"{category} n={len(indices)}", fontsize=32)
    plt.show()
    # Release the (large) figure before building the next category's grid.
    plt.close(fig)