In [34]:
import os
import shutil
import cv2
import json
import random
from data_utils import SegDataset, convert_sequential_labels, save_array_as_image

In [22]:
# Training dataset: original frames/masks plus a second pair of directories
# holding generated frames and their masks.
# NOTE(review): aug_prop presumably controls how much augmented data is mixed
# in -- confirm against SegDataset in data_utils.
ds = SegDataset(
    image_dir="./data/train_frames/", mask_dir="./data/train_seg/",
    aug_image_dir="./data/generated-frames-edge-1-350000-fid/",
    aug_mask_dir="./data/train_seg_aug/", aug_prop=0.5,
    transform=None,
)

# Validation dataset: no augmentation directories are passed here.
ds_va = SegDataset(image_dir="./data/valid_frames/", mask_dir="./data/valid_seg/", transform=None)

In [23]:
# Inspect the last five frame paths and the last five mask paths of the training dataset.
ds.frames[-5:], ds.masks[-5:]

(['./data/generated-frames-edge-1-350000-fid/ch4_ed_mask_164_generated_0.png',
  './data/generated-frames-edge-1-350000-fid/ch2_es_mask_219_generated_0.png',
  './data/generated-frames-edge-1-350000-fid/ch4_ed_mask_155_generated_1.png',
  './data/generated-frames-edge-1-350000-fid/ch2_ed_mask_158_generated_1.png',
  './data/generated-frames-edge-1-350000-fid/ch4_es_mask_402_generated_0.png'],
 ['./data/train_seg_aug/ch4_ed_mask_164_0.png',
  './data/train_seg_aug/ch2_es_mask_219_0.png',
  './data/train_seg_aug/ch4_ed_mask_155_1.png',
  './data/train_seg_aug/ch2_ed_mask_158_1.png',
  './data/train_seg_aug/ch4_es_mask_402_0.png'])

In [24]:
def get_file_name(path_name):
    """Return the final component (the file name) of ``path_name``.

    Args:
        path_name: A path given as ``str`` or any ``os.PathLike`` object.

    Returns:
        The part of the path after the last separator. A path that ends in a
        separator yields an empty string.
    """
    # os.path.basename handles the platform's separators and path-like
    # objects, unlike a manual split on "/".
    return os.path.basename(path_name)

def convert_label_save(old_path, new_path):
    """Convert the mask at ``old_path`` and save the result to ``new_path``.

    The mask path is passed to ``convert_sequential_labels`` and the value it
    returns is handed to ``save_array_as_image`` together with ``new_path``.
    NOTE(review): presumably the conversion remaps mask values to consecutive
    label ids -- confirm in data_utils.

    Returns:
        None.
    """
    relabeled = convert_sequential_labels(old_path)
    save_array_as_image(relabeled, new_path)

In [42]:
# Export the datasets into an imagesTr / labelsTr / imagesTs folder layout and
# write two JSON files next to it:
#   data_list.json -- training image/label/fold entries plus testing images
#   map_list.json  -- maps every new training file name to its source paths
seed = 0
random.seed(seed)  # makes the fold shuffle below reproducible

directory = "./data_nnunet/edge_1_350k_fid/"
# makedirs (unlike mkdir) also creates missing parent directories and does not
# fail when the directory already exists.
os.makedirs(directory, exist_ok=True)

dirs = ["imagesTr", "labelsTr", "imagesTs"]
for name in dirs:
    os.makedirs(os.path.join(directory, name), exist_ok=True)
tr_path = os.path.join(directory, dirs[0])
lb_path = os.path.join(directory, dirs[1])
ts_path = os.path.join(directory, dirs[2])

datalist_json = {"training": [], "testing": []}
map_pairs = {}

# Test data: copy each validation frame into imagesTs so that the paths
# recorded in the data list point at files that actually exist.
for va_frame in ds_va.frames:
    ts_image = os.path.join(ts_path, get_file_name(va_frame))
    shutil.copy(va_frame, ts_image)
    datalist_json["testing"].append({"image": ts_image})

# Training data
# zip() would silently drop unmatched entries, so fail loudly instead.
assert len(ds.frames) == len(ds.masks), (
    f"frame/mask count mismatch: {len(ds.frames)} frames vs {len(ds.masks)} masks"
)
num_folds = 4
num_samples = len(ds.frames)
# Balanced fold ids (0 .. num_folds - 1), shuffled so the assignment is random.
folds = [(i % num_folds) for i in range(num_samples)]
random.shuffle(folds)
for ii, (frame, mask) in enumerate(zip(ds.frames, ds.masks)):
    new_name = f"camus_{ii}.png"
    map_pairs[new_name] = {"image": frame, "label": mask}
    # Despite the *_dir names, these are full destination file paths.
    image_dir = os.path.join(tr_path, new_name)
    label_dir = os.path.join(lb_path, new_name)
    shutil.copy(frame, image_dir)
    convert_label_save(mask, label_dir)
    datalist_json["training"].append({
        "image": image_dir,
        "label": label_dir,
        "fold": folds[ii]  # randomly assigned fold id in [0, num_folds)
    })

data_list_filename = os.path.join(directory, "data_list.json")
with open(data_list_filename, "w") as fh:
    json.dump(datalist_json, fh, indent=4)

map_list_filename = os.path.join(directory, "map_list.json")
with open(map_list_filename, "w") as fh:
    json.dump(map_pairs, fh, indent=4)

In [32]:
# Write the new-name -> original image/label mapping to map_list.json.
# NOTE(review): the export cell already writes this same file at its end.
map_list_filename = os.path.join(directory, "map_list.json")
with open(map_list_filename, "w") as fh:
    fh.write(json.dumps(map_pairs, indent=4))

In [40]:
# Preview of a shuffled, balanced fold assignment.
# NOTE(review): fold ids here are 1-based, while the export cell uses 0-based
# ids; this cell also reassigns `num_folds`, `num_samples` and `folds`.
num_folds = 4
num_samples = len(ds.frames)
folds = [1 + (idx % num_folds) for idx in range(num_samples)]
random.shuffle(folds)
folds[:20]

[4, 3, 2, 3, 3, 1, 3, 3, 1, 2, 3, 2, 4, 1, 3, 3, 1, 3, 2, 4]