In [1]:
import os
import sys
from glob import glob
from natsort import natsorted

import pandas as pd
import re

ROOT_DIR = '/mnt/ceph/users/alu10/datasets/256x256x256_Registered_Iso_Blastospim1-Trilinear_Image'
TEST_SET_LIST = ['F24_001', 'F24_002', 'F24_006', 'F25_002', 'F25_008', 'F27_010', 'F27_007', 'F27_009', 'F29_003','F29_004', 'F30_004', 'F30_008', 'F30_009', 'M6_021', 'M6_012', 'M7_004', 'M7_000', 'F42_063', 'F41_056', 'F34_073', 'F33_067', 'F26_008', 'F24_010', 'F8_072', 'F44_087', 'F44_089', 'F39_117', 'F40_136', 'F49_148', 'F55_185']
TEST_SUPPORT_SET_LIST = ['F8_071', 'F8_072', 'F8_073', 'F24_000', 'F24_001', 'F24_002', 'F24_003', 'F24_005', 'F24_006', 'F24_007', 'F24_009', 'F24_010', 'F25_001', 'F25_002', 'F25_003', 'F25_007', 'F25_008', 'F25_009', 'F26_008', 'F26_009', 'F27_006', 'F27_007', 'F27_008', 'F27_009', 'F27_010', 'F29_002', 'F29_003', 'F29_004', 'F29_005', 'F30_003', 'F30_004', 'F30_005', 'F30_007', 'F30_008', 'F30_009', 'F30_010', 'F33_066', 'F33_067', 'F33_068', 'F34_072', 'F34_073', 'F34_074', 'F38_116', 'F39_117', 'F39_118', 'F40_135', 'F40_136', 'F40_137', 'F41_055', 'F41_056', 'F41_057', 'F42_062', 'F42_063', 'F42_064', 'F44_086', 'F44_087', 'F44_088', 'F44_089', 'F44_090', 'F49_147', 'F49_148', 'F49_149', 'F55_184', 'F55_185', 'F55_186', 'M6_011', 'M6_012', 'M6_013', 'M6_020', 'M6_021', 'M7_000', 'M7_001', 'M7_003', 'M7_004', 'M7_005']

In [2]:
image_paths = glob('/mnt/ceph/users/alu10/datasets/256x256x256_Registered_Iso_Blastospim1-Trilinear_Image/*/*/images/*.npy')
mask_paths = glob('/mnt/ceph/users/alu10/datasets/256x256x256_Registered_Iso_Blastospim1-Trilinear_Image/*/*/masks/*.npy')
train_image_paths = natsorted([p for p in image_paths if not any(seq in p for seq in TEST_SET_LIST)])
train_mask_paths = natsorted([p for p in mask_paths if not any(seq in p for seq in TEST_SET_LIST)])

val_image_paths_pure = natsorted([p for p in image_paths if any(seq in p for seq in TEST_SET_LIST)])
val_mask_paths_pure = natsorted([p for p in mask_paths if any(seq in p for seq in TEST_SET_LIST)])
val_image_paths = natsorted([p for p in image_paths if any(seq in p for seq in TEST_SUPPORT_SET_LIST)])
val_mask_paths = natsorted([p for p in mask_paths if any(seq in p for seq in TEST_SUPPORT_SET_LIST)])

In [4]:
print(train_image_paths[200])
print(train_image_paths[201])

/mnt/ceph/users/alu10/datasets/256x256x256_Registered_Iso_Blastospim1-Trilinear_Image/F8/F8_066/images/F8_066_image_0004.npy
/mnt/ceph/users/alu10/datasets/256x256x256_Registered_Iso_Blastospim1-Trilinear_Image/F8/F8_066/images/F8_066_image_0005.npy


In [10]:
train_dict = {'image_before': [], 'image_target': [], 'image_after': [], 'mask_target': []}
for idx in range(len(train_image_paths)):
    image_path_target = train_image_paths[idx]
    mask_path_target = train_mask_paths[idx]

    def extract_sequence_and_timepoint(path):
        # Extract sequence name from the directory
        sequence_name = path.split('/')[7]
        timepoint = int(path.split('/')[8].split('_')[1])
        croppoint = int(path[-8:-4])
    
        return sequence_name, timepoint

    def replace_timepoint(path, sequence_name, old_timepoint, new_timepoint):
        return path.replace(f'{sequence_name}_{old_timepoint:03d}', f'{sequence_name}_{new_timepoint:03d}')
        
    sequence_target, timepoint_target = extract_sequence_and_timepoint(image_path_target)

    timepoint_before = timepoint_target-1
    image_path_before = replace_timepoint(image_path_target, sequence_target, timepoint_target, timepoint_before)
    if image_path_before in train_image_paths:
        has_before = True
    else:
        has_before = False

    timepoint_after = timepoint_target+1
    image_path_after = replace_timepoint(image_path_target, sequence_target, timepoint_target, timepoint_after)
    if image_path_after in train_image_paths:
        has_after = True
    else:
        has_after = False

    # Add relative path from ROOT_DIR
    train_dict['image_before'].append(image_path_before.replace(ROOT_DIR + '/', '') if has_before else '')
    train_dict['image_target'].append(image_path_target.replace(ROOT_DIR + '/', ''))
    train_dict['image_after'].append(image_path_after.replace(ROOT_DIR + '/', '') if has_after else '')
    train_dict['mask_target'].append(mask_path_target.replace(ROOT_DIR + '/', ''))

# Save training data to CSV
train_df = pd.DataFrame(train_dict)
train_df.to_csv('../annotations/train_split_4D.csv', index=False)

In [11]:
train_df

Unnamed: 0,image_before,image_target,image_after,mask_target
0,,F2/F2_008/images/F2_008_image_0001.npy,F2/F2_009/images/F2_009_image_0001.npy,F2/F2_008/masks/F2_008_masks_0001.npy
1,,F2/F2_008/images/F2_008_image_0002.npy,F2/F2_009/images/F2_009_image_0002.npy,F2/F2_008/masks/F2_008_masks_0002.npy
2,,F2/F2_008/images/F2_008_image_0003.npy,F2/F2_009/images/F2_009_image_0003.npy,F2/F2_008/masks/F2_008_masks_0003.npy
3,,F2/F2_008/images/F2_008_image_0004.npy,F2/F2_009/images/F2_009_image_0004.npy,F2/F2_008/masks/F2_008_masks_0004.npy
4,,F2/F2_008/images/F2_008_image_0005.npy,F2/F2_009/images/F2_009_image_0005.npy,F2/F2_008/masks/F2_008_masks_0005.npy
...,...,...,...,...
8420,T2/T2_015/images/T2_015_image_0004.npy,T2/T2_016/images/T2_016_image_0004.npy,,T2/T2_016/masks/T2_016_masks_0004.npy
8421,T2/T2_015/images/T2_015_image_0005.npy,T2/T2_016/images/T2_016_image_0005.npy,,T2/T2_016/masks/T2_016_masks_0005.npy
8422,T2/T2_015/images/T2_015_image_0006.npy,T2/T2_016/images/T2_016_image_0006.npy,,T2/T2_016/masks/T2_016_masks_0006.npy
8423,T2/T2_015/images/T2_015_image_0007.npy,T2/T2_016/images/T2_016_image_0007.npy,,T2/T2_016/masks/T2_016_masks_0007.npy


In [15]:
val_dict = {'image_before': [], 'image_target': [], 'image_after': [], 'mask_target': []}
for idx in range(len(val_image_paths_pure)):
    image_path_target = val_image_paths_pure[idx]
    mask_path_target = val_mask_paths_pure[idx]

    def extract_sequence_and_timepoint(path):
        # Extract sequence name from the directory
        sequence_name = path.split('/')[7]
        timepoint = int(path.split('/')[8].split('_')[1])
        croppoint = int(path[-8:-4])
    
        return sequence_name, timepoint

    def replace_timepoint(path, sequence_name, old_timepoint, new_timepoint):
        return path.replace(f'{sequence_name}_{old_timepoint:03d}', f'{sequence_name}_{new_timepoint:03d}')
        
    sequence_target, timepoint_target = extract_sequence_and_timepoint(image_path_target)

    timepoint_before = timepoint_target-1
    image_path_before = replace_timepoint(image_path_target, sequence_target, timepoint_target, timepoint_before)
    if image_path_before in val_image_paths:
        has_before = True
    else:
        has_before = False

    timepoint_after = timepoint_target+1
    image_path_after = replace_timepoint(image_path_target, sequence_target, timepoint_target, timepoint_after)
    if image_path_after in val_image_paths:
        has_after = True
    else:
        has_after = False

    # Add relative path from ROOT_DIR
    val_dict['image_before'].append(image_path_before.replace(ROOT_DIR + '/', '') if has_before else '')
    val_dict['image_target'].append(image_path_target.replace(ROOT_DIR + '/', ''))
    val_dict['image_after'].append(image_path_after.replace(ROOT_DIR + '/', '') if has_after else '')
    val_dict['mask_target'].append(mask_path_target.replace(ROOT_DIR + '/', ''))

# Save validation data to CSV
val_df = pd.DataFrame(val_dict)
val_df.to_csv('../annotations/val_split_4D.csv', index=False)

In [16]:
val_df

Unnamed: 0,image_before,image_target,image_after,mask_target
0,F8/F8_071/images/F8_071_image_0001.npy,F8/F8_072/images/F8_072_image_0001.npy,F8/F8_073/images/F8_073_image_0001.npy,F8/F8_072/masks/F8_072_masks_0001.npy
1,F8/F8_071/images/F8_071_image_0002.npy,F8/F8_072/images/F8_072_image_0002.npy,F8/F8_073/images/F8_073_image_0002.npy,F8/F8_072/masks/F8_072_masks_0002.npy
2,F8/F8_071/images/F8_071_image_0003.npy,F8/F8_072/images/F8_072_image_0003.npy,F8/F8_073/images/F8_073_image_0003.npy,F8/F8_072/masks/F8_072_masks_0003.npy
3,F8/F8_071/images/F8_071_image_0004.npy,F8/F8_072/images/F8_072_image_0004.npy,F8/F8_073/images/F8_073_image_0004.npy,F8/F8_072/masks/F8_072_masks_0004.npy
4,F8/F8_071/images/F8_071_image_0005.npy,F8/F8_072/images/F8_072_image_0005.npy,F8/F8_073/images/F8_073_image_0005.npy,F8/F8_072/masks/F8_072_masks_0005.npy
...,...,...,...,...
501,M7-M8/M7_004/images/M7_004_image_0004.npy,M7-M8/M7_004/images/M7_004_image_0004.npy,M7-M8/M7_004/images/M7_004_image_0004.npy,M7-M8/M7_004/masks/M7_004_masks_0004.npy
502,M7-M8/M7_004/images/M7_004_image_0005.npy,M7-M8/M7_004/images/M7_004_image_0005.npy,M7-M8/M7_004/images/M7_004_image_0005.npy,M7-M8/M7_004/masks/M7_004_masks_0005.npy
503,M7-M8/M7_004/images/M7_004_image_0006.npy,M7-M8/M7_004/images/M7_004_image_0006.npy,M7-M8/M7_004/images/M7_004_image_0006.npy,M7-M8/M7_004/masks/M7_004_masks_0006.npy
504,M7-M8/M7_004/images/M7_004_image_0007.npy,M7-M8/M7_004/images/M7_004_image_0007.npy,M7-M8/M7_004/images/M7_004_image_0007.npy,M7-M8/M7_004/masks/M7_004_masks_0007.npy
