In [30]:
import os
import numpy as np
import random
import tifffile as tiff

# Known artifact regions to avoid.
# Maps a volume name (the input file name without its ".tif" extension) to the
# set of B-scan indices that simulate_missing_bscans() must never remove.
# NOTE(review): every region is built with range(a, b), which covers indices
# a .. b-1 (the end value itself is NOT included). The previous wording said
# "inclusive" -- confirm the end values were chosen with that in mind.
# Most entries also exclude range(0, 2) and range(998, 1000); presumably the
# first/last two slices of a 1000-slice volume -- TODO confirm volume depth.
artifact_exclusion = {
    "1.1_OCTA_Vol1_Processed_Cropped_gt":
        set(range(965, 982)) | set(range(0, 2)) | set(range(998, 1000)),
    "1.2_OCTA_Vol2_Processed_Cropped_gt":
        set(range(343, 361)) | set(range(472, 488)) | set(range(916, 929)) | set(range(0, 2)) | set(range(998, 1000)),
    "1.4_OCTA_Vol1_Processed_Cropped_gt": 
        set(range(518, 536)) | set(range(548, 564)) | set(range(621, 639)) |
        set(range(656, 671)) | set(range(714, 731)) | set(range(980, 998)) | set(range(0, 2)) | set(range(998, 1000)),
    "3.4_OCTA_Vol2_Processed_Cropped_gt": 
        set(range(242, 262)) | set(range(266, 286)) | set(range(607, 622)) |
        set(range(634, 650)) | set(range(852, 871)) | set(range(881, 898)) | set(range(0, 2)) | set(range(998, 1000)),
    "5.3_OCTA_Vol1_Processed_Cropped_gt": 
        set(range(102, 116)) | set(range(440, 445)) | set(range(464, 488)) |
        set(range(498, 513)) | set(range(893, 908)) | set(range(0, 2)) | set(range(998, 1000)),
    "2.1_OCTA_Vol2_Processed_Cropped_gt":
        set(range(565, 585)) | set(range(603, 621)) | set(range(0, 2)) | set(range(998, 1000)),
    "2.2_OCTA_Vol2_Processed_Cropped_gt":
        set(range(99, 119)) | set(range(158, 176)) | set(range(492, 510)) | set(range(0, 2)) | set(range(998, 1000)),
    # Disabled entries below: these volumes are currently not looked up because
    # their keys are commented out (an unknown name falls back to "no exclusions").
    # "15.4_OCTA_Vol2_Processed_Cropped_gt": 
    #     set(range(0, 18)) | set(range(218, 238)) | set(range(419, 438)) |
    #     set(range(449, 468)) | set(range(725, 743)) | set(range(998, 1000)),
    # "16.2_OCTA_Vol1_Processed_Cropped_gt": 
    #     set(range(358, 376)) | set(range(0, 2)) | set(range(998, 1000)),
    # "16.3_OCTA_Vol2_Processed_Cropped_gt":
    #     set(range(0, 20)) | set(range(126, 145)) | set(range(221, 238)) | 
    #     set(range(561, 579)) | set(range(618, 636)) | set(range(998, 1000)),
    # "22.1_OCTA_Vol2_Processed_Cropped_gt":
    #     set(range(266, 287)) | set(range(368, 388)) | set(range(631, 651)) | set(range(727, 747)) | set(range(761, 781)) | 
    #     set(range(894, 912)) | set(range(973, 993)) | set(range(0, 2)) | set(range(998, 1000)),
    # "25.3_OCTA_Vol1_Processed_Cropped_gt":
    #     set(range(85, 105)) | set(range(372, 388)) | set(range(975, 994)) | set(range(0, 2)) | set(range(998, 1000)),
    # "35.2_OCTA_Vol2_Processed_Cropped_gt": 
    #     set(range(0, 2)) | set(range(998, 1000)),
}


def _valid_block_starts(forbidden: np.ndarray, block_size: int) -> list:
    """Return every start index whose ``block_size``-long block avoids ``forbidden``.

    ``forbidden`` is a 1D boolean array; a start ``s`` is valid when none of
    ``forbidden[s:s + block_size]`` is set. ``block_size`` must be >= 1.
    """
    if block_size > forbidden.size:
        return []
    # running[k] = number of forbidden slices in [0, k); a window is clean when
    # the difference of its two prefix sums is zero.
    running = np.concatenate(([0], np.cumsum(forbidden, dtype=np.int64)))
    hits_per_window = running[block_size:] - running[:-block_size]
    return np.flatnonzero(hits_per_window == 0).tolist()


def simulate_missing_bscans(volume: np.ndarray, volume_name: str, missing_fraction: float = 0.1, block_size_range=(1, 4),
                            *, exclusion_indices=None, min_gap: int = 2, rng=None):
    """
    Simulate missing B-scans in an OCTA volume, avoiding predefined artifact regions.

    Contiguous blocks of slices along axis 0 are zeroed out at random positions
    until ``int(D * missing_fraction)`` slices are removed or no further block
    fits. Blocks never cover an excluded slice and are always separated from
    each other by at least ``min_gap`` intact slices.

    Args:
        volume (np.ndarray): Input 3D volume of shape (D, H, W)
        volume_name (str): Identifier used to retrieve exclusion ranges
        missing_fraction (float): Fraction of slices to remove
        block_size_range (tuple): Inclusive (min, max) block sizes to simulate
        exclusion_indices (iterable of int, optional): Slice indices that must
            not be removed. When omitted, the module-level ``artifact_exclusion``
            mapping is consulted with ``volume_name`` (unknown names mean
            "no exclusions").
        min_gap (int): Minimum number of intact slices between missing blocks
        rng (random.Random, optional): Random source; defaults to the global
            ``random`` module so ``random.seed`` keeps working.

    Returns:
        corrupted_volume, mask, missing_indices
        ``corrupted_volume`` is a copy of ``volume`` with removed slices set to 0,
        ``mask`` is a uint8 vector of length D (1 = removed) and
        ``missing_indices`` is the sorted list of removed slice indices.

    Raises:
        ValueError: If ``volume`` is not 3D, ``block_size_range`` is not a valid
            ``1 <= min <= max`` pair, or ``min_gap`` is negative.
    """
    if volume.ndim != 3:
        raise ValueError(f"Expected a 3D volume of shape (D, H, W), got shape {volume.shape}")
    min_block, max_block = block_size_range
    if min_block < 1 or max_block < min_block:
        # A block size of 0 would never advance the removal count (endless loop).
        raise ValueError(f"block_size_range must satisfy 1 <= min <= max, got {block_size_range}")
    if min_gap < 0:
        raise ValueError(f"min_gap must be non-negative, got {min_gap}")

    depth = volume.shape[0]
    num_missing = int(depth * missing_fraction)
    corrupted_volume = volume.copy()
    mask = np.zeros(depth, dtype=np.uint8)
    missing_indices = []

    if exclusion_indices is None:
        artifact_indices = artifact_exclusion.get(volume_name, set())
    else:
        artifact_indices = set(exclusion_indices)

    print(f"Exclusion indices for {volume_name}: {sorted(artifact_indices)}")

    # forbidden[i] is True when slice i may not be part of a new block: it is an
    # artifact slice, already removed, or within min_gap of a removed slice.
    forbidden = np.zeros(depth, dtype=bool)
    in_range = np.array([i for i in artifact_indices if 0 <= i < depth], dtype=np.intp)
    forbidden[in_range] = True

    chooser = random if rng is None else rng
    removed = 0

    while removed < num_missing:
        remaining = num_missing - removed
        # Never remove more slices than requested.
        block_size = min(chooser.randint(min_block, max_block), remaining)
        smallest_allowed = min(min_block, remaining)

        # If the drawn size does not fit anywhere, fall back to smaller blocks
        # instead of giving up while space is still available.
        valid_starts = []
        while block_size >= smallest_allowed:
            valid_starts = _valid_block_starts(forbidden, block_size)
            if valid_starts:
                break
            block_size -= 1

        if not valid_starts:
            break  # no space left

        start_idx = chooser.choice(valid_starts)
        stop_idx = start_idx + block_size

        corrupted_volume[start_idx:stop_idx] = 0
        mask[start_idx:stop_idx] = 1
        # Reserve the block plus its safety margin on both sides.
        forbidden[max(0, start_idx - min_gap):stop_idx + min_gap] = True
        missing_indices.extend(range(start_idx, stop_idx))
        removed += block_size

    return corrupted_volume, mask, sorted(missing_indices)


# Input volume selection: exactly one input_path must be active; the others are
# kept for quickly switching between ground-truth volumes.
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/1.1_OCTA_Vol1_Processed_Cropped_gt.tif"
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/1.2_OCTA_Vol2_Processed_Cropped_gt.tif"
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/1.4_OCTA_Vol1_Processed_Cropped_gt.tif"
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/3.4_OCTA_Vol2_Processed_Cropped_gt.tif"
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/5.3_OCTA_Vol1_Processed_Cropped_gt.tif"
# input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/2.1_OCTA_Vol2_Processed_Cropped_gt.tif"
input_path = "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/2.2_OCTA_Vol2_Processed_Cropped_gt.tif"


# Load volume
volume = tiff.imread(input_path)
print("Volume shape:", volume.shape)

# Derive volume name (no .tif); this is also the key into artifact_exclusion
volume_name = os.path.splitext(os.path.basename(input_path))[0]

# Simulate missing B-scans
corrupted_volume, mask, missing = simulate_missing_bscans(volume, volume_name, missing_fraction=0.16, block_size_range=(1, 5))

# Save corrupted and mask volumes next to the input file
base_dir = os.path.dirname(input_path)
base_name = volume_name

# Swap a trailing "_gt" for the output suffix. Only the suffix is touched, and
# the new suffix is always appended, so the output paths can never collide with
# input_path (a plain str.replace would leave the name unchanged -- and thus
# overwrite the ground truth -- whenever "_gt" is absent).
gt_suffix = "_gt"
output_stem = base_name[:-len(gt_suffix)] if base_name.endswith(gt_suffix) else base_name
corrupted_path = os.path.join(base_dir, f"{output_stem}_corrupted.tif")
mask_path = os.path.join(base_dir, f"{output_stem}_mask.tif")

tiff.imwrite(corrupted_path, corrupted_volume.astype(np.uint16), imagej=True)
# Expand the per-slice mask vector to the full (D, H, W) volume shape.
mask_volume = np.tile(mask[:, None, None], (1, volume.shape[1], volume.shape[2]))
tiff.imwrite(mask_path, mask_volume.astype(np.uint8), imagej=True)

print(f"Corrupted volume saved to: {corrupted_path}")
print(f"Mask saved to: {mask_path}")
print(f"Removed {len(missing)} B-scans at indices: {missing[:10]}...")


Volume shape: (1000, 145, 400)
Exclusion indices for 2.2_OCTA_Vol2_Processed_Cropped_gt: [0, 1, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 998, 999]
Corrupted volume saved to: /media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/2.2_OCTA_Vol2_Processed_Cropped_corrupted.tif
Mask saved to: /media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing_v2_MedianFilter2px/2.2_OCTA_Vol2_Processed_Cropped_mask.tif
Removed 160 B-scans at indices: [25, 26, 27, 28, 37, 38, 45, 46, 47, 48]...


In [None]:
import os
import numpy as np
import tifffile as tiff

# Target height: size that axis 1 of every volume is padded to by the loop below
TARGET_HEIGHT = 165

# List of input file paths (TIFF volumes) to pad; commented-out entries are
# skipped and kept only for quick re-enabling.
input_files = [
    # "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/1.1_OCTA_Vol1_Processed_Cropped_gt.tif",
    # "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/1.2_OCTA_Vol2_Processed_Cropped_gt.tif",
    # "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/1.4_OCTA_Vol1_Processed_Cropped_gt.tif",
    "/media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/3.4_OCTA_Vol2_Processed_Cropped_gt.tif"
]

# Pad function
def pad_volume_to_target_height(volume, target_height):
    """Zero-pad axis 1 of a 3D volume so that it reaches ``target_height``.

    The missing rows are split between both sides of axis 1; when the amount
    is odd, the extra row goes to the end. A volume whose axis 1 is already at
    least ``target_height`` long is returned unchanged (same object).
    """
    rows_to_add = target_height - volume.shape[1]
    if rows_to_add > 0:
        rows_before, leftover = divmod(rows_to_add, 2)
        rows_after = rows_before + leftover
        # Only axis 1 grows; axes 0 and 2 keep their size.
        pad_widths = ((0, 0), (rows_before, rows_after), (0, 0))
        return np.pad(volume, pad_widths, mode='constant', constant_values=0)

    # Nothing to add: hand back the input as-is.
    return volume

# Process each file: load, pad axis 1 to TARGET_HEIGHT, and save next to the
# input with a "_Height<TARGET_HEIGHT>" suffix.
for file_path in input_files:
    print(f"Processing: {file_path}")

    # Load the volume
    volume = tiff.imread(file_path)

    # Pad the volume
    padded_volume = pad_volume_to_target_height(volume, TARGET_HEIGHT)

    # Sanity check. Raised explicitly (not via assert) so it still runs under
    # "python -O"; it triggers when the input is already taller than the target,
    # because the pad helper returns such volumes unchanged.
    if padded_volume.shape[1] != TARGET_HEIGHT:
        raise ValueError(
            f"Padding failed to reach target height {TARGET_HEIGHT} for {file_path}: "
            f"got shape {padded_volume.shape}"
        )

    # Save output; the suffix follows TARGET_HEIGHT so the file name always
    # matches the actual padded size.
    base, ext = os.path.splitext(file_path)
    out_path = base + f"_Height{TARGET_HEIGHT}{ext}"
    tiff.imwrite(out_path, padded_volume.astype(np.uint16))

    print(f"Saved padded volume to: {out_path}")


Processing: /media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/3.4_OCT_uint16_Cropped_Reflected_VolumeSplit_2_RegSeq_seqSVD_Cropped_gt.tif
Saved padded volume to: /media/admin/Expansion/Mosaic_Data_for_Ipeks_Group/OCT_Inpainting_Testing/3.4_OCT_uint16_Cropped_Reflected_VolumeSplit_2_RegSeq_seqSVD_Cropped_gt_Height165.tif
