## Postprocessing of masks (Validation dataset) to get rid of a few mask prediction errors

In [None]:
import os
import numpy as np
import nibabel as nib
from pathlib import Path
from skimage.measure import label
from scipy.ndimage import distance_transform_edt


def process_slice(slice_mask, midline, max_dist=20):
    """Relabel components that lie entirely on the wrong side of the midline.

    Labels 1-3 are treated as the right lung and are expected at columns left
    of ``midline``; labels 4-5 are treated as the left lung and are expected at
    columns right of it. Every 2D connected component of a label that lies
    strictly on the wrong side is reassigned to the nearest label of the
    opposite lung, provided that label occurs in the slice and is at most
    ``max_dist`` pixels (Euclidean) away. Components without such a candidate
    are left unchanged.

    Args:
        slice_mask: 2D integer label array; the second axis (columns) is the
            one compared against ``midline``.
        midline: Column index separating the two halves of the image.
        max_dist: Largest accepted distance, in pixels, between a component
            and its replacement label. Defaults to 20.

    Returns:
        A relabelled copy of ``slice_mask``; the input array is not modified.
    """
    processed_mask = slice_mask.copy()

    # The distance map of a target label does not depend on the component
    # being examined, so each map is built at most once per slice instead of
    # once per wrong-side component. ``None`` marks a label absent from the
    # slice.
    dist_maps = {}

    def distance_to(target_label):
        if target_label not in dist_maps:
            target_mask = (slice_mask == target_label)
            dist_maps[target_label] = (
                distance_transform_edt(~target_mask) if np.any(target_mask) else None
            )
        return dist_maps[target_label]

    for label_val in range(1, 6):  # Labels 1 to 5
        if label_val in (1, 2, 3):  # Right lung (should appear on left side of image)
            correct_side = 'left'
            potential_new_labels = (4, 5)
        else:  # Left lung (should appear on right side of image)
            correct_side = 'right'
            potential_new_labels = (1, 2, 3)

        binary = (slice_mask == label_val)
        if not np.any(binary):
            continue

        components = label(binary)
        for comp_idx in range(1, components.max() + 1):
            comp_mask = (components == comp_idx)
            cols = np.where(np.any(comp_mask, axis=0))[0]
            if len(cols) == 0:
                continue

            min_x, max_x = cols[0], cols[-1]

            # Strict comparisons: a component that touches or straddles the
            # midline is never considered to be on the wrong side.
            if correct_side == 'left':
                is_wrong_side = min_x > midline
            else:
                is_wrong_side = max_x < midline
            if not is_wrong_side:
                continue

            min_dist = float('inf')
            best_label = None
            for new_label in potential_new_labels:
                dist_map = distance_to(new_label)
                if dist_map is None:
                    continue
                dist_to_label = dist_map[comp_mask].min()

                # Strict '<' keeps the first candidate when distances tie.
                if dist_to_label < min_dist and dist_to_label <= max_dist:
                    min_dist = dist_to_label
                    best_label = new_label

            if best_label is not None:
                processed_mask[comp_mask] = best_label

    return processed_mask

def process_volume(input_path, output_dir):
    """Run `process_slice` on every slice of a NIfTI label volume and save it.

    The volume is sliced along its third axis. The result is written to
    ``output_dir`` under the input file name with ``.nii.gz`` replaced by
    ``_postprocessed.nii.gz``, reusing the affine and header of the input.

    Args:
        input_path: Path of the ``.nii.gz`` segmentation to process.
        output_dir: Directory that receives the processed file.

    Returns:
        Total number of voxels whose label differs from the input.
    """
    print(f"\nProcessing: {os.path.basename(input_path)}")
    img = nib.load(input_path)
    seg = img.get_fdata().astype(np.uint8)
    processed_volume = np.zeros_like(seg)
    # Each slice is handed to process_slice transposed (rot90(k=-1) followed
    # by fliplr is exactly a transpose), so the columns it inspects run along
    # axis 0 of the volume. The midline therefore has to come from shape[0];
    # shape[1] is only equivalent when the in-plane matrix is square.
    midline = seg.shape[0] // 2
    total_changes = 0

    for z in range(seg.shape[2]):
        orig_slice = seg[:, :, z]
        # Transpose in, transpose back out: same orientation change as the
        # rot90/fliplr pair and its inverse.
        processed_slice = process_slice(orig_slice.T, midline).T
        processed_volume[:, :, z] = processed_slice
        total_changes += np.sum(processed_slice != orig_slice)

    output_filename = os.path.basename(str(input_path)).replace('.nii.gz', '_postprocessed.nii.gz')
    output_path = os.path.join(output_dir, output_filename)
    processed_img = nib.Nifti1Image(processed_volume, img.affine, img.header)
    nib.save(processed_img, output_path)

    print(f"  Total changes: {total_changes}")
    print(f"  Avg changes/slice: {total_changes / seg.shape[2]:.2f}")
    print(f"  Saved to: {output_path}")

    return total_changes

# === Input and output folders
input_folder = r"------ INSERT PATH HERE ------"
output_folder = r"------ INSERT PATH HERE ------"
Path(output_folder).mkdir(parents=True, exist_ok=True)

# === Load files
# Results of an earlier run (``*_postprocessed.nii.gz``) are not inputs.
nii_files = [
    candidate
    for candidate in Path(input_folder).glob("*.nii.gz")
    if not candidate.name.endswith("_postprocessed.nii.gz")
]

print(f"Found {len(nii_files)} files to process")

# === Process all
# Maps file name -> number of changed voxels; files that fail are reported
# and left out of the summary.
total_files_changes = {}
for file_path in nii_files:
    try:
        changes = process_volume(file_path, output_folder)
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")
    else:
        total_files_changes[os.path.basename(file_path)] = changes

# === Summary
print("\n✅ Processing Summary:")
print(f"Successfully processed {len(total_files_changes)} files\n")
print("🔝 Top 5 files with most changes:")
ranked_changes = sorted(total_files_changes.items(), key=lambda item: item[1], reverse=True)
for filename, changes in ranked_changes[:5]:
    print(f"{filename}: {changes} changes")


## Calculating the Dice between the postprocessed masks and the base predictions (how much postprocessing changed each case)

In [None]:
import nibabel as nib
import numpy as np
import pandas as pd
import os
from pathlib import Path

def calculate_dice(pred, truth, label):
    """Return the Dice coefficient of one label between two label arrays.

    Both arrays are binarised as ``array == label``. If the label occurs in
    neither array the score is defined as 1.0.

    Args:
        pred: First label array.
        truth: Second label array, same shape as ``pred``.
        label: Label value to score.

    Returns:
        ``2 * |pred ∩ truth| / (|pred| + |truth|)`` for the chosen label.
    """
    pred_hits, truth_hits = (pred == label), (truth == label)

    denominator = np.sum(pred_hits) + np.sum(truth_hits)
    if denominator == 0:
        # Label absent from both arrays: counted as perfect agreement.
        return 1.0

    overlap = np.sum(pred_hits & truth_hits)
    return 2 * overlap / denominator

def compute_dice_all_labels(original_path, postprocessed_path):
    """Compute per-label Dice between an original and a postprocessed mask.

    Args:
        original_path: Path of the original prediction (NIfTI).
        postprocessed_path: Path of the postprocessed prediction (NIfTI).

    Returns:
        A list of six values: the Dice for labels 1 to 5 followed by their
        mean.

    Raises:
        ValueError: If the two volumes do not have the same shape.
    """
    original_img, processed_img = nib.load(original_path), nib.load(postprocessed_path)
    pred = original_img.get_fdata().astype(np.uint8)
    post = processed_img.get_fdata().astype(np.uint8)

    if pred.shape != post.shape:
        raise ValueError(f"Shape mismatch: {pred.shape} vs {post.shape}")

    # Labels 1 to 5; the mean is taken over these five scores only.
    per_label = [calculate_dice(pred, post, label_value) for label_value in range(1, 6)]
    return per_label + [np.mean(per_label)]

# === Paths
base_dir = r"------ INSERT PATH HERE ------"
val_pred_dir = os.path.join(base_dir, "validation")
val_post_dir = os.path.join(base_dir, "validation_postprocessed")

# === Process All Files
# One row per case: the case ID plus the six values returned by
# compute_dice_all_labels (labels 1-5 and their mean).
results = []
columns = ['Label_1', 'Label_2', 'Label_3', 'Label_4', 'Label_5', 'Mean_Dice']

print("\n🔍 Comparing validation vs postprocessed predictions:")
for file in os.listdir(val_pred_dir):
    # Only original predictions are iterated; their postprocessed
    # counterparts are looked up by name below.
    if not file.endswith('.nii.gz') or file.endswith('_postprocessed.nii.gz'):
        continue

    base_name = file.replace('.nii.gz', '')
    original_path = os.path.join(val_pred_dir, file)
    postprocessed_path = os.path.join(val_post_dir, base_name + '_postprocessed.nii.gz')

    if not os.path.exists(postprocessed_path):
        print(f"⚠️ Missing postprocessed file for: {base_name}")
        continue

    try:
        dice_vals = compute_dice_all_labels(original_path, postprocessed_path)
        row = {'ID': base_name}
        row.update({col: val for col, val in zip(columns, dice_vals)})
        results.append(row)
        print(f"✔ {base_name}: Mean Dice = {dice_vals[-1]:.4f}")
    except Exception as e:
        print(f"❌ Error comparing {base_name}: {e}")

# === Create DataFrame (in-memory only)
# Explicit columns keep the frame well-formed when no file pair could be
# compared; without them an empty `results` gives a frame with no 'ID' or
# 'Mean_Dice' column and the lines below raise KeyError.
df = pd.DataFrame(results, columns=['ID'] + columns)

# === Print 10 worst Dice cases
print("\n🔻 Worst 10 Mean Dice Cases:")
df['ID'] = df['ID'].astype(str)
worst_diff = df.sort_values(by='Mean_Dice', ascending=True).head(10)
print(worst_diff[['ID', 'Mean_Dice']])


## Converting isolated 3D mask components (Validation dataset) to the neighboring label with the largest shared border, or to background if there is none

In [None]:
import os
import nibabel as nib
import numpy as np
from scipy.ndimage import label, binary_dilation
from pathlib import Path

# === Directories ===
# Masks are read from input_dir and written to output_dir under the same
# file name; the output directory is created if it does not exist yet.
input_dir = r"------ INSERT PATH HERE ------"
output_dir = r"------ INSERT PATH HERE ------"
Path(output_dir).mkdir(parents=True, exist_ok=True)

# === Function to assign an isolated component to the neighboring label with largest contact ===
def reassign_to_largest_border_component(island_mask, full_mask):
    """Pick the label that shares the largest border with an island.

    The island is dilated by one step; non-background voxels of ``full_mask``
    inside that one-voxel shell are counted per label.

    Args:
        island_mask: Boolean array marking the island.
        full_mask: Label array of the same shape (0 = background).

    Returns:
        The most frequent bordering label (the smallest label wins a tie), or
        0 when no labelled voxel borders the island.
    """
    grown = binary_dilation(island_mask, iterations=1)
    shell = grown & ~island_mask
    touching = full_mask[shell & (full_mask > 0)]
    if touching.size == 0:
        return 0  # nothing but background around the island
    candidates, contact = np.unique(touching, return_counts=True)
    return candidates[np.argmax(contact)]

# === Main postprocessing function ===
def remove_isolated_components(filepath, output_dir):
    """Keep the largest component of each label and reassign all others.

    For each label 1 to 5 the connected components are found with
    ``scipy.ndimage.label``. The largest one keeps its label; every other
    component ("island") receives the label returned by
    ``reassign_to_largest_border_component``, evaluated against the original
    (unmodified) mask, which may be 0 (background). The result is saved to
    ``output_dir`` under the same file name as the input.

    Args:
        filepath: Path (``str`` or ``pathlib.Path``) of the NIfTI mask.
        output_dir: Directory that receives the cleaned mask.

    Returns:
        A pair ``(label_changes, to_background_volumes)``:
        ``label_changes`` maps ``(file name, old label, new label)`` to the
        number of voxels moved between labels, and ``to_background_volumes``
        maps the file name to the number of voxels set to background.
    """
    # basename works for both str and Path inputs, unlike ``filepath.name``.
    file_name = os.path.basename(filepath)
    img = nib.load(filepath)
    data = img.get_fdata().astype(np.uint8)
    new_data = np.zeros_like(data)

    label_changes = {}
    to_background_volumes = {}

    for label_id in range(1, 6):  # Labels 1 to 5
        binary = (data == label_id)
        if not np.any(binary):
            continue
        labeled_cc, num = label(binary)
        sizes = np.bincount(labeled_cc.ravel())
        sizes[0] = 0  # index 0 is the background of the labelling, not a component
        # At least one component exists here, so argmax is always >= 1; with a
        # single component the loop below has nothing left to reassign.
        largest_cc = np.argmax(sizes)
        new_data[labeled_cc == largest_cc] = label_id
        for i in range(1, num + 1):
            if i == largest_cc:
                continue
            island_mask = (labeled_cc == i)
            new_label = reassign_to_largest_border_component(island_mask, data)
            vol = np.sum(island_mask)
            if new_label == 0:
                to_background_volumes[file_name] = to_background_volumes.get(file_name, 0) + vol
            else:
                key = (file_name, label_id, new_label)
                label_changes[key] = label_changes.get(key, 0) + vol
            new_data[island_mask] = new_label

    out_path = os.path.join(output_dir, file_name)
    nib.save(nib.Nifti1Image(new_data, img.affine, img.header), out_path)
    return label_changes, to_background_volumes

# === Process all files ===
all_files = list(Path(input_dir).glob("*.nii.gz"))
total_label_changes = {}
total_bg_changes = {}

for f in all_files:
    label_chg, bg_chg = remove_isolated_components(f, output_dir)
    # Fold this file's counts into the matching running total.
    for running, per_file in ((total_label_changes, label_chg), (total_bg_changes, bg_chg)):
        for k, v in per_file.items():
            running[k] = running.get(k, 0) + v

# === Summary ===
print("\n🔝 Top 5 conversions from one label to another:")
top_label_changes = sorted(total_label_changes.items(), key=lambda item: item[1], reverse=True)[:5]
for (name, from_label, to_label), vol in top_label_changes:
    print(f"{name}: {vol} voxels from label {from_label} → {to_label}")

print("\n🔝 Top 5 conversions from label to background:")
top_bg_changes = sorted(total_bg_changes.items(), key=lambda item: item[1], reverse=True)[:5]
for name, vol in top_bg_changes:
    print(f"{name}: {vol} voxels converted to background")


## Now for the whole preop Stanford dataset of 343

In [None]:
import os
import nibabel as nib
import numpy as np
from skimage.measure import label
from scipy.ndimage import distance_transform_edt
from pathlib import Path

def process_slice(slice_mask, midline, max_dist=20):
    """Relabel components that lie entirely on the wrong side of the midline.

    Labels 1-3 are treated as the right lung and are expected at columns left
    of ``midline``; labels 4-5 are treated as the left lung and are expected at
    columns right of it. Every 2D connected component of a label that lies
    strictly on the wrong side is reassigned to the nearest label of the
    opposite lung, provided that label occurs in the slice and is at most
    ``max_dist`` pixels (Euclidean) away. Components without such a candidate
    are left unchanged.

    Args:
        slice_mask: 2D integer label array; the second axis (columns) is the
            one compared against ``midline``.
        midline: Column index separating the two halves of the image.
        max_dist: Largest accepted distance, in pixels, between a component
            and its replacement label. Defaults to 20.

    Returns:
        A relabelled copy of ``slice_mask``; the input array is not modified.
    """
    processed_mask = slice_mask.copy()

    # The distance map of a target label does not depend on the component
    # being examined, so each map is built at most once per slice instead of
    # once per wrong-side component. ``None`` marks a label absent from the
    # slice.
    dist_maps = {}

    def distance_to(target_label):
        if target_label not in dist_maps:
            target_mask = (slice_mask == target_label)
            dist_maps[target_label] = (
                distance_transform_edt(~target_mask) if np.any(target_mask) else None
            )
        return dist_maps[target_label]

    for label_val in range(1, 6):  # Labels 1 to 5
        if label_val in (1, 2, 3):  # Right lung
            correct_side = 'left'
            potential_new_labels = (4, 5)
        else:  # Left lung
            correct_side = 'right'
            potential_new_labels = (1, 2, 3)

        binary = (slice_mask == label_val)
        if not np.any(binary):
            continue

        components = label(binary)
        for comp_idx in range(1, components.max() + 1):
            comp_mask = (components == comp_idx)
            cols = np.where(np.any(comp_mask, axis=0))[0]
            if len(cols) == 0:
                continue

            min_x, max_x = cols[0], cols[-1]

            # Strict comparisons: a component that touches or straddles the
            # midline is never considered to be on the wrong side.
            if correct_side == 'left':
                is_wrong_side = min_x > midline
            else:
                is_wrong_side = max_x < midline
            if not is_wrong_side:
                continue

            min_dist = float('inf')
            best_label = None
            for new_label in potential_new_labels:
                dist_map = distance_to(new_label)
                if dist_map is None:
                    continue
                dist_to_label = dist_map[comp_mask].min()

                # Strict '<' keeps the first candidate when distances tie.
                if dist_to_label < min_dist and dist_to_label <= max_dist:
                    min_dist = dist_to_label
                    best_label = new_label

            if best_label is not None:
                processed_mask[comp_mask] = best_label

    return processed_mask

def process_volume(input_path, output_dir):
    """Run `process_slice` on every slice of a NIfTI label volume and save it.

    The volume is sliced along its third axis. The result is written to
    ``output_dir`` under the input file name with ``.nii.gz`` replaced by
    ``_postprocessed.nii.gz``, reusing the affine and header of the input.

    Args:
        input_path: Path of the ``.nii.gz`` segmentation to process.
        output_dir: Directory that receives the processed file.

    Returns:
        Total number of voxels whose label differs from the input.
    """
    print(f"\nProcessing: {os.path.basename(input_path)}")
    img = nib.load(input_path)
    seg = img.get_fdata().astype(np.uint8)
    processed_volume = np.zeros_like(seg)
    # Each slice is handed to process_slice transposed (rot90(k=-1) followed
    # by fliplr is exactly a transpose), so the columns it inspects run along
    # axis 0 of the volume. The midline therefore has to come from shape[0];
    # shape[1] is only equivalent when the in-plane matrix is square.
    midline = seg.shape[0] // 2
    total_changes = 0

    for z in range(seg.shape[2]):
        orig_slice = seg[:, :, z]
        # Transpose in, transpose back out: same orientation change as the
        # rot90/fliplr pair and its inverse.
        processed_slice = process_slice(orig_slice.T, midline).T
        processed_volume[:, :, z] = processed_slice
        total_changes += np.sum(processed_slice != orig_slice)

    output_filename = os.path.basename(str(input_path)).replace('.nii.gz', '_postprocessed.nii.gz')
    output_path = os.path.join(output_dir, output_filename)
    processed_img = nib.Nifti1Image(processed_volume, img.affine, img.header)
    nib.save(processed_img, output_path)

    print(f"  Total changes: {total_changes}")
    print(f"  Avg changes/slice: {total_changes / seg.shape[2]:.2f}")
    print(f"  Saved to: {output_path}")

    return total_changes

# === Input and output folders
input_folder = r"------ INSERT PATH HERE ------"
output_folder = r"------ INSERT PATH HERE -----"
os.makedirs(output_folder, exist_ok=True)

# === Load files
# Skip results of an earlier run (same filter as the validation cell) so that
# re-running with a shared input/output folder does not post-process its own
# outputs again.
nii_files = [
    f for f in Path(input_folder).glob("*.nii.gz")
    if not f.name.endswith("_postprocessed.nii.gz")
]

print(f"Found {len(nii_files)} files to process")

# === Process all
# Maps file name -> number of changed voxels; files that fail are reported
# and left out of the summary.
total_files_changes = {}
for file_path in nii_files:
    try:
        changes = process_volume(file_path, output_folder)
        total_files_changes[os.path.basename(file_path)] = changes
    except Exception as e:
        print(f"❌ Error processing {file_path}: {e}")

# === Summary
print("\n✅ Processing Summary:")
print(f"Successfully processed {len(total_files_changes)} files\n")
print("🔝 Top 5 files with most changes:")
for filename, changes in sorted(total_files_changes.items(), key=lambda x: x[1], reverse=True)[:5]:
    print(f"{filename}: {changes} changes")


## Postprocessing similar to the Validation dataset

In [None]:
import os
import nibabel as nib
import numpy as np
from scipy.ndimage import label, binary_dilation
from pathlib import Path

# === Directories ===
# Masks are read from input_dir and written to output_dir under the same
# file name; the output directory is created if it does not exist yet.
input_dir = r"------ INSERT PATH HERE ------"
output_dir = r"------ INSERT PATH HERE -----"
Path(output_dir).mkdir(parents=True, exist_ok=True)

# === Function to assign isolated component to neighboring label with largest border contact ===
def reassign_to_largest_border_component(island_mask, full_mask):
    """Pick the label that shares the largest border with an island.

    The island is dilated by one step; non-background voxels of ``full_mask``
    inside that one-voxel shell are counted per label.

    Args:
        island_mask: Boolean array marking the island.
        full_mask: Label array of the same shape (0 = background).

    Returns:
        The most frequent bordering label (the smallest label wins a tie), or
        0 when no labelled voxel borders the island.
    """
    grown = binary_dilation(island_mask, iterations=1)
    shell = grown & ~island_mask
    touching = full_mask[shell & (full_mask > 0)]
    if touching.size == 0:
        return 0  # nothing but background around the island
    candidates, contact = np.unique(touching, return_counts=True)
    return candidates[np.argmax(contact)]

# === Main postprocessing function ===
def remove_isolated_components(filepath, output_dir):
    """Keep the largest component of each label and reassign all others.

    For each label 1 to 5 the connected components are found with
    ``scipy.ndimage.label``. The largest one keeps its label; every other
    component ("island") receives the label returned by
    ``reassign_to_largest_border_component``, evaluated against the original
    (unmodified) mask, which may be 0 (background). The result is saved to
    ``output_dir`` under the same file name as the input.

    Args:
        filepath: Path (``str`` or ``pathlib.Path``) of the NIfTI mask.
        output_dir: Directory that receives the cleaned mask.

    Returns:
        A pair ``(label_changes, to_background_volumes)``:
        ``label_changes`` maps ``(file name, old label, new label)`` to the
        number of voxels moved between labels, and ``to_background_volumes``
        maps the file name to the number of voxels set to background.
    """
    # basename works for both str and Path inputs, unlike ``filepath.name``.
    file_name = os.path.basename(filepath)
    img = nib.load(filepath)
    data = img.get_fdata().astype(np.uint8)
    new_data = np.zeros_like(data)

    label_changes = {}
    to_background_volumes = {}

    for label_id in range(1, 6):  # Labels 1 to 5 for Preop
        binary = (data == label_id)
        if not np.any(binary):
            continue
        labeled_cc, num = label(binary)
        sizes = np.bincount(labeled_cc.ravel())
        sizes[0] = 0  # index 0 is the background of the labelling, not a component
        # At least one component exists here, so argmax is always >= 1; with a
        # single component the loop below has nothing left to reassign.
        largest_cc = np.argmax(sizes)
        new_data[labeled_cc == largest_cc] = label_id
        for i in range(1, num + 1):
            if i == largest_cc:
                continue
            island_mask = (labeled_cc == i)
            new_label = reassign_to_largest_border_component(island_mask, data)
            vol = np.sum(island_mask)
            if new_label == 0:
                to_background_volumes[file_name] = to_background_volumes.get(file_name, 0) + vol
            else:
                key = (file_name, label_id, new_label)
                label_changes[key] = label_changes.get(key, 0) + vol
            new_data[island_mask] = new_label

    out_path = os.path.join(output_dir, file_name)
    nib.save(nib.Nifti1Image(new_data, img.affine, img.header), out_path)
    return label_changes, to_background_volumes

# === Process all files ===
all_files = list(Path(input_dir).glob("*.nii.gz"))
total_label_changes = {}
total_bg_changes = {}

for f in all_files:
    label_chg, bg_chg = remove_isolated_components(f, output_dir)
    # Fold this file's counts into the matching running total.
    for running, per_file in ((total_label_changes, label_chg), (total_bg_changes, bg_chg)):
        for k, v in per_file.items():
            running[k] = running.get(k, 0) + v

# === Summary ===
print("\n🔝 Top 5 conversions from one label to another:")
top_label_changes = sorted(total_label_changes.items(), key=lambda item: item[1], reverse=True)[:5]
for (name, from_label, to_label), vol in top_label_changes:
    print(f"{name}: {vol} voxels from label {from_label} → {to_label}")

print("\n🔝 Top 5 conversions from label to background:")
top_bg_changes = sorted(total_bg_changes.items(), key=lambda item: item[1], reverse=True)[:5]
for name, vol in top_bg_changes:
    print(f"{name}: {vol} voxels converted to background")
