In [None]:
import os
from glob import glob
import SimpleITK as sitk
from tqdm.notebook import tqdm, trange
import shutil
import numpy as np
import pandas as pd

In [None]:
!pip install SimpleITK

# HU conversion: clip to [-1000, 500] and rescale to [0, 1]

In [None]:
def normalizePlanes(npzarray, minHU=-1000.0, maxHU=500.0):
    """Window intensity values and rescale them linearly to the range [0, 1].

    Values at or below ``minHU`` map to 0.0, values at or above ``maxHU`` map
    to 1.0, and everything in between is scaled linearly.

    Parameters
    ----------
    npzarray : array-like
        Intensities to normalize (a slice or a whole volume). The input is not
        modified; a new array is returned.
    minHU : float, optional
        Lower bound of the window. Defaults to -1000.0.
    maxHU : float, optional
        Upper bound of the window. Defaults to 500.0.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the same shape as the input, values in [0, 1].

    Raises
    ------
    ValueError
        If ``maxHU`` is not strictly greater than ``minHU`` (the scaling would
        divide by zero or invert the range).
    """
    if maxHU <= minHU:
        raise ValueError("maxHU must be greater than minHU")

    scaled = (np.asarray(npzarray) - minHU) / (maxHU - minHU)
    # Clamp everything that fell outside the window to the ends of [0, 1].
    return np.clip(scaled, 0.0, 1.0)

# Patch size whose crops are normalized by this run; it only selects the folder
# suffix used below.
# NOTE(review): the plotting cell at the end of the notebook reads
# Luna_16_hu_from_crop_32, _48 and _64, so this cell presumably has to be run
# once per patch size. The input folder is written by the crop cells further
# down, so on a fresh run those have to be executed first.
patch_size = 32

input_folder = f'/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_{patch_size}'
output_folder = f'/home/m-health/TBM/Lung/Luna_16_hu_from_crop_{patch_size}'

# Make sure the destination exists before the first image is written.
os.makedirs(output_folder, exist_ok=True)

# List all NIfTI files in the input folder (sorted for a reproducible order)
nifti_files = sorted(name for name in os.listdir(input_folder) if name.endswith('.nii'))

for nifti_file in nifti_files:
    input_path = os.path.join(input_folder, nifti_file)
    output_path = os.path.join(output_folder, nifti_file)

    ds = sitk.ReadImage(input_path)
    image_array = sitk.GetArrayFromImage(ds)

    # normalizePlanes works element-wise, so the whole volume can be normalized
    # in one call instead of slice by slice.
    normalized_array = normalizePlanes(image_array)

    # Rebuild an image and copy origin/spacing/direction from the source patch.
    normalized_img = sitk.GetImageFromArray(normalized_array)
    normalized_img.CopyInformation(ds)

    sitk.WriteImage(normalized_img, output_path)

print("Normalization and saving completed for all files.")


# Resampling and cropping 48×48×48 patches

In [None]:
import pandas as pd

excel_file_path = 'Luna_16_with_z_loc.xlsx'

# Output directory for cropped images
output_path = '/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_64'

# Read the Excel file
df = pd.read_excel(excel_file_path)

# Loop through the rows in the DataFrame
for index, row in df.iterrows():
    original_nodule_location = (row['x loc'], row['y loc'], row['z loc'])
    width = 48
    w = int(width / 2)

    # Find the NIfTI file matching the scan number
    nifti_file_paths = glob('/home/m-health/TBM/Lung/Luna_16_final_nii/{}.nii'.format(row['Series Uid']))
    
    if len(nifti_file_paths) == 1:
        nifti_file_path = nifti_file_paths[0]
        image = sitk.ReadImage(nifti_file_path)
        
        # Define the desired target voxel size
        target_spacing = [0.7, 0.7, 1]

        # Create a resampling filter
        resampler = sitk.ResampleImageFilter()
        resampler.SetOutputSpacing(target_spacing)

        # Calculate the new size based on the ratio of original and target spacings
        original_spacing = image.GetSpacing()
        new_size = [int(sz * (spc / target_sp) + 0.5) for sz, spc, target_sp in zip(image.GetSize(), original_spacing, target_spacing)]
        resampler.SetSize(new_size)

        resampler.SetOutputDirection(image.GetDirection())
        resampler.SetOutputOrigin(image.GetOrigin())
        resampler.SetInterpolator(sitk.sitkLinear)
 
        # Execute the resampling
        resampled_image = resampler.Execute(image)

        # Convert the original nodule location to physical coordinates in the original image
        original_nodule_location_physical = image.TransformIndexToPhysicalPoint(original_nodule_location)

        # Transform the original nodule location to the new voxel space
        new_nodule_location_index = resampled_image.TransformPhysicalPointToIndex(original_nodule_location_physical)

        # Convert the index to integer voxel coordinates
        new_nodule_location_voxel_units = [int(round(index)) for index in new_nodule_location_index]
        
        # Crop the region around the specified coordinates
        resampled_image_cropped = resampled_image[new_nodule_location_voxel_units[0]-w:new_nodule_location_voxel_units[0]+w,
                                           new_nodule_location_voxel_units[1]-w:new_nodule_location_voxel_units[1]+w,
                                           new_nodule_location_voxel_units[2]-w:new_nodule_location_voxel_units[2]+w]


        # Create output directory if it doesn't exist
        os.makedirs(output_path, exist_ok=True)

        # Create a unique identifier based on the index
        output_file_name = '{}_{}.nii'.format(row['Series Uid'], index)

        # Save the cropped image with the unique identifier
        output_file_path = os.path.join(output_path, output_file_name)
        sitk.WriteImage(resampled_image_cropped, output_file_path)

    else:
        print("NIfTI file not found for scan number:", row['Series Uid'])

In [None]:
nifti_file_path = '/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_64/1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222365663678666836860_1146.nii'

# Read the image using SimpleITK
image = sitk.ReadImage(nifti_file_path)

# Get the shape of the image
image_shape = image.GetSize()

print("Image Shape:", image_shape)

# Resampling and cropping 64×64×64 patches

In [None]:
excel_file_path = 'Luna_16_with_z_loc.xlsx'

# Output directory for cropped images
output_path = '/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_64'
os.makedirs(output_path, exist_ok=True)

# Edge length of the cubic patch (in voxels of the resampled grid) and its half-width
width = 64
w = width // 2

# Voxel spacing every scan is resampled to before cropping
target_spacing = [0.7, 0.7, 1]

# Read the Excel file
df = pd.read_excel(excel_file_path)

# Row labels whose patch would leave the volume; printed at the end so they can
# be handled separately.
out_of_bounds_indices = []

# Loop through the rows in the DataFrame (one nodule per row)
for index, row in df.iterrows():
    # Find the NIfTI file matching the scan number
    nifti_file_paths = glob('/home/m-health/TBM/Lung/Luna_16_final_nii/{}.nii'.format(row['Series Uid']))
    if len(nifti_file_paths) != 1:
        print("NIfTI file not found for scan number:", row['Series Uid'])
        continue

    nifti_file_path = nifti_file_paths[0]
    image = sitk.ReadImage(nifti_file_path)

    # New grid size: scale each axis by original/target spacing (rounded) so the
    # resampled volume covers the same extent as the original.
    original_spacing = image.GetSpacing()
    new_size = [int(sz * (spc / target_sp) + 0.5)
                for sz, spc, target_sp in zip(image.GetSize(), original_spacing, target_spacing)]

    resampler = sitk.ResampleImageFilter()
    resampler.SetOutputSpacing(target_spacing)
    resampler.SetSize(new_size)
    resampler.SetOutputDirection(image.GetDirection())
    resampler.SetOutputOrigin(image.GetOrigin())
    resampler.SetInterpolator(sitk.sitkLinear)
    resampled_image = resampler.Execute(image)

    # Cast the spreadsheet coordinates to plain ints, as the re-crop cell for
    # the skipped patches does; presumably the columns can hold floats, which
    # the index transform does not accept.
    original_nodule_location = (
        int(round(row['x loc'])),
        int(round(row['y loc'])),
        int(round(row['z loc'])),
    )

    # Original voxel index -> physical point -> voxel index in the resampled grid
    original_nodule_location_physical = image.TransformIndexToPhysicalPoint(original_nodule_location)
    x, y, z = resampled_image.TransformPhysicalPointToIndex(original_nodule_location_physical)
    sx, sy, sz = resampled_image.GetSize()

    # Skip if patch goes out of bounds
    if (x - w < 0 or x + w > sx or
        y - w < 0 or y + w > sy or
        z - w < 0 or z + w > sz):
        print(f"Skipping index {index} - crop out of bounds")
        out_of_bounds_indices.append(index)
        continue  # Skip this nodule

    resampled_image_cropped = resampled_image[
        x - w : x + w,
        y - w : y + w,
        z - w : z + w
    ]

    # The row index makes the file name unique when a scan has several nodules
    output_file_name = '{}_{}.nii'.format(row['Series Uid'], index)
    output_file_path = os.path.join(output_path, output_file_name)
    sitk.WriteImage(resampled_image_cropped, output_file_path)

print("Skipped (out of bounds) indices:", out_of_bounds_indices)

In [None]:
import pandas as pd

# Row labels of the nodules that were skipped
# (presumably taken from the "Skipping index ..." messages of the 64-voxel crop run)
skipped_indices = [31, 445, 464, 470, 492, 615, 730, 820, 860, 862, 865, 872, 1035, 1115]

# Series UIDs are long strings; lift the column-width limit so they print untruncated
pd.set_option('display.max_colwidth', None)

# Load the nodule table and pick the Series UID of every skipped row
df = pd.read_excel('Luna_16_with_z_loc.xlsx')
skipped_series_uids = df['Series Uid'].loc[skipped_indices]

print(skipped_series_uids)


In [None]:

excel_file_path = 'Luna_16_with_z_loc.xlsx'
output_path = '/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_64'
os.makedirs(output_path, exist_ok=True)

df = pd.read_excel(excel_file_path)

# Row labels that were skipped in a previous run because a centred 64-voxel
# patch would leave the volume. They are re-cropped here with a shifted window.
skipped_indices = [31, 445, 464, 470, 492, 615, 730, 820, 860, 862, 865, 872, 1035, 1115]

# Edge length of the cubic patch (in voxels of the resampled grid) and its half-width
width = 64
w = width // 2

# Voxel spacing every scan is resampled to before cropping
target_spacing = [0.7, 0.7, 1]

for index in skipped_indices:
    row = df.loc[index]

    nifti_file_paths = glob(f'/home/m-health/TBM/Lung/Luna_16_final_nii/{row["Series Uid"]}.nii')

    if len(nifti_file_paths) != 1:
        print("NIfTI file not found for scan number:", row['Series Uid'])
        continue

    nifti_file_path = nifti_file_paths[0]
    image = sitk.ReadImage(nifti_file_path)

    # Resample: scale each axis by original/target spacing (rounded) so the
    # resampled volume covers the same extent as the original.
    original_spacing = image.GetSpacing()
    new_size = [int(sz * (spc / target_sp) + 0.5)
                for sz, spc, target_sp in zip(image.GetSize(), original_spacing, target_spacing)]

    resampler = sitk.ResampleImageFilter()
    resampler.SetOutputSpacing(target_spacing)
    resampler.SetSize(new_size)
    resampler.SetOutputDirection(image.GetDirection())
    resampler.SetOutputOrigin(image.GetOrigin())
    resampler.SetInterpolator(sitk.sitkLinear)
    resampled_image = resampler.Execute(image)

    # Nodule position as plain integer voxel indices of the original image
    original_nodule_location = (
        int(round(row['x loc'])),
        int(round(row['y loc'])),
        int(round(row['z loc'])),
    )

    # Original voxel index -> physical point -> voxel index in the resampled grid
    original_nodule_location_physical = image.TransformIndexToPhysicalPoint(original_nodule_location)
    new_nodule_location_index = resampled_image.TransformPhysicalPointToIndex(original_nodule_location_physical)
    x, y, z = [int(round(i)) for i in new_nodule_location_index]

    # Get image size
    sx, sy, sz = resampled_image.GetSize()

    # A volume thinner than the patch cannot yield a full-size crop on any
    # placement; report it instead of silently writing an undersized patch.
    if min(sx, sy, sz) < width:
        print(f"Skipping index {index} - volume {(sx, sy, sz)} is smaller than the {width}-voxel patch")
        continue

    # Compute crop start and end (centered where possible, shift if out-of-bounds)
    x_start = max(0, min(sx - width, x - w))
    x_end = x_start + width

    y_start = max(0, min(sy - width, y - w))
    y_end = y_start + width

    z_start = max(0, min(sz - width, z - w))
    z_end = z_start + width

    # Crop
    resampled_image_cropped = resampled_image[
        x_start:x_end,
        y_start:y_end,
        z_start:z_end
    ]

    # Save under the same '<Series Uid>_<row index>.nii' naming as the main crop cell
    output_file_name = f'{row["Series Uid"]}_{index}.nii'
    sitk.WriteImage(resampled_image_cropped, os.path.join(output_path, output_file_name))

    print(f"Cropped and saved: {row['Series Uid']} (index {index})")


# Resampling and cropping 32×32×32 patches

In [None]:
excel_file_path = 'Luna_16_with_z_loc.xlsx'

# Output directory for cropped images
output_path = '/home/m-health/TBM/Lung/Luna_16_cropped_resampled_no_hu_32'
os.makedirs(output_path, exist_ok=True)

# Edge length of the cubic patch (in voxels of the resampled grid) and its half-width
width = 32
w = width // 2

# Voxel spacing every scan is resampled to before cropping
target_spacing = [0.7, 0.7, 1]

# Read the Excel file
df = pd.read_excel(excel_file_path)

# Loop through the rows in the DataFrame (one nodule per row)
for index, row in df.iterrows():
    # Find the NIfTI file matching the scan number
    nifti_file_paths = glob('/home/m-health/TBM/Lung/Luna_16_final_nii/{}.nii'.format(row['Series Uid']))
    if len(nifti_file_paths) != 1:
        print("NIfTI file not found for scan number:", row['Series Uid'])
        continue

    nifti_file_path = nifti_file_paths[0]
    image = sitk.ReadImage(nifti_file_path)

    # New grid size: scale each axis by original/target spacing (rounded) so the
    # resampled volume covers the same extent as the original.
    original_spacing = image.GetSpacing()
    new_size = [int(sz * (spc / target_sp) + 0.5)
                for sz, spc, target_sp in zip(image.GetSize(), original_spacing, target_spacing)]

    resampler = sitk.ResampleImageFilter()
    resampler.SetOutputSpacing(target_spacing)
    resampler.SetSize(new_size)
    resampler.SetOutputDirection(image.GetDirection())
    resampler.SetOutputOrigin(image.GetOrigin())
    resampler.SetInterpolator(sitk.sitkLinear)
    resampled_image = resampler.Execute(image)

    # Cast the spreadsheet coordinates to plain ints, as the re-crop cell for
    # the skipped 64-voxel patches does; presumably the columns can hold
    # floats, which the index transform does not accept.
    original_nodule_location = (
        int(round(row['x loc'])),
        int(round(row['y loc'])),
        int(round(row['z loc'])),
    )

    # Original voxel index -> physical point -> voxel index in the resampled grid
    original_nodule_location_physical = image.TransformIndexToPhysicalPoint(original_nodule_location)
    x, y, z = resampled_image.TransformPhysicalPointToIndex(original_nodule_location_physical)

    sx, sy, sz = resampled_image.GetSize()
    if min(sx, sy, sz) < width:
        print(f"Skipping index {index} - volume {(sx, sy, sz)} is smaller than the {width}-voxel patch")
        continue

    # Centre the patch on the nodule where possible; near a border shift it
    # inwards so the slice never gets a negative start or runs past the end.
    x_start = max(0, min(sx - width, x - w))
    y_start = max(0, min(sy - width, y - w))
    z_start = max(0, min(sz - width, z - w))
    if (x_start, y_start, z_start) != (x - w, y - w, z - w):
        print(f"Index {index} - patch shifted to stay inside the volume")

    resampled_image_cropped = resampled_image[
        x_start:x_start + width,
        y_start:y_start + width,
        z_start:z_start + width
    ]

    # The row index makes the file name unique when a scan has several nodules
    output_file_name = '{}_{}.nii'.format(row['Series Uid'], index)
    output_file_path = os.path.join(output_path, output_file_name)
    sitk.WriteImage(resampled_image_cropped, output_file_path)

In [None]:
# Sanity check: read back one patch from the folder the crop cell above wrote
# to (`output_path`) and print its size. Building the path from `output_path`
# keeps this check pointed at the data that was just produced.
nifti_file_path = os.path.join(
    output_path,
    '1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222365663678666836860_1146.nii',
)

# Read the image using SimpleITK
image = sitk.ReadImage(nifti_file_path)

# Get the shape of the image
image_shape = image.GetSize()

print("Image Shape:", image_shape)

# Plotting three patch sizes of the same nodule

In [None]:
import nibabel as nib
import matplotlib.pyplot as plt


# Root folder holding the normalized patch folders
base_path = "/home/m-health/TBM/Lung"

# The same patch file is expected in every folder listed below
filename = "1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222365663678666836860_1147.nii"

# One folder per patch size, in the same order as the panel titles
folders = [
    "Luna_16_hu_from_crop_32",
    "Luna_16_hu_from_crop_48",
    "Luna_16_hu_from_crop_64"
]
titles = ["Patch 32×32×32", "Patch 48×48×48", "Patch 64×64×64"]

# Collect the middle slice along the third array axis of each patch
images = []
for folder in folders:
    volume = nib.load(os.path.join(base_path, folder, filename)).get_fdata()
    images.append(volume[:, :, volume.shape[2] // 2])

# Draw the slices side by side, one panel per patch size
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, img, title in zip(axes, images, titles):
    ax.imshow(img, cmap="gray")
    ax.set_title(title)
    ax.axis("off")

fig.suptitle("Mid-Slice Comparison of the Same Nodule Across Patch Sizes", fontsize=16)

# Save the figure next to the data folders, then display it
output_path = os.path.join(base_path, "comparison_mid_slices_2.png")
fig.savefig(output_path, dpi=300, bbox_inches='tight')

plt.show()
