In [1]:
import os
from glob import glob
import SimpleITK as sitk
from tqdm.notebook import tqdm, trange
import shutil
import numpy as np
import pandas as pd

In [4]:
!pip install SimpleITK

Collecting SimpleITK
  Obtaining dependency information for SimpleITK from https://files.pythonhosted.org/packages/b3/e6/8510b79a8ab93248fb897c0ca3bc2c65a6cfca478ad7d5a53e950dd9b17b/SimpleITK-2.3.1-cp311-cp311-win_amd64.whl.metadata
  Downloading SimpleITK-2.3.1-cp311-cp311-win_amd64.whl.metadata (8.0 kB)
Downloading SimpleITK-2.3.1-cp311-cp311-win_amd64.whl (18.1 MB)
   ---------------------------------------- 0.0/18.1 MB ? eta -:--:--
   ---------------------------------------- 0.0/18.1 MB 1.4 MB/s eta 0:00:14
   ---------------------------------------- 0.1/18.1 MB 1.3 MB/s eta 0:00:14
    --------------------------------------- 0.2/18.1 MB 1.6 MB/s eta 0:00:12
    --------------------------------------- 0.4/18.1 MB 2.0 MB/s eta 0:00:09
   - -------------------------------------- 0.5/18.1 MB 2.3 MB/s eta 0:00:08
   - -------------------------------------- 0.9/18.1 MB 3.1 MB/s eta 0:00:06
   -- ------------------------------------- 1.0/18.1 MB 3.5 MB/s eta 0:00:05
   -- --------------

# Processing mhd to nii files

In [24]:
# Folder that is searched for MetaImage header (.mhd) files.
img_dir = 'E:/Tanvir Mehedi/Luna 16 Dataset/subset9'

# Match the real '.mhd' extension: the previous '*mhd' pattern also accepted any
# file whose name merely ended in the letters "mhd". Sorting makes the
# processing order reproducible between runs and machines.
img_list = sorted(glob(img_dir + '/*.mhd'))

print('No. of Image Files :', len(img_list))

No. of Image Files : 88


In [25]:
# Destination folder for the converted NIfTI files (relative to the notebook).
WORKING_DIR = "Luna_16_nii"

# Every output path below points inside WORKING_DIR, so the folder has to exist
# before the first image is written.
os.makedirs(WORKING_DIR, exist_ok=True)

for mhd_path in tqdm(img_list):
    img = sitk.ReadImage(mhd_path)

    # Extract the file name without the path
    file_name = os.path.basename(mhd_path)

    # Swap only the trailing extension for '.nii'; str.replace would also rewrite
    # a '.mhd' that happens to occur earlier in the name.
    output_path = os.path.join(WORKING_DIR, os.path.splitext(file_name)[0] + '.nii')

    sitk.WriteImage(img, output_path)

  0%|          | 0/88 [00:00<?, ?it/s]

# Converting HU values: clip to [-1000, 500] and rescale to [0, 1]

In [4]:
def normalizePlanes(npzarray, minHU=-1000.0, maxHU=500.0):
    """Rescale intensities from the window [minHU, maxHU] to [0, 1].

    Values at or below ``minHU`` become 0.0, values at or above ``maxHU``
    become 1.0, and everything in between is mapped linearly. The input is
    not modified; a new array (or NumPy scalar for scalar input) is returned.

    Parameters
    ----------
    npzarray : numpy.ndarray or scalar
        Intensities to normalise.
    minHU : float, optional
        Lower bound of the window (default -1000.0).
    maxHU : float, optional
        Upper bound of the window (default 500.0).

    Returns
    -------
    numpy.ndarray or numpy scalar
        Normalised values in the closed interval [0, 1], same shape as the input.

    Raises
    ------
    ValueError
        If ``maxHU`` is not strictly greater than ``minHU``.
    """
    if maxHU <= minHU:
        # An empty or inverted window would divide by zero or flip the scale.
        raise ValueError("maxHU must be greater than minHU")

    scaled = (npzarray - minHU) / (maxHU - minHU)
    # np.clip saturates both ends in one pass and also works for scalars.
    return np.clip(scaled, 0.0, 1.0)

input_folder = 'E:/ATM_lung_cancer/Luna_16_cropped_resampled_no_hu/'
output_folder = 'E:/ATM_lung_cancer/Luna_16_hu_from_crop/'

# Every output path below points inside output_folder, so make sure it exists.
os.makedirs(output_folder, exist_ok=True)

# List all NIfTI files in the input folder (sorted for a reproducible order)
nifti_files = sorted(file for file in os.listdir(input_folder) if file.endswith('.nii'))

for nifti_file in nifti_files:
    input_path = os.path.join(input_folder, nifti_file)
    output_path = os.path.join(output_folder, nifti_file)

    ds = sitk.ReadImage(input_path)
    image_array = sitk.GetArrayFromImage(ds)

    # normalizePlanes works element-wise, so normalising the whole array at once
    # gives the same values as looping over slices (and avoids naming a loop
    # variable `slice`, which shadows the builtin).
    normalized_array = normalizePlanes(image_array)

    # Rebuild an image from the array and copy the source image's information
    # onto it, since GetImageFromArray starts from the array alone.
    normalized_img = sitk.GetImageFromArray(normalized_array)
    normalized_img.CopyInformation(ds)

    sitk.WriteImage(normalized_img, output_path)

print("Normalization and saving completed for all files.")

Normalization and saving completed for all files.


# Calculating the number of slices per scan to update the Excel file

In [29]:
# Load the nodule table; a 'Number of Slices' column is added to it below.
df = pd.read_excel('lidc_malignancy_mappedwith_LUNA_final.xlsx')


# Function to get the number of slices from a NIfTI file
def get_number_of_slices(nifti_path):
    """Return the size of the image at ``nifti_path`` along its third axis.

    Only the file's header information is read, not the pixel data, so the
    call stays cheap even for large volumes.

    Parameters
    ----------
    nifti_path : str
        Path of the image file to inspect.

    Returns
    -------
    int
        Third component of the image size (the slice count of a 3-D volume).
    """
    reader = sitk.ImageFileReader()
    reader.SetFileName(nifti_path)
    # Populates size/spacing/etc. on the reader without loading the voxels.
    reader.ReadImageInformation()
    image_size = reader.GetSize()
    total_slices = image_size[2]
    return total_slices

# Several rows can refer to the same scan, so look up each distinct Series Uid
# once and then map the result onto the rows instead of re-opening the file for
# every row.
slices_per_series = {
    series_uid: get_number_of_slices(f'E:/ATM_lung_cancer/Luna_16_final_hu/{series_uid}.nii')
    for series_uid in df['Series Uid'].unique()
}
df['Number of Slices'] = df['Series Uid'].map(slices_per_series)
# Display the DataFrame
print(df[:5])

       Subject ID                                         Series Uid  \
0  LIDC-IDRI-0001  1.3.6.1.4.1.14519.5.2.1.6279.6001.179049373636...   
1  LIDC-IDRI-0003  1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...   
2  LIDC-IDRI-0003  1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...   
3  LIDC-IDRI-0003  1.3.6.1.4.1.14519.5.2.1.6279.6001.170706757615...   
4  LIDC-IDRI-0004  1.3.6.1.4.1.14519.5.2.1.6279.6001.323541312620...   

   diameter  x loc  y loc  slice no  Radiologist1  Radiologist2  Radiologist3  \
0    23.107    317    367        43           5.0           5.0           4.0   
1     9.208    308    198        57           4.0           2.0           3.0   
2    13.058    221    219        56           4.0           2.0           3.0   
3    21.201    367    350        65           5.0           5.0           3.0   
4     4.811    142    323       165           1.0           2.0           1.0   

   Radiologist4  Number of Slices  
0           NaN               133  
1       

In [30]:
# Write df to a new workbook; index=False keeps the row index out of the sheet.
df.to_excel(excel_writer='Luna_16_with_z_loc.xlsx', index=False)

# Files that are not listed in the Excel sheet

In [50]:
# Workbook whose 'Series Uid' column is the reference list of scans.
df_excel = pd.read_excel('lidc_malignancy_mappedwith_LUNA.xlsx')

# Folder whose NIfTI files are checked against the workbook.
nii_directory = 'F:/subset0_nii_hu'

# Base names (extension stripped) of every '.nii' entry in that folder.
nii_files = [
    os.path.splitext(entry)[0]
    for entry in os.listdir(nii_directory)
    if entry.endswith('.nii')
]

# Series Uid values from the workbook, as strings, collected into a set.
excel_series_uids = set(df_excel['Series Uid'].astype(str))

# Keep, in directory-listing order, the base names absent from the workbook.
files_not_in_excel = list(
    filter(lambda stem: stem not in excel_series_uids, nii_files)
)

# Report the unmatched files and how many there are.
print(f"Files not in Excel: {files_not_in_excel}")
print(f"Number of files not in Excel: {len(files_not_in_excel)}")


Files not in Excel: ['1.3.6.1.4.1.14519.5.2.1.6279.6001.105756658031515062000744821260', '1.3.6.1.4.1.14519.5.2.1.6279.6001.122763913896761494371822656720', '1.3.6.1.4.1.14519.5.2.1.6279.6001.126121460017257137098781143514', '1.3.6.1.4.1.14519.5.2.1.6279.6001.139713436241461669335487719526', '1.3.6.1.4.1.14519.5.2.1.6279.6001.144438612068946916340281098509', '1.3.6.1.4.1.14519.5.2.1.6279.6001.146429221666426688999739595820', '1.3.6.1.4.1.14519.5.2.1.6279.6001.194465340552956447447896167830', '1.3.6.1.4.1.14519.5.2.1.6279.6001.210837812047373739447725050963', '1.3.6.1.4.1.14519.5.2.1.6279.6001.231645134739451754302647733304', '1.3.6.1.4.1.14519.5.2.1.6279.6001.238522526736091851696274044574', '1.3.6.1.4.1.14519.5.2.1.6279.6001.250438451287314206124484591986', '1.3.6.1.4.1.14519.5.2.1.6279.6001.269689294231892620436462818860', '1.3.6.1.4.1.14519.5.2.1.6279.6001.278660284797073139172446973682', '1.3.6.1.4.1.14519.5.2.1.6279.6001.280972147860943609388015648430', '1.3.6.1.4.1.14519.5.2.1.62

# Resampling and cropping

In [3]:
import pandas as pd

excel_file_path = 'Luna_16_with_z_loc.xlsx'

# Output directory for cropped images
output_path = 'E:/ATM_lung_cancer/Luna_16_cropped_resampled_no_hu/'

# Edge length, in voxels of the resampled image, of the cube cut around each
# nodule, and half of it.
width = 32
w = int(width / 2)

# Define the desired target voxel size
target_spacing = [0.7, 0.7, 1]


def _resample_to_spacing(image, spacing):
    """Resample ``image`` onto a grid with voxel size ``spacing`` (linear interpolation).

    Origin and direction are taken from ``image``; the output size is the input
    size scaled by the ratio of old to new spacing, rounded to the nearest voxel.
    """
    resampler = sitk.ResampleImageFilter()
    resampler.SetOutputSpacing(spacing)
    new_size = [
        int(sz * (spc / target_sp) + 0.5)
        for sz, spc, target_sp in zip(image.GetSize(), image.GetSpacing(), spacing)
    ]
    resampler.SetSize(new_size)
    resampler.SetOutputDirection(image.GetDirection())
    resampler.SetOutputOrigin(image.GetOrigin())
    resampler.SetInterpolator(sitk.sitkLinear)
    return resampler.Execute(image)


def _crop_bounds(center, half_width, axis_size):
    """Return (start, stop) of a 2*half_width window around ``center`` kept inside [0, axis_size]."""
    window = 2 * half_width
    # A negative start would be read as "count from the end" by the slice and
    # give a wrong or empty crop, so shift the window back inside the axis.
    start = max(0, min(center - half_width, axis_size - window))
    stop = min(axis_size, start + window)
    return start, stop


# Create output directory if it doesn't exist (once, not per row)
os.makedirs(output_path, exist_ok=True)

# Read the Excel file
df = pd.read_excel(excel_file_path)

# Consecutive rows can point at the same scan; remember which scan is currently
# loaded so it is read and resampled only once.
resampled_series_uid = None

# Loop through the rows in the DataFrame
for index, row in df.iterrows():
    # Find the NIfTI file matching the scan number
    nifti_file_paths = glob('E:/ATM_lung_cancer/Luna_16_final_nii/{}.nii'.format(row['Series Uid']))

    if len(nifti_file_paths) != 1:
        print("NIfTI file not found for scan number:", row['Series Uid'])
        continue

    nifti_file_path = nifti_file_paths[0]

    if row['Series Uid'] != resampled_series_uid:
        image = sitk.ReadImage(nifti_file_path)
        resampled_image = _resample_to_spacing(image, target_spacing)
        resampled_series_uid = row['Series Uid']

    # NOTE(review): the workbook must provide a 'z loc' column holding a voxel
    # index along the third axis of the original image — confirm how it is produced.
    # Values read from Excel may arrive as floats or NumPy scalars; the index
    # transform below needs plain integers.
    original_nodule_location = (int(row['x loc']), int(row['y loc']), int(row['z loc']))

    # Convert the original nodule location to physical coordinates in the original image
    original_nodule_location_physical = image.TransformIndexToPhysicalPoint(original_nodule_location)

    # Transform the original nodule location to the new voxel space
    new_nodule_location_index = resampled_image.TransformPhysicalPointToIndex(original_nodule_location_physical)
    new_nodule_location_voxel_units = [int(i) for i in new_nodule_location_index]

    # Crop the region around the specified coordinates, keeping it inside the volume
    bounds = [
        _crop_bounds(center, w, axis_size)
        for center, axis_size in zip(new_nodule_location_voxel_units, resampled_image.GetSize())
    ]
    (x0, x1), (y0, y1), (z0, z1) = bounds
    resampled_image_cropped = resampled_image[x0:x1, y0:y1, z0:z1]

    # Create a unique identifier based on the index
    output_file_name = '{}_{}.nii'.format(row['Series Uid'], index)

    if any(start != center - w for (start, _stop), center in zip(bounds, new_nodule_location_voxel_units)):
        # The nodule is no longer centred in this crop; make that visible.
        print("Crop window shifted to stay inside the image for:", output_file_name)

    # Save the cropped image with the unique identifier
    output_file_path = os.path.join(output_path, output_file_name)
    sitk.WriteImage(resampled_image_cropped, output_file_path)

In [4]:
# Show the path of the last NIfTI file the loop above opened.
print(nifti_file_path)

E:/ATM_lung_cancer/Luna_16_final_hu/1.3.6.1.4.1.14519.5.2.1.6279.6001.300270516469599170290456821227.nii
