In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
kaggle_input_root = '/kaggle/input'
for folder, _, names in os.walk(kaggle_input_root):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
import os
import numpy as np
import nibabel as nib
from PIL import Image
import re
from pathlib import Path

In [6]:
import os
import numpy as np
import nibabel as nib
from PIL import Image
import re
from pathlib import Path
import warnings

# Modalities that hold label maps rather than intensities: they are stored as
# uint8 and must be resized without interpolation so label values survive.
_MASK_MODALITIES = ('tumor_binary_mask', 'tumor_grade_mask')

# Filenames look like batch_B_inpaint_X_input_Y_MODALITY[_sample].EXT.
# Groups: batch index, inpaint index, input index, modality.
_SLICE_FILENAME = re.compile(
    r'batch_(\d+)_inpaint_(\d+)_input_(\d+)_'
    r'(F|T1|T1c|T2|tumor_binary_mask|tumor_grade_mask)(?:_sample)?\.(?:png|jpg|jpeg|dcm)'
)


def _read_slice(img_path, is_dicom):
    """Load one slice from disk as a numpy array (float32 for DICOM, 8-bit grayscale otherwise)."""
    if is_dicom:
        import pydicom  # local import: only required for DICOM input
        return pydicom.dcmread(img_path).pixel_array.astype(np.float32)
    # Context manager so the underlying file handle is released immediately.
    with Image.open(img_path) as img:
        return np.array(img.convert('L'))


def _resize_like(img_array, reference, is_mask):
    """Resize img_array to the (rows, cols) shape of the 2D array `reference`."""
    rows, cols = reference.shape
    # Interpolating a label map would invent labels, so masks use nearest-neighbour.
    resample = Image.Resampling.NEAREST if is_mask else Image.Resampling.LANCZOS
    # PIL sizes are (width, height), i.e. the reverse of a numpy (rows, cols) shape.
    return np.array(Image.fromarray(img_array).resize((cols, rows), resample))


def _dicom_affine(input_folder, files):
    """Build a diagonal affine from the voxel spacing of the first .dcm file (identity if none)."""
    import pydicom  # local import: only required for DICOM input
    first_file = next((f for f in sorted(files) if f.endswith('.dcm')), None)
    if first_file is None:
        return np.eye(4)
    ds = pydicom.dcmread(os.path.join(input_folder, first_file))
    spacing = [1.0, 1.0, 1.0]  # 1mm isotropic fallback for any missing tag
    try:
        pixel_spacing = getattr(ds, 'PixelSpacing', None)
        if pixel_spacing is not None and len(pixel_spacing) == 2:
            # Convert explicitly: the tag value is not a plain list of floats.
            spacing[0] = float(pixel_spacing[0])
            spacing[1] = float(pixel_spacing[1])
        thickness = getattr(ds, 'SliceThickness', None)
        if thickness not in (None, ''):
            spacing[2] = float(thickness)
    except (TypeError, ValueError) as e:
        warnings.warn(f"Could not parse voxel spacing from {first_file}: {e}; using 1mm defaults where missing")
    return np.diag(spacing + [1.0])


def _save_nifti(volume, affine, output_path):
    """Wrap volume in a NIfTI image, reorient to closest canonical, and write it to output_path."""
    nii_img = nib.as_closest_canonical(nib.Nifti1Image(volume, affine))
    nib.save(nii_img, output_path)


def images_to_niigz(input_folder, output_folder, modalities, slices_per_modality=300, is_dicom=False):
    """
    Convert per-slice images into one .nii.gz volume per modality, plus combined files m1, m2, m3.

    Files in `input_folder` must be named
    ``batch_B_inpaint_X_input_Y_MODALITY[_sample].(png|jpg|jpeg|dcm)``; anything else is
    skipped. The slice number is ``Y + 1``. Slices sharing the same slice number are
    ordered by their numeric (B, X) indices so the stacking order is deterministic.

    Combined outputs (written only when every source volume exists and shapes agree):
    - m1: voxel-wise mean of T1 and T1c.
    - m2: the m1 blend with voxels under tumor_binary_mask set to the blend's maximum.
    - m3: T1, T1c and T2 stacked along a new last axis (3 channels).

    Parameters:
    - input_folder (str): Path to folder containing images.
    - output_folder (str): Path to save .nii.gz files (created if missing).
    - modalities (list): Modality names to convert
      (e.g., ['F', 'T1', 'T1c', 'T2', 'tumor_binary_mask', 'tumor_grade_mask']).
    - slices_per_modality (int): Expected number of slices per modality (default: 300).
      A mismatch only produces a warning.
    - is_dicom (bool): True if images are DICOM, False for PNG/JPEG (default: False).

    Returns:
    - None (saves .nii.gz files to output_folder).

    Raises:
    - FileNotFoundError: if input_folder does not exist or is empty.
    - ImportError: if is_dicom is True and pydicom is not installed.
    """
    # Fail fast with a clear message instead of a NameError on every file.
    if is_dicom:
        try:
            import pydicom  # noqa: F401
        except ImportError as exc:
            raise ImportError("is_dicom=True requires the 'pydicom' package to be installed") from exc

    # Validate the input before touching the filesystem for output.
    if not os.path.exists(input_folder):
        raise FileNotFoundError(f"Input folder {input_folder} does not exist")
    files = sorted(os.listdir(input_folder))
    if not files:
        raise FileNotFoundError(f"Input folder {input_folder} is empty")

    Path(output_folder).mkdir(parents=True, exist_ok=True)

    # modality -> list of ((slice_num, batch_idx, inpaint_idx), 2D array)
    modality_data = {mod: [] for mod in modalities}

    # Print all filenames for debugging
    print("Files in input folder:")
    for filename in files:
        print(f"  {filename}")

    # Read every matching slice
    for filename in files:
        # fullmatch so trailing junk (e.g. "x.png.bak") is not treated as an image.
        match = _SLICE_FILENAME.fullmatch(filename)
        if not match:
            print(f"Skipping {filename}: does not match pattern")
            continue
        batch_idx, inpaint_idx, input_idx, modality = match.groups()
        slice_num = int(input_idx) + 1  # input_Y is 0-based; slice numbers are 1-based

        if modality not in modality_data:
            print(f"Skipping {filename}: modality {modality} not in {modalities}")
            continue

        print(f"Matched {filename}: slice {slice_num}, modality {modality}")

        try:
            img_array = _read_slice(os.path.join(input_folder, filename), is_dicom)
        except Exception as e:
            print(f"Error reading {filename}: {e}")
            continue

        # The first slice read for a modality defines its in-plane shape.
        entries = modality_data[modality]
        if entries and img_array.shape != entries[0][1].shape:
            reference = entries[0][1]
            warnings.warn(f"Inconsistent dimensions for {filename}: {img_array.shape}, expected {reference.shape}")
            try:
                img_array = _resize_like(img_array, reference, modality in _MASK_MODALITIES)
            except Exception as e:
                print(f"Error resizing {filename}: {e}")
                continue

        entries.append(((slice_num, int(batch_idx), int(inpaint_idx)), img_array))

    # Affine: voxel spacing from the first DICOM when available, else 1mm isotropic.
    affine = _dicom_affine(input_folder, files) if is_dicom else np.eye(4)

    # Build and save one volume per modality; keep them for the combined outputs.
    volumes = {}
    for modality in modalities:
        entries = modality_data[modality]
        if len(entries) == 0:
            warnings.warn(f"No slices found for {modality}")
            continue
        if len(entries) != slices_per_modality:
            warnings.warn(f"Found {len(entries)} slices for {modality}, expected {slices_per_modality}")

        # Sort by (slice, batch, inpaint) and check for gaps or duplicates
        ordered = sorted(entries, key=lambda entry: entry[0])
        slice_nums = [key[0] for key, _ in ordered]
        if slice_nums != list(range(1, slices_per_modality + 1)):
            warnings.warn(f"Non-sequential slices for {modality}: {slice_nums[:5]}...{slice_nums[-5:]}")

        try:
            volume = np.stack([arr for _, arr in ordered], axis=-1)  # (height, width, slices)
        except ValueError as e:
            print(f"Error stacking slices for {modality}: {e}")
            continue

        # Label maps stay integral; intensity images become float32.
        volume = volume.astype(np.uint8 if modality in _MASK_MODALITIES else np.float32)
        volumes[modality] = volume

        output_path = os.path.join(output_folder, f"{modality}.nii.gz")
        _save_nifti(volume, affine, output_path)
        print(f"Saved {output_path}")

    # Combined outputs reuse the volumes built above instead of re-stacking them.
    t1 = volumes.get('T1')
    t1c = volumes.get('T1c')
    t2 = volumes.get('T2')
    mask = volumes.get('tumor_binary_mask')

    # m1: Blend T1 and T1c
    blended = None
    if t1 is None or t1c is None:
        print("Skipping m1: missing T1 or T1c slices")
    elif t1.shape != t1c.shape:
        warnings.warn(f"Skipping m1: T1 shape {t1.shape} differs from T1c shape {t1c.shape}")
    else:
        blended = (t1 + t1c) / 2.0
        _save_nifti(blended, affine, os.path.join(output_folder, 'm1.nii.gz'))
        print("Saved m1.nii.gz")

    # m2: Overlay tumor_binary_mask on T1+T1c blend
    if mask is None or t1 is None or t1c is None:
        print("Skipping m2: missing tumor_binary_mask, T1, or T1c slices")
    elif blended is None or mask.shape != blended.shape:
        warnings.warn("Skipping m2: tumor_binary_mask, T1 and T1c volumes do not share one shape")
    else:
        m2_volume = blended.copy()
        # Paint masked voxels with the brightest blend value so the tumor stands out.
        m2_volume[mask > 0] = blended.max()
        _save_nifti(m2_volume, affine, os.path.join(output_folder, 'm2.nii.gz'))
        print("Saved m2.nii.gz")

    # m3: Stack T1, T1c, T2 as 3 channels
    if t1 is None or t1c is None or t2 is None:
        print("Skipping m3: missing T1, T1c, or T2 slices")
    elif not (t1.shape == t1c.shape == t2.shape):
        warnings.warn(f"Skipping m3: shapes differ (T1 {t1.shape}, T1c {t1c.shape}, T2 {t2.shape})")
    else:
        m3_volume = np.stack([t1, t1c, t2], axis=-1)
        _save_nifti(m3_volume, affine, os.path.join(output_folder, 'm3.nii.gz'))
        print("Saved m3.nii.gz")

# Example run on Kaggle: convert every modality of the attached dataset.
input_folder = "/kaggle/input/brain-mri-for-nii-format"  # Point this at your own dataset folder
output_folder = "/kaggle/working/output"
modalities = ["F", "T1", "T1c", "T2", "tumor_binary_mask", "tumor_grade_mask"]
images_to_niigz(
    input_folder=input_folder,
    output_folder=output_folder,
    modalities=modalities,
    slices_per_modality=300,
    is_dicom=False,
)

Files in input folder:
  batch_0_inpaint_0_input_0_F_sample.png
  batch_0_inpaint_0_input_0_T1_sample.png
  batch_0_inpaint_0_input_0_T1c_sample.png
  batch_0_inpaint_0_input_0_T2_sample.png
  batch_0_inpaint_0_input_0_tumor_binary_mask.png
  batch_0_inpaint_0_input_0_tumor_grade_mask.png
  batch_0_inpaint_10_input_5_F_sample.png
  batch_0_inpaint_10_input_5_T1_sample.png
  batch_0_inpaint_10_input_5_T1c_sample.png
  batch_0_inpaint_10_input_5_T2_sample.png
  batch_0_inpaint_10_input_5_tumor_binary_mask.png
  batch_0_inpaint_10_input_5_tumor_grade_mask.png
  batch_0_inpaint_11_input_5_F_sample.png
  batch_0_inpaint_11_input_5_T1_sample.png
  batch_0_inpaint_11_input_5_T1c_sample.png
  batch_0_inpaint_11_input_5_T2_sample.png
  batch_0_inpaint_11_input_5_tumor_binary_mask.png
  batch_0_inpaint_11_input_5_tumor_grade_mask.png
  batch_0_inpaint_12_input_6_F_sample.png
  batch_0_inpaint_12_input_6_T1_sample.png
  batch_0_inpaint_12_input_6_T1c_sample.png
  batch_0_inpaint_12_input_6_T2_sam



Saved /kaggle/working/output/F.nii.gz




Saved /kaggle/working/output/T1.nii.gz




Saved /kaggle/working/output/T1c.nii.gz




Saved /kaggle/working/output/T2.nii.gz




Saved /kaggle/working/output/tumor_binary_mask.nii.gz




Saved /kaggle/working/output/tumor_grade_mask.nii.gz
Saved m1.nii.gz
Saved m2.nii.gz
Saved m3.nii.gz


In [5]:
import os
# Show the first ten filenames (sorted) to sanity-check the dataset naming scheme.
first_ten_names = sorted(os.listdir("/kaggle/input/brain-mri-for-nii-format"))[:10]
print(first_ten_names)


['batch_0_inpaint_0_input_0_F_sample.png', 'batch_0_inpaint_0_input_0_T1_sample.png', 'batch_0_inpaint_0_input_0_T1c_sample.png', 'batch_0_inpaint_0_input_0_T2_sample.png', 'batch_0_inpaint_0_input_0_tumor_binary_mask.png', 'batch_0_inpaint_0_input_0_tumor_grade_mask.png', 'batch_0_inpaint_10_input_5_F_sample.png', 'batch_0_inpaint_10_input_5_T1_sample.png', 'batch_0_inpaint_10_input_5_T1c_sample.png', 'batch_0_inpaint_10_input_5_T2_sample.png']
