In [4]:
import os
import pathlib

In [14]:
# Root folder of the raw dataset; the processing loop expects one
# sub-directory per patient directly underneath it.
filepath = (
    "/mnt/i/main_project_linux/OpenMMDiT/datasets/Sclerosis/"
    "8bctsm8jz7-1/8bctsm8jz7-1/"
    "Brain MRI Dataset of Multiple Sclerosis with Consensus Manual "
    "Lesion Segmentation and Patient Meta Information"
)

In [27]:
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt

# Export the central slices (along the third array axis) of every
# non-segmentation NIfTI volume found under `filepath` as min-max normalised
# grayscale PNG files in `output_dir`.
output_dir = '/mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed'
os.makedirs(output_dir, exist_ok=True)
num_slices = 10

# Longest suffix first so that ".nii.gz" is stripped as a whole; with
# os.path.splitext only ".gz" would be removed and ".nii" would leak into the
# PNG file name.
NIFTI_EXTENSIONS = (".nii.gz", ".nii")


def _nifti_stem(filename):
    """Return `filename` without its NIfTI extension, or None if it has none."""
    for ext in NIFTI_EXTENSIONS:
        if filename.endswith(ext):
            return filename[:-len(ext)]
    return None


def _middle_slice_range(depth, count):
    """Return a range of at most `count` indices centred on `depth // 2`.

    The stop index is derived from the start index, so an odd `count` yields
    exactly `count` slices (a symmetric `mid +/- count // 2` window drops one).
    The range is clamped to `[0, depth)`.
    """
    start = max(0, depth // 2 - count // 2)
    stop = min(depth, start + count)
    return range(start, stop)


# sorted() makes the processing order (and the log below) reproducible;
# os.listdir returns entries in arbitrary order.
for patientFiles in sorted(os.listdir(filepath)):
    patientFileDir = os.path.join(filepath, patientFiles)
    if not os.path.isdir(patientFileDir):
        continue

    for mri in sorted(os.listdir(patientFileDir)):
        # Files whose name contains "Seg" are excluded from the export.
        if "Seg" in mri:
            continue
        stem = _nifti_stem(mri)
        if stem is None:
            continue

        MRIFileName = os.path.join(patientFileDir, mri)

        # Load the NIfTI image as a floating point array.
        img = nib.load(MRIFileName)
        data = img.get_fdata()

        # Slicing below assumes a 3-D volume; anything else would either raise
        # or be misread by imsave as an RGB(A) image.
        if data.ndim != 3:
            print(f"Skipped (expected a 3-D volume, got shape {data.shape}): {MRIFileName}")
            continue

        for i, slice_idx in enumerate(_middle_slice_range(data.shape[2], num_slices)):
            slice_data = data[:, :, slice_idx]

            # Min-max normalise to [0, 1); the epsilon avoids division by zero
            # on constant (e.g. all-background) slices.
            lowest = np.min(slice_data)
            slice_data = (slice_data - lowest) / (np.max(slice_data) - lowest + 1e-7)

            # Slices are numbered from 1 within each volume.
            output_file = os.path.join(output_dir, f"{stem}_slice_{i+1}.png")
            plt.imsave(output_file, slice_data, cmap="gray")

            print(f"Saved: {output_file}")


Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_1.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_2.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_3.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_4.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_5.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_6.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_7.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_8.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Sclerosis_processed/1-Flair_slice_9.png
Saved: /mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/

In [30]:
# Number of entries currently present in the output directory.
sum(1 for _ in os.scandir(output_dir))

1800

In [35]:
# Build one [relative image path, caption] row per exported slice PNG.

# Image paths in the CSV are written relative to this directory (this replaces
# the former hard-coded `output_dir[36:]` slice, which yields the same value
# for the current `output_dir` but silently breaks if the path changes).
PROJECT_ROOT = '/mnt/i/main_project_linux/OpenMMDiT'

# (marker, caption) pairs; the first marker found in the file name wins.
# Ordered Flair, T2, T1 so that a name containing several markers resolves the
# same way as the previous chain of independent `if` statements (last match
# won).
MODALITY_CAPTIONS = (
    ('Flair', 'Flair Brain MRI of a Patient with Sclerosis.'),
    ('T2', 'T2-Weighted Brain MRI of a Patient with Sclerosis.'),
    ('T1', 'T1-Weighted Brain MRI of a Patient with Sclerosis.'),
)


def _caption_for(filename):
    """Return the caption for the first modality marker in `filename`, else None."""
    for marker, text in MODALITY_CAPTIONS:
        if marker in filename:
            return text
    return None


relative_image_dir = os.path.relpath(output_dir, PROJECT_ROOT)

csv_data = []
# sorted() gives a reproducible row order; os.listdir order is arbitrary.
for images in sorted(os.listdir(output_dir)):
    # Only exported slices are captioned. This skips e.g. data_captions.csv,
    # which is written into this same directory and would otherwise inherit
    # the previous file's caption (or raise NameError if listed first).
    if not images.endswith('.png'):
        continue
    caption = _caption_for(images)
    if caption is None:
        continue
    csv_data.append([os.path.join(relative_image_dir, images), caption])

In [37]:
import pandas as pd

In [39]:
# Tabulate the collected rows: one row per image, with its path and caption.
caption_columns = ['image', 'caption']
df = pd.DataFrame(data=csv_data, columns=caption_columns)

In [41]:
# Write the caption table into the output directory; the DataFrame index is
# omitted so the file contains only the table's own columns.
captions_csv_path = os.path.join(output_dir, "data_captions.csv")
df.to_csv(captions_csv_path, index=False)