In [68]:
import os

import numpy as np
import pandas as pd
import pydicom
from PIL import Image

def dcm_to_png(dcm_path, output_folder, index):
    """Convert one DICOM file into an 8-bit PNG stored in ``output_folder``.

    The pixel data is shifted so its minimum becomes 0 and then scaled so its
    maximum becomes 255 before being cast to ``uint8``. Files whose maximum
    pixel value is 0 are reported and skipped.

    Args:
        dcm_path: Path of the ``.dcm`` file to read.
        output_folder: Directory that receives the PNG; created if missing.
        index: Value appended to the output file name (after an underscore)
            so that equally named DICOM files from different folders do not
            overwrite each other.

    Returns:
        ``(True, output_path)`` when a PNG was written, ``(False, False)``
        when the image was skipped.
    """
    os.makedirs(output_folder, exist_ok=True)
    dicom_image = pydicom.dcmread(dcm_path)

    image_array = dicom_image.pixel_array.astype(np.float32)

    if np.max(image_array) == 0:
        print(f"Skipping black image: {dcm_path}")
        return False, False

    # Min-max normalisation to the 0-255 range.
    image_array -= np.min(image_array)
    peak = np.max(image_array)
    if peak > 0:  # a constant image is all zeros here; avoid dividing by zero
        image_array = (image_array / peak) * 255.0

    image_array = image_array.astype(np.uint8)
    img = Image.fromarray(image_array)

    # The separator keeps (stem, index) pairs unambiguous. Without it,
    # "Image-1" + 23 and "Image-12" + 3 would both become "Image-123.png"
    # and the second conversion would silently overwrite the first.
    stem = os.path.splitext(os.path.basename(dcm_path))[0]
    filename = f"{stem}_{index}.png"
    output_path = os.path.join(output_folder, filename)

    img.save(output_path)
    print(f"Converted: {dcm_path} -> {output_path}")

    return (True, output_path)

def batch_convert_dcm_to_png(input_folder, output_folder):
    """Convert every ``.dcm`` file directly inside ``input_folder`` to PNG.

    Files are processed in sorted name order so the index given to each file
    is reproducible between runs. The index starts at 1 and only advances
    after a successful conversion, so skipped images do not consume a number.

    Args:
        input_folder: Directory containing the ``.dcm`` files (not recursive).
        output_folder: Directory that receives the PNG files; created if missing.
    """
    os.makedirs(output_folder, exist_ok=True)
    next_index = 1
    for filename in sorted(os.listdir(input_folder)):
        if not filename.endswith(".dcm"):
            continue
        # dcm_to_png requires an index argument; omitting it raised TypeError.
        converted, _ = dcm_to_png(
            os.path.join(input_folder, filename), output_folder, next_index
        )
        if converted:
            next_index += 1



In [69]:
# Root of the DICOM input tree. The conversion loop in this notebook walks three
# directory levels beneath it (dataset / patient / MRI type) to reach the .dcm files.
file_path = "/mnt/i/main_project_linux/OpenMMDiT/datasets/Cancer Archieve Tumor Large/PKG - RSNA-ASNR-MICCAI-BraTS-2021/RSNA-ASNR-MICCAI-BraTS-2021/BraTS2021_TrainingSet_dcm"

In [70]:
# NOTE: despite the name, this is a directory. It is passed to dcm_to_png as the
# output folder that receives the converted PNG files.
output_file = "/mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Large_Tumor_dataset_processed"

In [71]:
import os

In [72]:
# Collects one [image path, caption] row per converted PNG; the rows are later
# turned into the captions DataFrame.
csv_data = []

In [None]:
# Convert every DICOM slice under `file_path` to PNG and build the caption table.
# Expected layout: <file_path>/<dataset>/<patient>/<MRI type>/<slice>.dcm

# Marker searched for in the MRI-type folder name -> caption for its slices.
# Checked in this order so that, should a name contain several markers, the
# result matches the previous chain of independent `if`s (last match won).
# Any folder whose name contains "T1" gets the T1 caption.
CAPTION_BY_MARKER = (
    ("FLAIR", "Flair Brain MRI of a Patient with Tumour. "),
    ("T2", "T2-Weighted Brain MRI of a Patient with Tumour. "),
    ("T1", "T1-Weighted Brain MRI of a Patient with Tumour. "),
)

# Image paths in the CSV are stored relative to this directory (this replaces
# the former hard-coded `[36:]` slice, 36 being the length of this prefix).
PROJECT_ROOT = "/mnt/i/main_project_linux/OpenMMDiT/"

# `output_file` is the PNG *directory*, so the CSV cannot be written to it;
# it goes next to that directory instead.
captions_csv_path = os.path.join(
    os.path.dirname(os.path.normpath(output_file)), "Captions.csv"
)


def _sorted_subdirs(parent):
    """Return (name, path) pairs for the sub-directories of `parent`, sorted by name."""
    candidates = ((name, os.path.join(parent, name)) for name in sorted(os.listdir(parent)))
    # Stray files (e.g. OS metadata) would make os.listdir fail one level down.
    return [(name, path) for name, path in candidates if os.path.isdir(path)]


csv_data = []  # rebuilt from scratch so re-running this cell does not duplicate rows
global_index = 1
for _, dataset_root_path in _sorted_subdirs(file_path):
    for _, patient_root_folder in _sorted_subdirs(dataset_root_path):
        for mri_types, mri_type_folder in _sorted_subdirs(patient_root_folder):
            # The caption depends only on the folder, so resolve it once here.
            caption = next(
                (text for marker, text in CAPTION_BY_MARKER if marker in mri_types),
                None,
            )
            if caption is None:
                # Previously this case raised NameError or silently reused the
                # caption of the preceding folder.
                print(f"Skipping unrecognised MRI type folder: {mri_type_folder}")
                continue

            for dcm_file_name in sorted(os.listdir(mri_type_folder)):
                if not dcm_file_name.endswith(".dcm"):
                    continue
                dcm_file_path = os.path.join(mri_type_folder, dcm_file_name)
                validity, output_file_name = dcm_to_png(dcm_file_path, output_file, global_index)
                if not validity:
                    continue
                global_index += 1
                csv_data.append([os.path.relpath(output_file_name, PROJECT_ROOT), caption])

df = pd.DataFrame(csv_data, columns=["image", "caption"])
df.to_csv(captions_csv_path, index=False)

In [None]:
# Scratch check: number of characters in the project-root prefix, i.e. the offset
# at which a path relative to the project root begins.
len("/mnt/i/main_project_linux/OpenMMDiT/")

In [None]:
# Sanity-check the first rows of the image/caption table.
df.head()

In [None]:
# Show the row count and a short preview only; printing the whole list (one row
# per converted slice) floods the notebook output.
print(f"{len(csv_data)} rows")
csv_data[:5]

In [79]:
# Export the image/caption table as Captions.csv in the MRI_DATASET folder
# (the parent of the PNG output directory), without the DataFrame index column.
df.to_csv('/mnt/i/main_project_linux/OpenMMDiT/datasets/MRI_DATASET/Captions.csv',index=False)