In [22]:
import matplotlib.pyplot as plt
import nibabel as nib
import pickle
from tqdm import tqdm

import shutil
import glob
import json
import os
import numpy as np

from monai import transforms

# Prepare training and testing dataset

In [None]:
# Stage the raw ACDC files into a flat dataset layout: the per-frame images,
# their "_gt" label volumes and every patient's Info.cfg are copied into one
# folder per kind (images / labels / infos) and per split (Tr / Ts).
imageTr_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/training/patient*/patient*_frame" + ('[0-9]' * 2) + ".nii.gz"))
labelTr_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/training/patient*/patient*_frame" + ('[0-9]' * 2) + "_gt.nii.gz"))
imageTs_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/testing/patient*/patient*_frame" + ('[0-9]' * 2) + ".nii.gz"))
labelTs_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/testing/patient*/patient*_frame" + ('[0-9]' * 2) + "_gt.nii.gz"))
infoTr_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/training/patient*/Info.cfg"))
infoTs_paths = sorted(glob.glob("G:/gitCode/MGH/data/ACDC/database/testing/patient*/Info.cfg"))

save_imgTr_dir = "G:/gitCode/MGH/ACDCTest/dataset/imagesTr"
save_lbTr_dir = "G:/gitCode/MGH/ACDCTest/dataset/labelsTr"
save_imgTs_dir = "G:/gitCode/MGH/ACDCTest/dataset/imagesTs"
save_lbTs_dir = "G:/gitCode/MGH/ACDCTest/dataset/labelsTs"
save_infoTr_dir = "G:/gitCode/MGH/ACDCTest/dataset/infosTr"
save_infoTs_dir = "G:/gitCode/MGH/ACDCTest/dataset/infosTs"

# (source files, destination folder, prefix the file name with its parent folder?)
# Every Info.cfg shares the same file name, so it is renamed to
# "<parent folder>_Info.cfg" to stay unique once flattened into one folder.
copy_jobs = [
    (imageTr_paths, save_imgTr_dir, False),
    (imageTs_paths, save_imgTs_dir, False),
    (labelTr_paths, save_lbTr_dir, False),
    (labelTs_paths, save_lbTs_dir, False),
    (infoTr_paths, save_infoTr_dir, True),
    (infoTs_paths, save_infoTs_dir, True),
]

for src_paths, dst_dir, prefix_with_parent in copy_jobs:
    # shutil.copyfile does not create missing parent folders, so make sure the
    # destination exists before copying (no-op when it is already there).
    os.makedirs(dst_dir, exist_ok=True)
    for path in src_paths:
        if prefix_with_parent:
            file_name = '_'.join(os.path.normpath(path).split(os.path.sep)[-2:])
        else:
            file_name = os.path.basename(path)
        save_path = os.path.join(dst_dir, file_name)
        shutil.copyfile(path, save_path)

# Prepare Dataset JSON

In [None]:
# Re-index the staged dataset folders (sorted so images and labels line up by
# file name) and set up the static metadata header of the dataset index.
two_digits = '[0-9]' * 2

imageTr_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/imagesTr/patient*_frame" + two_digits + ".nii.gz")
imageTr_paths.sort()
labelTr_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/labelsTr/patient*_frame" + two_digits + "_gt.nii.gz")
labelTr_paths.sort()
imageTs_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/imagesTs/patient*_frame" + two_digits + ".nii.gz")
imageTs_paths.sort()
labelTs_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/labelsTs/patient*_frame" + two_digits + "_gt.nii.gz")
labelTs_paths.sort()
infoTr_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/infosTr/patient*_Info.cfg")
infoTr_paths.sort()
infoTs_paths = glob.glob("G:/gitCode/MGH/ACDCTest/dataset/infosTs/patient*_Info.cfg")
infoTs_paths.sort()

# Segmentation label ids -> structure names.
label_names = {"0": "background", "1": "RV", "2": "myocardium", "3": "LV"}
# Patient group names -> integer class ids.
group_ids = {"NOR": 0, "MINF": 1, "DCM": 2, "HCM": 3, "RV": 4}

data_json = dict(
    description="acdc shihua",
    name='acdc',
    tensorImageSize="3D",
    modality={"0": "MRI"},
    labels=label_names,
    groups=group_ids,
)

In [None]:
import random

# Fixed seed so the train/validation split is the same on every run.
np.random.seed(42)
# Draw 80 distinct patient numbers out of 1..100; these form the training
# split, the remaining 20 are used for validation by the cells below.
picked_numbers = np.random.choice(range(1, 101), size=80, replace=False)
selectedIDs = sorted("patient" + str(number).zfill(3) for number in picked_numbers)
selectedIDs

In [None]:
# Training split: one entry per frame of every patient listed in selectedIDs.
# Paths are stored relative to the dataset root as "<folder>/<file name>".
training = []
for img_path, lb_path in zip(imageTr_paths, labelTr_paths):
    patient_ID = os.path.basename(img_path).split("_")[0]
    if patient_ID not in selectedIDs:
        continue
    entry = {
        'image': '/'.join(os.path.normpath(img_path).split(os.path.sep)[-2:]),
        'label': '/'.join(os.path.normpath(lb_path).split(os.path.sep)[-2:]),
        'info': '/'.join(("infosTr", patient_ID + "_Info.cfg")),
    }
    training.append(entry)

data_json['training'] = training

In [None]:
# Validation split: the frames of every training-folder patient that is NOT
# in selectedIDs. Paths are stored relative to the dataset root.
validating = []
for img_path, lb_path in zip(imageTr_paths, labelTr_paths):
    patient_ID = os.path.basename(img_path).split('_')[0]
    if patient_ID in selectedIDs:
        continue
    entry = {
        'image': '/'.join(os.path.normpath(img_path).split(os.path.sep)[-2:]),
        'label': '/'.join(os.path.normpath(lb_path).split(os.path.sep)[-2:]),
        'info': '/'.join(("infosTr", patient_ID + "_Info.cfg")),
    }
    validating.append(entry)

data_json['validating'] = validating

In [None]:
# Test split: one entry per image/label pair found in the staged test folders.
def_rel = lambda p: '/'.join(os.path.normpath(p).split(os.path.sep)[-2:])  # "<folder>/<file name>"

testing = [
    {
        'image': def_rel(img_path),
        'label': def_rel(lb_path),
        'info': '/'.join(("infosTs", os.path.basename(img_path).split('_')[0] + "_Info.cfg")),
    }
    for img_path, lb_path in zip(imageTs_paths, labelTs_paths)
]

data_json['testing'] = testing

In [None]:

# Persist the dataset index as indented, human-readable UTF-8 JSON.
json_path = 'G:/gitCode/MGH/ACDCTest/dataset/dataset.json'
with open(json_path, 'w', encoding='utf-8') as json_file:
    json.dump(data_json, json_file, ensure_ascii=False, indent=4)

In [None]:
# Also store the same index as a pickle next to the JSON file.
pkl_path = 'G:/gitCode/MGH/ACDCTest/dataset/dataset.pkl'
with open(pkl_path, 'wb') as pkl_file:
    pickle.dump(data_json, pkl_file)

# Prepare ED (end-diastole) frames

In [None]:
# Build the ED-only variant of the dataset index: one entry per patient, whose
# image/label point at the frame number given by the "ED" line of that
# patient's Info.cfg. Relies on `selectedIDs` and `data_json` from earlier
# cells and overwrites data_json's 'training', 'validating' and 'testing'
# lists in place before pickling the result.
phase_key = "ED"

all_train_IDs = sorted(["patient" + str(number).zfill(3) for number in range(1, 101)])
all_test_IDs = sorted(["patient" + str(number).zfill(3) for number in range(101, 151)])

# (key in data_json, patients of that split, folder suffix: Tr or Ts)
split_specs = [
    ("training", [pid for pid in all_train_IDs if pid in selectedIDs], "Tr"),
    ("validating", [pid for pid in all_train_IDs if pid not in selectedIDs], "Tr"),
    ("testing", all_test_IDs, "Ts"),
]

for split_name, split_patient_IDs, suffix in split_specs:
    entries = []
    for patient_ID in split_patient_IDs:
        path_dic = {'info': f"infos{suffix}/{patient_ID}_Info.cfg"}

        info_file = f"G:/gitCode/MGH/ACDCTest/dataset/infos{suffix}/{patient_ID}_Info.cfg"
        with open(info_file) as f:
            for line in f:
                items = line.strip().split(': ')
                if items[0] == phase_key and len(items) > 1:
                    # Frame numbers are zero-padded to two digits in the file names.
                    frame = str(items[1]).zfill(2)
                    path_dic['image'] = f"images{suffix}/{patient_ID}_frame{frame}.nii.gz"
                    path_dic['label'] = f"labels{suffix}/{patient_ID}_frame{frame}_gt.nii.gz"

        # Fail fast instead of pickling an entry without image/label keys,
        # which would only surface later as a KeyError in whatever loads it.
        if 'image' not in path_dic:
            raise ValueError(f"No '{phase_key}: <frame>' line found in {info_file}")
        entries.append(path_dic)

    data_json[split_name] = entries

# Keep the per-split lists available under their usual notebook names.
training = data_json['training']
validating = data_json['validating']
testing = data_json['testing']

with open('G:/gitCode/MGH/ACDCTest/dataset/dataset_ed.pkl', 'wb') as file:
    pickle.dump(data_json, file)


# Prepare ES (end-systole) frames

In [None]:
# Build the ES-only variant of the dataset index: one entry per patient, whose
# image/label point at the frame number given by the "ES" line of that
# patient's Info.cfg. Relies on `selectedIDs` and `data_json` from earlier
# cells and overwrites data_json's 'training', 'validating' and 'testing'
# lists in place before pickling the result.
phase_key = "ES"

all_train_IDs = sorted(["patient" + str(number).zfill(3) for number in range(1, 101)])
all_test_IDs = sorted(["patient" + str(number).zfill(3) for number in range(101, 151)])

# (key in data_json, patients of that split, folder suffix: Tr or Ts)
split_specs = [
    ("training", [pid for pid in all_train_IDs if pid in selectedIDs], "Tr"),
    ("validating", [pid for pid in all_train_IDs if pid not in selectedIDs], "Tr"),
    ("testing", all_test_IDs, "Ts"),
]

for split_name, split_patient_IDs, suffix in split_specs:
    entries = []
    for patient_ID in split_patient_IDs:
        path_dic = {'info': f"infos{suffix}/{patient_ID}_Info.cfg"}

        info_file = f"G:/gitCode/MGH/ACDCTest/dataset/infos{suffix}/{patient_ID}_Info.cfg"
        with open(info_file) as f:
            for line in f:
                items = line.strip().split(': ')
                if items[0] == phase_key and len(items) > 1:
                    # Frame numbers are zero-padded to two digits in the file names.
                    frame = str(items[1]).zfill(2)
                    path_dic['image'] = f"images{suffix}/{patient_ID}_frame{frame}.nii.gz"
                    path_dic['label'] = f"labels{suffix}/{patient_ID}_frame{frame}_gt.nii.gz"

        # Fail fast instead of pickling an entry without image/label keys,
        # which would only surface later as a KeyError in whatever loads it.
        if 'image' not in path_dic:
            raise ValueError(f"No '{phase_key}: <frame>' line found in {info_file}")
        entries.append(path_dic)

    data_json[split_name] = entries

# Keep the per-split lists available under their usual notebook names.
training = data_json['training']
validating = data_json['validating']
testing = data_json['testing']

with open('G:/gitCode/MGH/ACDCTest/dataset/dataset_es.pkl', 'wb') as file:
    pickle.dump(data_json, file)


# Split 4D images into 3D volumes

In [27]:
# Start a fresh dataset index for the per-frame 3D dataset; the split lists
# are filled in by the cells below.
# Segmentation label ids -> structure names.
label_names = {"0": "background", "1": "RV", "2": "myocardium", "3": "LV"}
# Patient group names -> integer class ids.
group_ids = {"NOR": 0, "MINF": 1, "DCM": 2, "HCM": 3, "RV": 4}

data_json = dict(
    description="acdc shihua",
    name='acdc',
    tensorImageSize="3D",
    modality={"0": "MRI"},
    labels=label_names,
    groups=group_ids,
)

In [28]:
# Locate the raw "*4d.nii.gz" volumes and Info.cfg files of both splits,
# sorted by path.
imageTr_paths = glob.glob("G:/gitCode/MGH/data/ACDC/database/training/*/*4d.nii.gz")
imageTr_paths.sort()
imageTs_paths = glob.glob("G:/gitCode/MGH/data/ACDC/database/testing/*/*4d.nii.gz")
imageTs_paths.sort()
infoTr_paths = glob.glob("G:/gitCode/MGH/data/ACDC/database/training/*/Info.cfg")
infoTr_paths.sort()
infoTs_paths = glob.glob("G:/gitCode/MGH/data/ACDC/database/testing/*/Info.cfg")
infoTs_paths.sort()

In [29]:
import random

# Same seed and draw as the split used earlier in the notebook, so the same
# 80 of the 100 training-folder patients end up in the training split.
np.random.seed(42)
picked_numbers = np.random.choice(range(1, 101), size=80, replace=False)
selectedIDs = sorted("patient" + str(number).zfill(3) for number in picked_numbers)
selectedIDs[0: 5]

['patient001', 'patient004', 'patient005', 'patient006', 'patient007']

In [30]:
# Training split: for every patient in selectedIDs, slice the loaded volume
# along its last axis and save each slice as its own NIfTI file. Each saved
# frame gets an index entry holding the patient's Info.cfg fields plus the
# relative path of the saved image.
training = []

main_dir = "G:/gitCode/MGH/acdcSSL/dataset/mae3d"
dataset_dir = "imagesTr"
img_save_dir = os.path.join(main_dir, dataset_dir)
os.makedirs(img_save_dir, exist_ok=True)

with tqdm(total=len(selectedIDs), desc="processing data.........") as pbar:
    for img_path in imageTr_paths:
        patient_dir, file_name = os.path.split(img_path)
        patient_ID = file_name.split("_")[0]
        if patient_ID not in selectedIDs:
            continue

        img = nib.load(img_path)
        volume = img.get_fdata()
        n_frames = volume.shape[-1]

        # Parse the "key: value" lines of the Info.cfg stored next to the image.
        patient_info = {}
        with open(os.path.join(patient_dir, "Info.cfg"), 'r') as info_file:
            for raw_line in info_file:
                key_value = raw_line.strip().split(": ")
                patient_info[key_value[0]] = key_value[1]

        for frame_idx in range(n_frames):
            # The original affine is reused for every extracted frame.
            frame_img = nib.Nifti1Image(volume[..., frame_idx], img.affine)
            frame_entry = dict(patient_info)
            frame_entry['image'] = dataset_dir + f"/{patient_ID}_frame{str(frame_idx).zfill(3)}.nii.gz"
            nib.save(frame_img, os.path.join(main_dir, frame_entry['image']))
            training.append(frame_entry)
        # One progress tick per processed patient volume.
        pbar.update(1)
data_json['training'] = training

processing data.........: 100%|██████████| 80/80 [02:39<00:00,  1.99s/it]


In [31]:
# Validation split: same per-frame export as the training split, but for the
# training-folder patients that are NOT in selectedIDs.
validating = []

main_dir = "G:/gitCode/MGH/acdcSSL/dataset/mae3d"
dataset_dir = "imagesVd"
img_save_dir = os.path.join(main_dir, dataset_dir)
os.makedirs(img_save_dir, exist_ok=True)

with tqdm(total=100 - len(selectedIDs), desc="processing data.........") as pbar:
    for img_path in imageTr_paths:
        patient_dir, file_name = os.path.split(img_path)
        patient_ID = file_name.split("_")[0]
        if patient_ID in selectedIDs:
            continue

        img = nib.load(img_path)
        volume = img.get_fdata()
        n_frames = volume.shape[-1]

        # Parse the "key: value" lines of the Info.cfg stored next to the image.
        patient_info = {}
        with open(os.path.join(patient_dir, "Info.cfg"), 'r') as info_file:
            for raw_line in info_file:
                key_value = raw_line.strip().split(": ")
                patient_info[key_value[0]] = key_value[1]

        for frame_idx in range(n_frames):
            # The original affine is reused for every extracted frame.
            frame_img = nib.Nifti1Image(volume[..., frame_idx], img.affine)
            frame_entry = dict(patient_info)
            frame_entry['image'] = dataset_dir + f"/{patient_ID}_frame{str(frame_idx).zfill(3)}.nii.gz"
            nib.save(frame_img, os.path.join(main_dir, frame_entry['image']))
            validating.append(frame_entry)
        # One progress tick per processed patient volume.
        pbar.update(1)
data_json['validating'] = validating

processing data.........:  25%|██▌       | 20/80 [00:37<01:52,  1.87s/it]


In [33]:
# Test split: slice every volume in imageTs_paths along its last axis, save
# each slice as its own NIfTI file and record it together with the patient's
# Info.cfg fields.
testing = []

main_dir = "G:/gitCode/MGH/acdcSSL/dataset/mae3d"
dataset_dir = "imagesTs"
img_save_dir = os.path.join(main_dir, dataset_dir)
os.makedirs(img_save_dir, exist_ok=True)

with tqdm(total=len(imageTs_paths), desc="processing data.........") as pbar:
    for img_path in imageTs_paths:
        patient_dir, file_name = os.path.split(img_path)
        patient_ID = file_name.split("_")[0]

        img = nib.load(img_path)
        volume = img.get_fdata()
        n_frames = volume.shape[-1]

        # Parse the "key: value" lines of the Info.cfg stored next to the image.
        patient_info = {}
        with open(os.path.join(patient_dir, "Info.cfg"), 'r') as info_file:
            for raw_line in info_file:
                key_value = raw_line.strip().split(": ")
                patient_info[key_value[0]] = key_value[1]

        for frame_idx in range(n_frames):
            # The original affine is reused for every extracted frame.
            frame_img = nib.Nifti1Image(volume[..., frame_idx], img.affine)
            frame_entry = dict(patient_info)
            frame_entry['image'] = dataset_dir + f"/{patient_ID}_frame{str(frame_idx).zfill(3)}.nii.gz"
            nib.save(frame_img, os.path.join(main_dir, frame_entry['image']))
            testing.append(frame_entry)
        # One progress tick per processed patient volume.
        pbar.update(1)
data_json['testing'] = testing

processing data.........: 100%|██████████| 50/50 [01:40<00:00,  2.01s/it]


In [34]:
# Write the per-frame 3D dataset index twice: as indented UTF-8 JSON for
# inspection and as a pickle.
index_json_path = 'G:/gitCode/MGH/acdcSSL/dataset/mae3d/dataset3d.json'
index_pkl_path = 'G:/gitCode/MGH/acdcSSL/dataset/mae3d/dataset3d.pkl'

with open(index_json_path, 'w', encoding='utf-8') as json_file:
    json.dump(data_json, json_file, ensure_ascii=False, indent=4)

with open(index_pkl_path, 'wb') as pkl_file:
    pickle.dump(data_json, pkl_file)