In [1]:
import os
import glob
import json
import shutil
import tempfile
import time
import monai
from monai.utils import set_determinism
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


# Huashan (Chinese Han)

In [2]:
# Set deterministic training for reproducibility
set_determinism(seed=0)

# Setup data directory
directory = os.environ.get("MONAI_DATA_DIRECTORY")
root_dir = os.getcwd() if directory is None else directory
print(root_dir)

# Training data

def create_dict():
    # Set the base path for the data
    base_path = os.path.join(root_dir, 'Data/Training')  
    
    # Set the path for the label folder
    label_path = os.path.join(base_path, 'Label')
    
    # Find all label files
    label_files = glob.glob(os.path.join(label_path, 'THIGH_*.nii.gz'))
    
    # Initialize an empty list for the dictionary
    ddict = []
    
    # Loop through each label file
    for label_file in label_files:
        # Extract the label file name
        label_name = os.path.basename(label_file)
        
        # Extract the image file names for each type
        fat_files = glob.glob(os.path.join(base_path, 'Fat', label_name.replace('.nii.gz', '_*.nii.gz')))
        water_files = glob.glob(os.path.join(base_path, 'Water', label_name.replace('.nii.gz', '_*.nii.gz')))
        t1_files = glob.glob(os.path.join(base_path, 'T1', label_name.replace('.nii.gz', '_*.nii.gz')))
        t2_files = glob.glob(os.path.join(base_path, 'T2', label_name.replace('.nii.gz', '_*.nii.gz')))
        stir_files = glob.glob(os.path.join(base_path, 'STIR', label_name.replace('.nii.gz', '_*.nii.gz')))
        
        # Create a dictionary for each set of files
        for i in range(len(fat_files)):
            ddict.append({'image': fat_files[i], 'label': label_file})
        for i in range(len(water_files)):
            ddict.append({'image': water_files[i], 'label': label_file})
        for i in range(len(t1_files)):
            ddict.append({'image': t1_files[i], 'label': label_file})
        for i in range(len(t2_files)):
            ddict.append({'image': t2_files[i], 'label': label_file})
        for i in range(len(stir_files)):
            ddict.append({'image': stir_files[i], 'label': label_file})
    
    return ddict

Training_dict1 = create_dict()
print(f'Training_dict1: {(len(Training_dict1))}')

# add some incompatible files (Others)
import re
image_files = glob.glob('/home/hua/Muscle_seg/Data/Training/Others/THIGH_*_000*.nii.gz')
label_files = glob.glob('/home/hua/Muscle_seg/Data/Training/Others/THIGH_*_others.nii.gz')

pattern = r'(.*/THIGH_\d+)_\d+\.nii\.gz'

Training_dict2 = []

for image_file in image_files:
    match = re.match(pattern, image_file)
    if match:
        thigh_id = match.group(1)
        label_file = [l for l in label_files if thigh_id in l][0]
        Training_dict2.append({'image': image_file, 'label': label_file})

print(f'Training_dict2: {(len(Training_dict2))}')

Training_dict = Training_dict1 + Training_dict2
print(f'Training_dict: {(len(Training_dict))}')

# random shuffle Training_dict 

import random
random.shuffle(Training_dict)
Training_dict[0:5]

/home/hua/Muscle_seg
Training_dict1: 262
Training_dict2: 12
Training_dict: 274


[{'image': '/home/hua/Muscle_seg/Data/Training/Water/THIGH_014_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_014.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/STIR/THIGH_021_0004.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_021.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/T2/THIGH_030_0003.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_030.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/Fat/THIGH_054_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_054.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/STIR/THIGH_001_0004.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_001.nii.gz'}]

# Helsinki (Finns)

In [3]:
directory = os.environ.get("MONAI_DATA_DIRECTORY")
root_dir = os.getcwd() if directory is None else directory
print(root_dir)


def create_dict():
    # Set the base path for the data
    base_path = os.path.join(root_dir, 'Data/Helsinki')  
    
    # Set the path for the label folder
    label_path = os.path.join(base_path, 'Label')
    
    # Find all label files
    label_files = glob.glob(os.path.join(label_path, 'THIGH_*.nii.gz'))
    
    # Initialize an empty list for the dictionary
    ddict = []
    
    # Loop through each label file
    for label_file in label_files:
        # Extract the label file name
        label_name = os.path.basename(label_file)
        
        
        # Extract the image file names for each type
        t1_files = glob.glob(os.path.join(base_path, 'T1', label_name.replace('.nii.gz', '_*.nii.gz')))
        stir_files = glob.glob(os.path.join(base_path, 'STIR', label_name.replace('.nii.gz', '_*.nii.gz')))
        

        
        # Create a dictionary for each set of files
        for i in range(len(t1_files)):
            ddict.append({'image': t1_files[i], 'label': label_file})
        for i in range(len(stir_files)):
            ddict.append({'image': stir_files[i], 'label': label_file})
    
    return ddict

Helsinki_dict = create_dict()
print(f'Helsinki_dict: {(len(Helsinki_dict))}')
Helsinki_dict[0:5]

/home/hua/Muscle_seg
Helsinki_dict: 54


[{'image': '/home/hua/Muscle_seg/Data/Helsinki/T1/THIGH_107_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_107.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Helsinki/STIR/THIGH_107_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_107.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Helsinki/T1/THIGH_108_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_108.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Helsinki/STIR/THIGH_108_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_108.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Helsinki/T1/THIGH_121_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_121.nii.gz'}]

# MyoSegmenTUM (Germans)

In [4]:
directory = os.environ.get("MONAI_DATA_DIRECTORY")
root_dir = os.getcwd() if directory is None else directory
print(root_dir)


def create_dict():
    # Set the base path for the data
    base_path = os.path.join(root_dir, 'Data/MyoSegmenTUM')  
    
    # Set the path for the label folder
    label_path = os.path.join(base_path, 'Label')
    
    # Find all label files
    label_files = glob.glob(os.path.join(label_path, '*.nii.gz'))
    
    # Initialize an empty list for the dictionary
    ddict = []
    
    # Loop through each label file
    for label_file in label_files:
        # Extract the label file name
        label_name = os.path.basename(label_file)
        
        
        # Extract the image file names for each type
        fat_files = glob.glob(os.path.join(base_path, 'Fat', label_name.replace('.nii.gz', '_*.nii.gz')))
        water_files = glob.glob(os.path.join(base_path, 'Water', label_name.replace('.nii.gz', '_*.nii.gz')))
        

        
        # Create a dictionary for each set of files
        for i in range(len(fat_files)):
            ddict.append({'image': fat_files[i], 'label': label_file})
        for i in range(len(water_files)):
            ddict.append({'image': water_files[i], 'label': label_file})
    
    return ddict

MyoSegmenTUM_dict = create_dict()
print(f'Helsinki_dict: {(len(MyoSegmenTUM_dict))}')
MyoSegmenTUM_dict[0:5]

/home/hua/Muscle_seg
Helsinki_dict: 38


[{'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Fat/HV001_2_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV001_2.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Water/HV001_2_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV001_2.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Fat/HV004_1_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV004_1.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Water/HV004_1_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV004_1.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Fat/P003_1_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/P003_1.nii.gz'}]

# Combination dataset

In [13]:
Combination_dict = Training_dict + Helsinki_dict + MyoSegmenTUM_dict
print (f"All MRIs: {len(Combination_dict)}")


random.shuffle(Combination_dict)

Test_dict = Combination_dict[-35:]
print (f"Test MRIs: {len(Test_dict)}")

Train_val_dict = Combination_dict[0:331]
print (f"Train and val MRIs: {len(Train_val_dict)}")



All MRIs: 366
Test MRIs: 35
Train and val MRIs: 331


In [16]:
Test_dict

[{'image': '/home/hua/Muscle_seg/Data/Training/Water/THIGH_059_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_059.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Water/HV003_3_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV003_3.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/T1/THIGH_022_0002.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_022.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Helsinki/STIR/THIGH_114_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_114.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/T1/THIGH_049_0002.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_049.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/Water/THIGH_026_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_026.nii.gz'},
 {'image': '/home/hua/Muscle_seg/Data/Training/T2/THIGH_061_0003.nii.gz',
  'label': '/home/hua/Muscle_

In [15]:
# Define the number of folds
num_folds = 5


indices = list(range(len(Train_val_dict)))


# Divide the indices into num_folds equal parts
folds = [indices[i::num_folds] for i in range(num_folds)]

# Create a list of dictionaries with the desired structure
fold_data = []
for i, fold in enumerate(folds):
    for j in fold:
        data = {'fold': i, 'image': Train_val_dict[j]['image'], 'label': Train_val_dict[j]['label']}
        fold_data.append(data)

fold_data

[{'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/Helsinki/T1/THIGH_116_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_116.nii.gz'},
 {'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/Helsinki/T1/THIGH_120_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_120.nii.gz'},
 {'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/Training/T1/THIGH_063_0002.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_063.nii.gz'},
 {'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/Helsinki/STIR/THIGH_122_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Helsinki/Label/THIGH_122.nii.gz'},
 {'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/Training/Fat/THIGH_048_0000.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/Training/Label/THIGH_048.nii.gz'},
 {'fold': 0,
  'image': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Water/HV009_1_0001.nii.gz',
  'label': '/home/hua/Muscle_seg/Data/MyoSegmenTUM/Label/HV009_1.nii.gz'},
 {'fold': 0,
  'image': '/home