## Datalist Generator ##


Copyright (c) MONAI Consortium
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

Created by Udbhav Ram - Cardenas Lab, University of Alabama at Birmingham for contribution towards the MONAI project


## Setup environment ##

In [22]:
!python3 -c "import monai" || pip install -q "monai"

In [23]:
import os
import json
import random
import shutil
import tempfile
from monai.config import print_config
from monai.apps import download_and_extract

print_config()

MONAI version: 1.1.0
Numpy version: 1.18.5
Pytorch version: 1.13.1
MONAI flags: HAS_EXT = False, USE_COMPILED = False, USE_META_DICT = False
MONAI rev id: a2ec3752f54bfc3b40e7952234fbeb5452ed63e3
MONAI __file__: /Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/site-packages/monai/__init__.py

Optional dependencies:
Pytorch Ignite version: NOT INSTALLED or UNKNOWN VERSION.
Nibabel version: NOT INSTALLED or UNKNOWN VERSION.
scikit-image version: 0.17.2
Pillow version: 7.2.0
Tensorboard version: 2.3.0
gdown version: NOT INSTALLED or UNKNOWN VERSION.
TorchVision version: NOT INSTALLED or UNKNOWN VERSION.
tqdm version: 4.50.0
lmdb version: NOT INSTALLED or UNKNOWN VERSION.
psutil version: 5.7.2
pandas version: 1.1.2
einops version: NOT INSTALLED or UNKNOWN VERSION.
transformers version: NOT INSTALLED or UNKNOWN VERSION.
mlflow version: NOT INSTALLED or UNKNOWN VERSION.
pynrrd version: NOT INSTALLED or UNKNOWN VERSION.

For details about installing the optional dependencies, p

In [24]:
# Construct skeleton JSON to populate with your own data
datalist_json = {
    "testing": [],
    "training": []
}

In [25]:
# Setup paths to your data
directory = os.environ.get("MONAI_DATA_DIRECTORY")
root_dir = tempfile.mkdtemp() if directory is None else directory
print(root_dir)

/var/folders/gr/5q39rjj57fs4z15w99hz0pqh0000gn/T/tmpf4k3kt1s


In [26]:
# Download sample MSD Datset
msd_task = "Task04_Hippocampus"
resource = "https://msd-for-monai.s3-us-west-2.amazonaws.com/" + msd_task + ".tar"

compressed_file = os.path.join(root_dir, msd_task + ".tar")
dataroot = os.path.join(root_dir, msd_task)
if not os.path.exists(dataroot):
    download_and_extract(resource, compressed_file, root_dir)

Task04_Hippocampus.tar: 27.1MB [00:11, 2.50MB/s]                            

2023-01-09 10:39:27,265 - INFO - Downloaded: /var/folders/gr/5q39rjj57fs4z15w99hz0pqh0000gn/T/tmpf4k3kt1s/Task04_Hippocampus.tar
2023-01-09 10:39:27,265 - INFO - Expected md5 is None, skip md5 check for file /var/folders/gr/5q39rjj57fs4z15w99hz0pqh0000gn/T/tmpf4k3kt1s/Task04_Hippocampus.tar.
2023-01-09 10:39:27,266 - INFO - Writing into directory: /var/folders/gr/5q39rjj57fs4z15w99hz0pqh0000gn/T/tmpf4k3kt1s.





In [27]:
#MSD dataset structure follows the following convention:
test_dir = os.path.join(dataroot, "imagesTs/")
train_dir = os.path.join(dataroot, "imagesTr/")
label_dir = os.path.join(dataroot, "labelsTr/")

In [28]:
for file in os.listdir(test_dir):
    datalist_json["testing"].append({
        "image": './imagesTs/' + file,
    })

In [29]:
datalist_json['testing'][:10]

[{'image': './imagesTs/hippocampus_267.nii.gz'},
 {'image': './imagesTs/hippocampus_379.nii.gz'},
 {'image': './imagesTs/hippocampus_208.nii.gz'},
 {'image': './imagesTs/hippocampus_275.nii.gz'},
 {'image': './imagesTs/hippocampus_131.nii.gz'},
 {'image': './imagesTs/hippocampus_140.nii.gz'},
 {'image': './imagesTs/hippocampus_076.nii.gz'},
 {'image': './imagesTs/hippocampus_115.nii.gz'},
 {'image': './imagesTs/hippocampus_168.nii.gz'},
 {'image': './imagesTs/hippocampus_119.nii.gz'}]

In [30]:
# Populate training with files in your directory
for file in os.listdir(train_dir):
    datalist_json["training"].append({
        "image": './imagesTr/' + file,
        "label": './labelsTr/' + file,
        "fold": 0  # Initialize as single fold
    })

In [31]:
datalist_json['training'][:10]

[{'image': './imagesTr/hippocampus_367.nii.gz',
  'label': './labelsTr/hippocampus_367.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_304.nii.gz',
  'label': './labelsTr/hippocampus_304.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_204.nii.gz',
  'label': './labelsTr/hippocampus_204.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_279.nii.gz',
  'label': './labelsTr/hippocampus_279.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_308.nii.gz',
  'label': './labelsTr/hippocampus_308.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_375.nii.gz',
  'label': './labelsTr/hippocampus_375.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_216.nii.gz',
  'label': './labelsTr/hippocampus_216.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_316.nii.gz',
  'label': './labelsTr/hippocampus_316.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_089.nii.gz',
  'label': './labelsTr/hippocampus_089.nii.gz',
  'fold': 0},
 {'image':

In [32]:
# Split training into 5 folds randomly
random.seed(42)

for i in range(5):
    for j in range(len(datalist_json["training"])):
        if random.random() < 0.2:
            datalist_json["training"][j]["fold"] = i

In [33]:
datalist_json['training'][:5]

[{'image': './imagesTr/hippocampus_367.nii.gz',
  'label': './labelsTr/hippocampus_367.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_304.nii.gz',
  'label': './labelsTr/hippocampus_304.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_204.nii.gz',
  'label': './labelsTr/hippocampus_204.nii.gz',
  'fold': 4},
 {'image': './imagesTr/hippocampus_279.nii.gz',
  'label': './labelsTr/hippocampus_279.nii.gz',
  'fold': 0},
 {'image': './imagesTr/hippocampus_308.nii.gz',
  'label': './labelsTr/hippocampus_308.nii.gz',
  'fold': 0}]

In [34]:
with open('datalist.json', 'w', encoding='utf-8') as f:
    json.dump(datalist_json, f, ensure_ascii=False, indent=4)

In [35]:
if directory is None:
    shutil.rmtree(root_dir)