In [1]:
import re
import os
import numpy as np
from yucca.functional.preprocessing import preprocess_case_for_training_with_label
from batchgenerators.utilities.file_and_folder_operations import subfiles, join, save_pickle, maybe_mkdir_p, save_json
from yucca.paths import yucca_raw_data, yucca_preprocessed_data
from yucca.utils.loading import read_file_to_nifti_or_np
from yucca.functional.preprocessing import preprocess_case_for_training_with_label
from yucca.functional.planning import make_plans_file, add_stats_to_plans_post_preprocessing
from yucca.training.managers.YuccaManager import YuccaManager

Set some variables that we'll need and create necessary paths

In [2]:
# --- Locations -------------------------------------------------------------
# Raw images/labels are read from the raw-data root; preprocessed cases and the
# plans file are written to a per-plan folder under the preprocessed-data root.
extension = ".nii.gz"
plans_name = "demo"

raw_labels_dir = join(yucca_raw_data, "Task001_OASIS/labelsTr")
raw_images_dir = join(yucca_raw_data, "Task001_OASIS/imagesTr")
target_dir = join(yucca_preprocessed_data, "Task001_OASIS", plans_name)

# --- Preprocessing options ---------------------------------------------------
norm_op = "volume_wise_znorm"
crop_to_nonzero = True
allow_missing_modalities = False

# Make sure the output folder exists before anything is written to it.
maybe_mkdir_p(target_dir)

Now make a barebones plan

In [3]:
# Settings that only the plans file needs. The preprocessing options
# (crop_to_nonzero, allow_missing_modalities, norm_op) and plans_name are defined
# once in the configuration cell and reused here, so the plan and the actual
# preprocessing cannot silently drift apart.
modalities = ("MRI",)
task_type = "segmentation"

# Build the barebones plans dict for a two-class (labels 0 and 1) task.
plans = make_plans_file(
    allow_missing_modalities=allow_missing_modalities,
    crop_to_nonzero=crop_to_nonzero,
    norm_op=norm_op,
    classes=[0, 1],
    plans_name=plans_name,
    modalities=modalities,
    task_type=task_type,
)

Now preprocess the samples in the folder.

In [4]:
# The label folder defines the subject list: strip the extension to get the subject ID.
# Hidden files (e.g. ".DS_Store") and files that do not end with `extension` are skipped,
# since slicing them by len(extension) would produce a bogus subject ID.
subjects = [
    file[: -len(extension)]
    for file in subfiles(raw_labels_dir, join=False)
    if file.endswith(extension) and not file.startswith(".")
]

# The image folder listing does not depend on the subject, so read it once.
raw_image_paths = subfiles(raw_images_dir)

for sub in subjects[:5]:
    # We'll just do the first 5 subjects in this demo.
    # This assumes the raw images are stored in the Yucca format, i.e. saved as
    # sub_XXX.ext where XXX is the modality encoding (e.g. 000 and 001 if two
    # modalities are present per subject).
    # The pattern is anchored at the start of the file name (re.match) and the dot is
    # escaped, so only "<sub>_<3 digits>.<anything>" is accepted.
    image_pattern = re.compile(re.escape(sub) + r"_\d{3}\.")
    image_paths = [path for path in raw_image_paths if image_pattern.match(os.path.basename(path))]
    if not image_paths:
        raise FileNotFoundError(f"No images found for subject '{sub}' in {raw_images_dir}")

    images = [read_file_to_nifti_or_np(path) for path in image_paths]
    label = read_file_to_nifti_or_np(join(raw_labels_dir, sub + extension))

    # Use the same options that were written to the plans file instead of repeating literals.
    # NOTE(review): one normalization scheme is passed per loaded image, assuming the
    # preprocessing function expects one entry per modality; confirm against yucca.
    images, label, image_props = preprocess_case_for_training_with_label(
        images=images,
        label=label,
        normalization_operation=[norm_op] * len(images),
        allow_missing_modalities=allow_missing_modalities,
        enable_cc_analysis=False,
        crop_to_nonzero=crop_to_nonzero,
    )

    # Stack the preprocessed modalities with the label as the last entry along axis 0.
    case_array = np.vstack((np.array(images), np.array(label)[np.newaxis]), dtype=np.float32)

    # Each case is saved as <sub>.npy (array) plus <sub>.pkl (its properties).
    save_path = join(target_dir, sub)
    np.save(save_path + ".npy", case_array)
    save_pickle(image_props, save_path + ".pkl")

Add some extra metadata to the plans file

In [5]:
# The plans file is written into the same folder as the preprocessed cases.
plans_json_path = join(target_dir, plans_name + "_plans.json")

# Update the plans using the contents of the preprocessed folder, then persist them
# as JSON while keeping the key order as-is.
plans = add_stats_to_plans_post_preprocessing(plans=plans, directory=target_dir)
save_json(plans, plans_json_path, sort_keys=False)

Create a manager (can be replaced by your own training script/class)

In [6]:
# Settings for a minimal demo run: a 2D TinyUNet on CPU for a single epoch,
# with two training and two validation batches per step.
manager_settings = dict(
    # what to train on
    task="Task001_OASIS",
    planner=plans_name,
    split_idx=0,
    # model
    model_name="TinyUNet",
    model_dimensions="2D",
    ckpt_path=None,
    # data loading
    patch_size=(32, 32),
    batch_size=2,
    num_workers=6,
    # schedule
    max_epochs=1,
    train_batches_per_step=2,
    val_batches_per_step=2,
    # runtime / logging
    accelerator="cpu",
    enable_logging=False,
    step_logging=True,
)

Manager = YuccaManager(**manager_settings)
Manager.run_training()

Seed set to 425142129
INFO:root:Loading plans.json
INFO:root:Getting patch size based on manual input of: (32, 32)
INFO:root:Using batch size: 2 and patch size: (32, 32)
INFO:root:Starting a segmentation task
INFO:root:Loading Model: 2D TinyUNet


Composing Transforms


  from .autonotebook import tqdm as notebook_tqdm
INFO:root:Using 6 workers
INFO:root:Using dataset class: <class 'yucca.training.data_loading.YuccaDataset.YuccaTrainDataset'> for train/val and <class 'yucca.training.data_loading.YuccaDataset.YuccaTestDataset'> for inference
INFO:root:
| module                      | #parameters or shape   | #flops     |
|:----------------------------|:-----------------------|:-----------|
| model                       | 7.562K                 | 3.437M     |
|  in_conv                    |  0.204K                |  0.451M    |
|   in_conv.conv1             |   48                   |   0.115M   |
|    in_conv.conv1.conv       |    40                  |    73.728K |
|    in_conv.conv1.norm       |    8                   |    40.96K  |
|   in_conv.conv2             |   0.156K               |   0.336M   |
|    in_conv.conv2.conv       |    0.148K              |    0.295M  |
|    in_conv.conv2.norm       |    8                   |    40.96K  |
|  encoder_co

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/Users/zcr545/miniconda3/envs/testyucca/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'val_dataloader' to speed up the dataloader worker initialization.


                                                                           

INFO:root:Starting training with data from: /Users/zcr545/Desktop/Projects/repos/yucca_data/preprocessed/Task001_OASIS/demo




/Users/zcr545/miniconda3/envs/testyucca/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:436: Consider setting `persistent_workers=True` in 'train_dataloader' to speed up the dataloader worker initialization.


Epoch 0: 100%|██████████| 2/2 [00:33<00:00,  0.06it/s, v_num=0]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 2/2 [00:33<00:00,  0.06it/s, v_num=0]
