diff --git a/pathology/tumor_detection/README.MD b/pathology/tumor_detection/README.MD index 7fe29da5ab..5d50e75a92 100644 --- a/pathology/tumor_detection/README.MD +++ b/pathology/tumor_detection/README.MD @@ -12,12 +12,11 @@ The model is based on ResNet18 with the last fully connected layer replaced by a All the data used to train and validate this model is from [Camelyon-16 Challenge](https://camelyon16.grand-challenge.org/). You can download all the images for "CAMELYON16" data set from various sources listed [here](https://camelyon17.grand-challenge.org/Data/). -Location information for training/validation patches (the location on the whole slide image where patches are extracted) are adopted from [NCRF/coords](https://github.com/baidu-research/NCRF/tree/master/coords). The reformatted coordinations and labels are stored in a json file (`dataset_0.json`), and can be downloaded from [here](https://drive.google.com/file/d/1m2pwko6hxwsxeDWZY2oSOV-_KT97Ol0o/view?usp=sharing) +Location information for training/validation patches (the location on the whole slide image where patches are extracted) are adopted from [NCRF/coords](https://github.com/baidu-research/NCRF/tree/master/coords). The reformatted coordinations and labels in CSV format for training (`training.csv`) can be found [here](https://drive.google.com/file/d/1httIjgji6U6rMIb0P8pE0F-hXFAuvQEf/view?usp=sharing) and for validation (`validation.csv`) can be found [here](https://drive.google.com/file/d/1tJulzl9m5LUm16IeFbOCoFnaSWoB6i5L/view?usp=sharing). -This pipeline expects the training/validation data (whole slide images) reside in `cfg["data_root"]/training/images`. By default `data_root` is pointing to `/workspace/data/medical/pathology/` You can easily modify it to point to a different directory by passing the following argument in the runtime: `--data-root /other/data/root/dir/`. +This pipeline expects the training/validation data (whole slide images) reside in `cfg["data_root"]/training/images`. 
By default the data root is `/workspace/data/medical/pathology`; however, you can easily modify it to point to a different directory by passing the following argument at runtime: `--root /other/data/root/dir/`. -> `dataset_0_subset_0.json` is also provided [here](https://drive.google.com/file/d/1NCd0y4FR42maQpfZjzKlFSIX4oeKgysg/view?usp=sharing) to check the functionality of the pipeline using only two of the whole slide images: `tumor_001` and `tumor_101`.
-> This dataset should not be used for the real training or any perfomance evaluation. +> [`training_sub.csv`](https://drive.google.com/file/d/1rO8ZY-TrU9nrOsx-Udn1q5PmUYrLG3Mv/view?usp=sharing) and [`validation_sub.csv`](https://drive.google.com/file/d/130pqsrc2e9wiHIImL8w4fT_5NktEGel7/view?usp=sharing) are also provided to check the functionality of the pipeline using only two of the whole slide images: `tumor_001` (for training) and `tumor_101` (for validation). This dataset should not be used for the real training or any performance evaluation. ### Input and output formats diff --git a/pathology/tumor_detection/ignite/camelyon_train_evaluate.py b/pathology/tumor_detection/ignite/camelyon_train_evaluate.py index dd670f2f63..e3a74dc8e1 100644 --- a/pathology/tumor_detection/ignite/camelyon_train_evaluate.py +++ b/pathology/tumor_detection/ignite/camelyon_train_evaluate.py @@ -1,34 +1,17 @@ -import os - import logging +import os import time from argparse import ArgumentParser import numpy as np - +import pandas as pd import torch -from torch.optim import SGD, lr_scheduler - from ignite.metrics import Accuracy +from torch.optim import SGD, lr_scheduler import monai -from monai.data import DataLoader, load_decathlon_datalist -from monai.transforms import ( - ActivationsD, - AsDiscreteD, - CastToTypeD, - Compose, - RandFlipD, - RandRotate90D, - RandZoomD, - ScaleIntensityRangeD, - ToNumpyD, - TorchVisionD, - ToTensorD, -) -from monai.utils import first, set_determinism -from monai.optimizers import Novograd -from monai.engines import SupervisedTrainer, SupervisedEvaluator +from monai.data import DataLoader, PatchWSIDataset, CSVDataset +from monai.engines import SupervisedEvaluator, SupervisedTrainer from monai.handlers import ( CheckpointSaver, LrScheduleHandler, @@ -37,10 +20,24 @@ ValidationHandler, from_engine, ) - -from monai.apps.pathology.data import PatchWSIDataset from monai.networks.nets import TorchVisionFCModel - +from monai.optimizers import Novograd +from 
monai.transforms import ( + Activationsd, + AsDiscreted, + CastToTyped, + Compose, + GridSplitd, + Lambdad, + RandFlipd, + RandRotate90d, + RandZoomd, + ScaleIntensityRanged, + ToNumpyd, + TorchVisiond, + ToTensord, +) +from monai.utils import first, set_determinism torch.backends.cudnn.enabled = True set_determinism(seed=0, additional_settings=None) @@ -65,7 +62,7 @@ def set_device(cfg): if gpus and torch.cuda.is_available(): os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(n) for n in gpus]) device = torch.device("cuda") - print(f'CUDA is being used with GPU ID(s): {os.environ["CUDA_VISIBLE_DEVICES"]}') + print(f'CUDA is being used with GPU Id(s): {os.environ["CUDA_VISIBLE_DEVICES"]}') else: device = torch.device("cpu") print("CPU only!") @@ -82,54 +79,66 @@ def train(cfg): # Build MONAI preprocessing train_preprocess = Compose( [ - ToTensorD(keys="image"), - TorchVisionD( + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + ToTensord(keys=("image")), + TorchVisiond( keys="image", name="ColorJitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04 ), - ToNumpyD(keys="image"), - RandFlipD(keys="image", prob=0.5), - RandRotate90D(keys="image", prob=0.5), - CastToTypeD(keys="image", dtype=np.float32), - RandZoomD(keys="image", prob=0.5, min_zoom=0.9, max_zoom=1.1), - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), - ToTensorD(keys=("image", "label")), + ToNumpyd(keys="image"), + RandFlipd(keys="image", prob=0.5), + RandRotate90d(keys="image", prob=0.5, max_k=3, spatial_axes=(-2, -1)), + CastToTyped(keys="image", dtype=np.float32), + RandZoomd(keys="image", prob=0.5, min_zoom=0.9, max_zoom=1.1), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) 
valid_preprocess = Compose( [ - CastToTypeD(keys="image", dtype=np.float32), - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), - ToTensorD(keys=("image", "label")), + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + CastToTyped(keys="image", dtype=np.float32), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) # __________________________________________________________________________ # Create MONAI dataset - train_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="training", - base_dir=cfg["data_root"], + train_data_list = CSVDataset( + cfg["train_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) - valid_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="validation", - base_dir=cfg["data_root"], + train_dataset = PatchWSIDataset( + data=train_data_list, + patch_size=cfg["region_size"], + patch_level=0, + transform=train_preprocess, + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) - train_dataset = PatchWSIDataset( - train_json_info_list, - cfg["region_size"], - cfg["grid_shape"], - cfg["patch_size"], - train_preprocess, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + valid_data_list = CSVDataset( + cfg["valid_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) 
valid_dataset = PatchWSIDataset( - valid_json_info_list, - cfg["region_size"], - cfg["grid_shape"], - cfg["patch_size"], - valid_preprocess, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + data=valid_data_list, + patch_size=cfg["region_size"], + patch_level=0, + transform=valid_preprocess, + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) # __________________________________________________________________________ @@ -141,12 +150,10 @@ def train(cfg): valid_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=True ) - # __________________________________________________________________________ - # Get sample batch and some info + # Check first sample first_sample = first(train_dataloader) if first_sample is None: - raise ValueError("Fist sample is None!") - + raise ValueError("First sample is None!") print("image: ") print(" shape", first_sample["image"].shape) print(" type: ", type(first_sample["image"])) @@ -194,9 +201,7 @@ def train(cfg): StatsHandler(output_transform=lambda x: None), TensorBoardStatsHandler(log_dir=log_dir, output_transform=lambda x: None), ] - val_postprocessing = Compose( - [ActivationsD(keys="pred", sigmoid=True), AsDiscreteD(keys="pred", threshold=0.5)] - ) + val_postprocessing = Compose([Activationsd(keys="pred", sigmoid=True), AsDiscreted(keys="pred", threshold=0.5)]) evaluator = SupervisedEvaluator( device=device, val_data_loader=valid_dataloader, @@ -219,9 +224,7 @@ def train(cfg): log_dir=cfg["logdir"], tag_name="train_loss", output_transform=from_engine(["loss"], first=True) ), ] - train_postprocessing = Compose( - [ActivationsD(keys="pred", sigmoid=True), AsDiscreteD(keys="pred", threshold=0.5)] - ) + train_postprocessing = Compose([Activationsd(keys="pred", sigmoid=True), AsDiscreted(keys="pred", threshold=0.5)]) trainer = SupervisedTrainer( device=device, @@ -241,24 +244,18 @@ def train(cfg): def main(): logging.basicConfig(level=logging.INFO) parser = 
ArgumentParser(description="Tumor detection on whole slide pathology images.") - parser.add_argument( - "--dataset", - type=str, - default="../dataset_0.json", - dest="dataset_json", - help="path to dataset json file", - ) parser.add_argument( "--root", type=str, - default="/workspace/data/medical/pathology/", - dest="data_root", - help="path to root folder of images containing training folder", + default="/workspace/data/medical/pathology", + help="path to image folder containing training/validation", ) + parser.add_argument("--train-file", type=str, default="training.csv", help="path to training data file") + parser.add_argument("--valid-file", type=str, default="validation.csv", help="path to training data file") parser.add_argument("--logdir", type=str, default="./logs/", dest="logdir", help="log directory") parser.add_argument("--rs", type=int, default=256 * 3, dest="region_size", help="region size") - parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape (3x3)") + parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape e.g 3 means 3x3") parser.add_argument("--ps", type=int, default=224, dest="patch_size", help="patch size") parser.add_argument("--bs", type=int, default=64, dest="batch_size", help="batch size") parser.add_argument("--ep", type=int, default=10, dest="n_epochs", help="number of epochs") diff --git a/pathology/tumor_detection/ignite/camelyon_train_evaluate_nvtx_profiling.py b/pathology/tumor_detection/ignite/camelyon_train_evaluate_nvtx_profiling.py index 8e62be68f5..581191d30f 100644 --- a/pathology/tumor_detection/ignite/camelyon_train_evaluate_nvtx_profiling.py +++ b/pathology/tumor_detection/ignite/camelyon_train_evaluate_nvtx_profiling.py @@ -3,41 +3,42 @@ import time from argparse import ArgumentParser -import monai import numpy as np +import pandas as pd import torch -from monai.apps.pathology.data import PatchWSIDataset -from monai.data import DataLoader, 
load_decathlon_datalist +from ignite.metrics import Accuracy +from torch.optim import SGD, lr_scheduler + +import monai +from monai.data import DataLoader, PatchWSIDataset, CSVDataset from monai.engines import SupervisedEvaluator, SupervisedTrainer from monai.handlers import ( CheckpointSaver, LrScheduleHandler, + RangeHandler, StatsHandler, TensorBoardStatsHandler, ValidationHandler, from_engine, ) -from monai.handlers.nvtx_handlers import RangeHandler from monai.networks.nets import TorchVisionFCModel from monai.optimizers import Novograd from monai.transforms import ( - ActivationsD, - AsDiscreteD, - CastToTypeD, + Activationsd, + AsDiscreted, + CastToTyped, Compose, - RandFlipD, - RandRotate90D, - RandZoomD, - ScaleIntensityRangeD, - ToNumpyD, - TorchVisionD, - ToTensorD, + GridSplitd, + Lambdad, + RandFlipd, + RandRotate90d, + RandZoomd, + ScaleIntensityRanged, + ToNumpyd, + TorchVisiond, + ToTensord, ) -from monai.transforms.nvtx import RangePopD, RangePushD -from monai.utils import Range, first, set_determinism -from torch.optim import SGD, lr_scheduler - -from ignite.metrics import Accuracy +from monai.utils import first, set_determinism, Range torch.backends.cudnn.enabled = True set_determinism(seed=0, additional_settings=None) @@ -62,7 +63,7 @@ def set_device(cfg): if gpus and torch.cuda.is_available(): os.environ["CUDA_VISIBLE_DEVICES"] = ",".join([str(n) for n in gpus]) device = torch.device("cuda") - print(f'CUDA is being used with GPU ID(s): {os.environ["CUDA_VISIBLE_DEVICES"]}') + print(f'CUDA is being used with GPU Id(s): {os.environ["CUDA_VISIBLE_DEVICES"]}') else: device = torch.device("cpu") print("CPU only!") @@ -79,85 +80,82 @@ def train(cfg): # Build MONAI preprocessing train_preprocess = Compose( [ - Range()(ToTensorD(keys="image")), - Range("ColorJitter")( - TorchVisionD( - keys="image", - name="ColorJitter", - brightness=64.0 / 255.0, - contrast=0.75, - saturation=0.25, - hue=0.04, - ) + Lambdad(keys="label", func=lambda x: x.reshape((1, 
cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + ToTensord(keys=("image")), + TorchVisiond( + keys="image", name="ColorJitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04 ), - Range()(ToNumpyD(keys="image")), - Range()(RandFlipD(keys="image", prob=0.5)), - Range()(RandRotate90D(keys="image", prob=0.5)), - Range()(CastToTypeD(keys="image", dtype=np.float32)), - Range()(RandZoomD(keys="image", prob=0.5, min_zoom=0.9, max_zoom=1.1)), - Range()(ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0)), - ToTensorD(keys=("image", "label")), + ToNumpyd(keys="image"), + RandFlipd(keys="image", prob=0.5), + RandRotate90d(keys="image", prob=0.5, max_k=3, spatial_axes=(-2, -1)), + CastToTyped(keys="image", dtype=np.float32), + RandZoomd(keys="image", prob=0.5, min_zoom=0.9, max_zoom=1.1), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) - train_preprocess = Range("Preprocessing")(train_preprocess) + train_preprocess = Range("Preprocessing", recursive=True)(train_preprocess) valid_preprocess = Compose( [ - CastToTypeD(keys="image", dtype=np.float32), - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), - ToTensorD(keys=("image", "label")), + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + CastToTyped(keys="image", dtype=np.float32), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) # __________________________________________________________________________ # Create MONAI dataset - train_json_info_list = 
load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="training", - base_dir=cfg["data_root"], + train_data_list = CSVDataset( + cfg["train_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) - valid_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="validation", - base_dir=cfg["data_root"], + train_dataset = PatchWSIDataset( + data=train_data_list, + patch_size=cfg["region_size"], + patch_level=0, + transform=train_preprocess, + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) - train_dataset = PatchWSIDataset( - train_json_info_list, - cfg["region_size"], - cfg["grid_shape"], - cfg["patch_size"], - train_preprocess, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + valid_data_list = CSVDataset( + cfg["valid_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) valid_dataset = PatchWSIDataset( - valid_json_info_list, - cfg["region_size"], - cfg["grid_shape"], - cfg["patch_size"], - valid_preprocess, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + data=valid_data_list, + patch_size=cfg["region_size"], + patch_level=0, + transform=valid_preprocess, + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) # __________________________________________________________________________ # DataLoaders train_dataloader = DataLoader( - train_dataset, - num_workers=cfg["num_workers"], - batch_size=cfg["batch_size"], - pin_memory=True, + train_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=True ) valid_dataloader = DataLoader( - valid_dataset, 
- num_workers=cfg["num_workers"], - batch_size=cfg["batch_size"], - pin_memory=True, + valid_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=True ) - # __________________________________________________________________________ - # Get sample batch and some info + # Check first sample first_sample = first(train_dataloader) if first_sample is None: - raise ValueError("Fist sample is None!") - + raise ValueError("First sample is None!") print("image: ") print(" shape", first_sample["image"].shape) print(" type: ", type(first_sample["image"])) @@ -206,12 +204,7 @@ def train(cfg): StatsHandler(output_transform=lambda x: None), TensorBoardStatsHandler(log_dir=log_dir, output_transform=lambda x: None), ] - val_postprocessing = Compose( - [ - ActivationsD(keys="pred", sigmoid=True), - AsDiscreteD(keys="pred", threshold=0.5), - ] - ) + val_postprocessing = Compose([Activationsd(keys="pred", sigmoid=True), AsDiscreted(keys="pred", threshold=0.5)]) evaluator = SupervisedEvaluator( device=device, val_data_loader=valid_dataloader, @@ -228,27 +221,16 @@ def train(cfg): RangeHandler("Batch"), LrScheduleHandler(lr_scheduler=scheduler, print_lr=True), CheckpointSaver( - save_dir=cfg["logdir"], - save_dict={"net": model, "opt": optimizer}, - save_interval=1, - epoch_level=True, + save_dir=cfg["logdir"], save_dict={"net": model, "opt": optimizer}, save_interval=1, epoch_level=True ), StatsHandler(tag_name="train_loss", output_transform=from_engine(["loss"], first=True)), ValidationHandler(validator=evaluator, interval=1, epoch_level=True), TensorBoardStatsHandler( - log_dir=cfg["logdir"], - tag_name="train_loss", - output_transform=from_engine(["loss"], first=True), + log_dir=cfg["logdir"], tag_name="train_loss", output_transform=from_engine(["loss"], first=True) ), ] - train_postprocessing = Compose( - [ - RangePushD("Postprocessing"), - Range()(ActivationsD(keys="pred", sigmoid=True)), - Range()(AsDiscreteD(keys="pred", threshold=0.5)), - 
RangePopD(), - ] - ) + train_postprocessing = Compose([Activationsd(keys="pred", sigmoid=True), AsDiscreted(keys="pred", threshold=0.5)]) + train_postprocessing = Range("Postprocessing", recursive=True)(train_postprocessing) trainer = SupervisedTrainer( device=device, @@ -268,24 +250,18 @@ def train(cfg): def main(): logging.basicConfig(level=logging.INFO) parser = ArgumentParser(description="Tumor detection on whole slide pathology images.") - parser.add_argument( - "--dataset", - type=str, - default="../dataset_0.json", - dest="dataset_json", - help="path to dataset json file", - ) parser.add_argument( "--root", type=str, - default="/workspace/data/medical/pathology/", - dest="data_root", - help="path to root folder of images containing training folder", + default="/workspace/data/medical/pathology", + help="path to image folder containing training/validation", ) + parser.add_argument("--train-file", type=str, default="training.csv", help="path to training data file") + parser.add_argument("--valid-file", type=str, default="validation.csv", help="path to training data file") parser.add_argument("--logdir", type=str, default="./logs/", dest="logdir", help="log directory") parser.add_argument("--rs", type=int, default=256 * 3, dest="region_size", help="region size") - parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape (3x3)") + parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape e.g 3 means 3x3") parser.add_argument("--ps", type=int, default=224, dest="patch_size", help="patch size") parser.add_argument("--bs", type=int, default=64, dest="batch_size", help="batch size") parser.add_argument("--ep", type=int, default=10, dest="n_epochs", help="number of epochs") @@ -293,20 +269,10 @@ def main(): parser.add_argument("--openslide", action="store_true", dest="use_openslide", help="use OpenSlide") parser.add_argument("--no-amp", action="store_false", dest="amp", help="deactivate amp") - 
parser.add_argument( - "--no-novograd", - action="store_false", - dest="novograd", - help="deactivate novograd optimizer", - ) - parser.add_argument( - "--no-pretrain", - action="store_false", - dest="pretrain", - help="deactivate Imagenet weights", - ) + parser.add_argument("--no-novograd", action="store_false", dest="novograd", help="deactivate novograd optimizer") + parser.add_argument("--no-pretrain", action="store_false", dest="pretrain", help="deactivate Imagenet weights") - parser.add_argument("--cpu", type=int, default=0, dest="num_workers", help="number of workers") + parser.add_argument("--cpu", type=int, default=8, dest="num_workers", help="number of workers") parser.add_argument("--gpu", type=str, default="0", dest="gpu", help="which gpu to use") args = parser.parse_args() diff --git a/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb b/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb index 5a726239ee..657c98f9db 100644 --- a/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb +++ b/pathology/tumor_detection/ignite/profiling_camelyon_pipeline.ipynb @@ -73,9 +73,9 @@ "source": [ "### Download data\n", "\n", - "The pipeline that we are profiling `camelyon_train_evaluate_nvtx_profiling.py` required [Camelyon-16 Challenge](https://camelyon16.grand-challenge.org/) dataset. You can download all the images for \"CAMELYON16\" data set from sources listed [here](https://camelyon17.grand-challenge.org/Data/), as well as the coordinations and labels (`dataset_0.json`), from [here](/view?usp=sharing)\n", + "The pipeline that we are profiling `camelyon_train_evaluate_nvtx_profiling.py` required [Camelyon-16 Challenge](https://camelyon16.grand-challenge.org/) dataset. You can download all the images for \"CAMELYON16\" data set from sources listed [here](https://camelyon17.grand-challenge.org/Data/). 
You can also find the coordinates and labels for training (`training.csv`) [here](https://drive.google.com/file/d/1httIjgji6U6rMIb0P8pE0F-hXFAuvQEf/view?usp=sharing) and for validation (`validation.csv`) [here](https://drive.google.com/file/d/1tJulzl9m5LUm16IeFbOCoFnaSWoB6i5L/view?usp=sharing).\n", "\n", - "However, for the demo of this notebook, we are downloading a very small subset of Camelyon dataaset, which uses only one whole slide image `tumor_091.tif` .\n" + "However, for the demo of this notebook, we are downloading a very small subset of the Camelyon dataset, which uses only one whole slide image `tumor_091.tif`.\n" ] }, { @@ -88,13 +88,13 @@ "output_type": "stream", "text": [ "Downloading...\n", - "From: https://drive.google.com/uc?id=1F-lR9tXoFkPkC1yueM-_TyaFk3CO7v0s\n", - "To: /home/bhashemian/workspace/tutorials/pathology/tumor_detection/ignite/dataset_0.json\n", - "100%|██████████| 1.10M/1.10M [00:00<00:00, 14.2MB/s]\n", + "From: https://drive.google.com/uc?id=1uWS4CXKD-NP_6-SgiQbQfhFMzbs0UJIr\n", + "To: /Users/bhashemian/workspace/tutorials/pathology/tumor_detection/ignite/training.csv\n", + "100%|██████████| 153k/153k [00:00<00:00, 1.91MB/s]\n", "Downloading...\n", "From: https://drive.google.com/uc?id=1OxAeCMVqH9FGpIWpAXSEJe6cLinEGQtF\n", - "To: /home/bhashemian/workspace/tutorials/pathology/tumor_detection/ignite/training/images/tumor_091.tif\n", - "546MB [00:05, 106MB/s]\n" + "To: /Users/bhashemian/workspace/tutorials/pathology/tumor_detection/ignite/training/images/tumor_091.tif\n", + "100%|██████████| 546M/546M [00:22<00:00, 24.1MB/s] \n" ] }, { @@ -109,9 +109,9 @@ } ], "source": [ - "# Download datset.json\n", - "dataset_url = \"https://drive.google.com/uc?id=1F-lR9tXoFkPkC1yueM-_TyaFk3CO7v0s\"\n", - "dataset_path = \"dataset_0.json\"\n", + "# Download training.csv\n", + "dataset_url = \"https://drive.google.com/uc?id=1uWS4CXKD-NP_6-SgiQbQfhFMzbs0UJIr\"\n", + "dataset_path = \"training.csv\"\n", "gdown.download(dataset_url, dataset_path, 
quiet=False)\n", "\n", "# Download images\n", @@ -132,74 +132,23 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Warning: LBR backtrace method is not supported on this platform. DWARF backtrace method will be used.\n", - "{'dataset_json': './dataset_0.json', 'data_root': './', 'logdir': './logs/', 'region_size': 768, 'grid_shape': 3, 'patch_size': 224, 'batch_size': 64, 'n_epochs': 10, 'lr': 0.001, 'use_openslide': False, 'amp': True, 'novograd': True, 'pretrain': True, 'num_workers': 0, 'gpu': '0'}\n", - "Logs and model are saved at './logs/220223-191443_resnet18_ps224_bs64_ep10_lr0.001'.\n", - "CUDA is being used with GPU ID(s): 0\n", - "[Plugin: cucim.kit.cuslide] Loading the dynamic library from: /opt/conda/lib/python3.8/site-packages/cucim/clara/cucim.kit.cuslide@21.10.01.so\n", - "Initializing plugin: cucim.kit.cuslide (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cuslide)\n", - "[Plugin: cucim.kit.cumed] Loading the dynamic library from: /opt/conda/lib/python3.8/site-packages/cucim/clara/cucim.kit.cumed@21.10.01.so\n", - "Initializing plugin: cucim.kit.cumed (interfaces: [cucim::io::IImageFormat v0.1]) (impl: cucim.kit.cumed)\n", - "image: \n", - " shape torch.Size([576, 3, 224, 224])\n", - " type: \n", - " dtype: torch.float32\n", - "labels: \n", - " shape torch.Size([576, 1, 1, 1])\n", - " type: \n", - " dtype: torch.float32\n", - "batch size: 64\n", - "train number of batches: 47\n", - "valid number of batches: 0\n", - "INFO:ignite.engine.engine.SupervisedTrainer:Engine run resuming from iteration 0, epoch 0 until 10 epochs\n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 1/47 -- train_loss: 0.6409 \n", - "Collecting data...\n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 2/47 -- train_loss: 0.7057 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 
3/47 -- train_loss: 0.7131 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 4/47 -- train_loss: 0.6571 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 5/47 -- train_loss: 0.6917 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 6/47 -- train_loss: 0.6641 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 7/47 -- train_loss: 0.6660 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 8/47 -- train_loss: 0.6686 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 9/47 -- train_loss: 0.6390 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 10/47 -- train_loss: 0.6890 \n", - "Processing events...\n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 11/47 -- train_loss: 0.6213 \n", - "INFO:ignite.engine.engine.SupervisedTrainer:Epoch: 1/10, Iter: 12/47 -- train_loss: 0.6279 \n", - "Saving temporary \"/tmp/nsys-report-a4ba-75f3-bf1b-c100.qdstrm\" file to disk...\n", - "\n", - "\n", - "Sent signal 15 (Terminated) to target application's process group ID = 149165.\n", - "Use the kill option to modify this behavior.\n", - "Creating final output files...\n", - "Processing [1% ]\n", - "The target application terminated with signal 15 (SIGTERM)\n", - "Processing [===============================================================100%]\n", - "Saved report file to \"/tmp/nsys-report-a4ba-75f3-bf1b-c100.qdrep\"\n", - "Report file moved to \"/home/bhashemian/workspace/tutorials/pathology/tumor_detection/ignite/profile_report.qdrep\"\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "!nsys profile \\\n", " --trace nvtx,osrt,cudnn,cuda, \\\n", " --delay 15 \\\n", - " --duration 45 \\\n", + " --duration 60 \\\n", " --show-output true \\\n", " --force-overwrite true \\\n", - " --output profile_report \\\n", + " --output profile_report.nsys-rep \\\n", " python camelyon_train_evaluate_nvtx_profiling.py \\\n", " --cpu 0 \\\n", - " 
--dataset ./dataset_0.json \\\n", - " --root ./" + " --train-file ./training.csv \\\n", + " --valid-file ./training.csv \\\n", + " --root ./ \\\n", + " --bs 10" ] }, { @@ -211,35 +160,38 @@ "name": "stdout", "output_type": "stream", "text": [ - "Generating SQLite file profile_report.sqlite from profile_report.qdrep\n", - "Exporting 450058 events: [=================================================100%]\n", + "Generating SQLite file profile_report.sqlite from profile_report.nsys-rep\n", + "Exporting 265495 events: [=================================================100%]\n", "Using profile_report.sqlite for SQL queries.\n", - "Running [/usr/local/cuda-11.5/NsightSystems-cli-2021.3.2/target-linux-x64/reports/nvtxppsum.py profile_report.sqlite]... \n", + "Running [/usr/local/cuda-11.6/NsightSystems-cli-2021.5.2/target-linux-x64/reports/nvtxppsum.py profile_report.sqlite]... \n", "\n", - "+---------+-----------------+-----------+--------------+--------------+--------------+--------------+----------------------+\n", - "| Time(%) | Total Time (ns) | Instances | Average (ns) | Minimum (ns) | Maximum (ns) | StdDev (ns) | Range |\n", - "+---------+-----------------+-----------+--------------+--------------+--------------+--------------+----------------------+\n", - "| 34.4 | 40036304081 | 9 | 4448478231.2 | 782631451 | 10036025423 | 2388320102.6 | Batch |\n", - "| 30.3 | 35191327503 | 525 | 67031100.0 | 39060250 | 5297236602 | 229442683.8 | Preprocessing |\n", - "| 26.7 | 31077190736 | 4724 | 6578575.5 | 2991869 | 5222838076 | 76115696.2 | ColorJitter |\n", - "| 4.0 | 4666811337 | 8 | 583351417.1 | 543904689 | 669988364 | 46262035.8 | Iteration |\n", - "| 1.3 | 1493595476 | 4716 | 316708.1 | 12442 | 4689389 | 547614.2 | RandZoomd |\n", - "| 1.1 | 1263019896 | 4716 | 267815.9 | 215967 | 692103 | 41615.4 | ScaleIntensityRanged |\n", - "| 0.6 | 650986980 | 5184 | 125576.2 | 114374 | 339141 | 11487.2 | Postprocessing |\n", - "| 0.3 | 391508855 | 5184 | 75522.5 | 68872 | 154239 | 
6759.0 | AsDiscreted |\n", - "| 0.3 | 359299648 | 4716 | 76187.4 | 50648 | 235844 | 16864.6 | ToNumpyd |\n", - "| 0.3 | 299120139 | 4716 | 63426.7 | 33967 | 131942 | 8541.3 | CastToTyped |\n", - "| 0.2 | 258728731 | 4716 | 54861.9 | 8278 | 994322 | 56994.7 | RandRotate90d |\n", - "| 0.2 | 232628941 | 5184 | 44874.4 | 40170 | 172579 | 5427.0 | Activationsd |\n", - "| 0.2 | 175299329 | 4716 | 37171.2 | 2872 | 230035 | 34228.3 | RandFlipd |\n", - "| 0.1 | 62369507 | 8 | 7796188.4 | 7069163 | 10072025 | 1083591.7 | ResNet18 |\n", - "| 0.1 | 59502039 | 4725 | 12593.0 | 6503 | 932484 | 18170.6 | ToTensord |\n", - "| 0.0 | 1964335 | 8 | 245541.9 | 220073 | 305026 | 31930.6 | Loss |\n", - "+---------+-----------------+-----------+--------------+--------------+--------------+--------------+----------------------+\n", + "+----------+-----------------+-----------+--------------+--------------+------------+-------------+--------------+--------------------------+\n", + "| Time (%) | Total Time (ns) | Instances | Avg (ns) | Med (ns) | Min (ns) | Max (ns) | StdDev (ns) | Range |\n", + "+----------+-----------------+-----------+--------------+--------------+------------+-------------+--------------+--------------------------+\n", + "| 28.7 | 33706579200 | 5 | 6741315840.0 | 6324451800.0 | 176995000 | 13682363400 | 4889241407.0 | Iteration |\n", + "| 21.3 | 25011936300 | 5 | 5002387260.0 | 4787481900.0 | 2873517700 | 8072929200 | 2035498721.7 | Batch |\n", + "| 20.3 | 23839370600 | 50 | 476787412.0 | 376589400.0 | 220633100 | 1154097400 | 276441893.0 | Preprocessing |\n", + "| 19.3 | 22742525900 | 450 | 50538946.4 | 36570950.0 | 18874200 | 202062000 | 36166736.1 | TorchVisiond_ColorJitter |\n", + "| 9.4 | 11044461700 | 5 | 2208892340.0 | 1996530400.0 | 148099300 | 4407900900 | 1534918799.1 | ResNet18 |\n", + "| 0.3 | 384269900 | 450 | 853933.1 | 65400.0 | 21000 | 22487800 | 2212634.3 | RandZoomd |\n", + "| 0.2 | 244892100 | 450 | 544204.7 | 441950.0 | 321800 | 8677700 | 541248.4 | 
ScaleIntensityRanged |\n", + "| 0.1 | 128083500 | 450 | 284630.0 | 243900.0 | 187400 | 4721600 | 230932.6 | Postprocessing |\n", + "| 0.1 | 91848800 | 450 | 204108.4 | 176450.0 | 128700 | 745700 | 87187.5 | ToNumpyd |\n", + "| 0.1 | 65417500 | 450 | 145372.2 | 117000.0 | 90200 | 4613300 | 219185.3 | AsDiscreted |\n", + "| 0.1 | 59017500 | 450 | 131150.0 | 82250.0 | 17400 | 1050600 | 123950.6 | RandRotate90d |\n", + "| 0.0 | 55917100 | 50 | 1118342.0 | 882450.0 | 685900 | 5798000 | 801880.3 | GridSplitd |\n", + "| 0.0 | 54120900 | 450 | 120268.7 | 100700.0 | 68500 | 721200 | 59828.4 | ToTensord |\n", + "| 0.0 | 51677300 | 450 | 114838.4 | 101350.0 | 67500 | 1154000 | 60160.7 | CastToTyped |\n", + "| 0.0 | 50674700 | 450 | 112610.4 | 95650.0 | 71300 | 613700 | 51643.2 | ToTensord_2 |\n", + "| 0.0 | 48966300 | 450 | 108814.0 | 95900.0 | 75200 | 428700 | 44791.2 | Activationsd |\n", + "| 0.0 | 39524100 | 450 | 87831.3 | 45950.0 | 7600 | 2748400 | 153536.8 | RandFlipd |\n", + "| 0.0 | 2460800 | 50 | 49216.0 | 39450.0 | 28700 | 146100 | 23219.6 | Lambdad |\n", + "| 0.0 | 1074200 | 4 | 268550.0 | 194150.0 | 142200 | 543700 | 186523.3 | Loss |\n", + "+----------+-----------------+-----------+--------------+--------------+------------+-------------+--------------+--------------------------+\n", "\n", - "Running [/usr/local/cuda-11.5/NsightSystems-cli-2021.3.2/target-linux-x64/reports/nvtxppsum.py profile_report.sqlite] to [profile_report_nvtxppsum.csv]... PROCESSED\n", + "Running [/usr/local/cuda-11.6/NsightSystems-cli-2021.5.2/target-linux-x64/reports/nvtxppsum.py profile_report.sqlite] to [profile_report_nvtxppsum.csv]... PROCESSED\n", "\n", - "Running [/usr/local/cuda-11.5/NsightSystems-cli-2021.3.2/target-linux-x64/reports/nvtxpptrace.py profile_report.sqlite] to [profile_report_nvtxpptrace.csv]... 
PROCESSED\n", + "Running [/usr/local/cuda-11.6/NsightSystems-cli-2021.5.2/target-linux-x64/reports/nvtxpptrace.py profile_report.sqlite] to [profile_report_nvtxpptrace.csv]... PROCESSED\n", "\n" ] } @@ -250,7 +202,7 @@ " --format table,csv \\\n", " --output -,. \\\n", " --force-overwrite true \\\n", - " profile_report.qdrep" + " profile_report.nsys-rep" ] }, { @@ -271,13 +223,16 @@ "# Ordered list of NVTX range for all training transforms\n", "transforms = [\n", " \"ToTensord\",\n", - " \"ColorJitter\",\n", + " \"Lambdad\",\n", + " \"GridSplitd\",\n", + " \"TorchVisiond_ColorJitter\",\n", " \"ToNumpyd\",\n", " \"RandFlipd\",\n", " \"RandRotate90d\",\n", " \"CastToTyped\",\n", " \"RandZoomd\",\n", " \"ScaleIntensityRanged\",\n", + " \"ToTensord_2\",\n", " \"Activationsd\",\n", " \"AsDiscreted\",\n", "]" @@ -316,10 +271,18 @@ " \n", " \n", " \n", - " Average\n", - " Minimum\n", - " Maximum\n", - " StdDev\n", + " Time (%)\n", + " Total Time (ns)\n", + " Instances\n", + " Avg (ns)\n", + " Med (ns)\n", + " Min (ns)\n", + " Max (ns)\n", + " StdDev (ns)\n", + " avg%\n", + " std%\n", + " min%\n", + " max%\n", " \n", " \n", " Range\n", @@ -327,96 +290,264 @@ " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " ToTensord\n", - " 0.167288\n", - " 0.086387\n", - " 12.387309\n", - " 0.241382\n", + " 0.0\n", + " 54120900\n", + " 450\n", + " 120268.7\n", + " 100700.0\n", + " 68500\n", + " 721200\n", + " 59828.4\n", + " 0.222186\n", + " 0.110528\n", + " 0.126548\n", + " 1.332357\n", + " \n", + " \n", + " Lambdad\n", + " 0.0\n", + " 2460800\n", + " 50\n", + " 49216.0\n", + " 39450.0\n", + " 28700\n", + " 146100\n", + " 23219.6\n", + " 0.090922\n", + " 0.042896\n", + " 0.053021\n", + " 0.269908\n", " \n", " \n", - " ColorJitter\n", - " 87.391149\n", - " 39.744603\n", - " 69381.254333\n", - " 1011.136551\n", + " GridSplitd\n", + " 0.0\n", + " 55917100\n", + " 50\n", + " 1118342.0\n", + " 882450.0\n", + " 685900\n", 
+ " 5798000\n", + " 801880.3\n", + " 2.066044\n", + " 1.481407\n", + " 1.267143\n", + " 10.711323\n", + " \n", + " \n", + " TorchVisiond_ColorJitter\n", + " 19.3\n", + " 22742525900\n", + " 450\n", + " 50538946.4\n", + " 36570950.0\n", + " 18874200\n", + " 202062000\n", + " 36166736.1\n", + " 93.366500\n", + " 66.815037\n", + " 34.868515\n", + " 373.292740\n", " \n", " \n", " ToNumpyd\n", - " 1.012089\n", - " 0.672818\n", - " 3.133000\n", - " 0.224033\n", + " 0.1\n", + " 91848800\n", + " 450\n", + " 204108.4\n", + " 176450.0\n", + " 128700\n", + " 745700\n", + " 87187.5\n", + " 0.377073\n", + " 0.161072\n", + " 0.237763\n", + " 1.377619\n", " \n", " \n", " RandFlipd\n", - " 0.493790\n", - " 0.038152\n", - " 3.055832\n", - " 0.454696\n", + " 0.0\n", + " 39524100\n", + " 450\n", + " 87831.3\n", + " 45950.0\n", + " 7600\n", + " 2748400\n", + " 153536.8\n", + " 0.162261\n", + " 0.283646\n", + " 0.014040\n", + " 5.077440\n", " \n", " \n", " RandRotate90d\n", - " 0.728797\n", - " 0.109967\n", - " 13.208778\n", - " 0.757129\n", + " 0.1\n", + " 59017500\n", + " 450\n", + " 131150.0\n", + " 82250.0\n", + " 17400\n", + " 1050600\n", + " 123950.6\n", + " 0.242289\n", + " 0.228988\n", + " 0.032145\n", + " 1.940896\n", " \n", " \n", " CastToTyped\n", - " 0.842573\n", - " 0.451225\n", - " 1.752745\n", - " 0.113464\n", + " 0.0\n", + " 51677300\n", + " 450\n", + " 114838.4\n", + " 101350.0\n", + " 67500\n", + " 1154000\n", + " 60160.7\n", + " 0.212154\n", + " 0.111142\n", + " 0.124701\n", + " 2.131919\n", " \n", " \n", " RandZoomd\n", - " 4.207215\n", - " 0.165282\n", - " 62.294807\n", - " 7.274620\n", + " 0.3\n", + " 384269900\n", + " 450\n", + " 853933.1\n", + " 65400.0\n", + " 21000\n", + " 22487800\n", + " 2212634.3\n", + " 1.577570\n", + " 4.087658\n", + " 0.038796\n", + " 41.544340\n", " \n", " \n", " ScaleIntensityRanged\n", - " 3.557721\n", - " 2.868950\n", - " 9.194038\n", - " 0.552828\n", + " 0.2\n", + " 244892100\n", + " 450\n", + " 544204.7\n", + " 441950.0\n", + " 
321800\n", + " 8677700\n", + " 541248.4\n", + " 1.005373\n", + " 0.999911\n", + " 0.594499\n", + " 16.031329\n", + " \n", + " \n", + " ToTensord_2\n", + " 0.0\n", + " 50674700\n", + " 450\n", + " 112610.4\n", + " 95650.0\n", + " 71300\n", + " 613700\n", + " 51643.2\n", + " 0.208038\n", + " 0.095407\n", + " 0.131721\n", + " 1.133760\n", " \n", " \n", " Activationsd\n", - " 0.596121\n", - " 0.533627\n", - " 2.292575\n", - " 0.072093\n", + " 0.0\n", + " 48966300\n", + " 450\n", + " 108814.0\n", + " 95900.0\n", + " 75200\n", + " 428700\n", + " 44791.2\n", + " 0.201025\n", + " 0.082748\n", + " 0.138926\n", + " 0.791988\n", " \n", " \n", " AsDiscreted\n", - " 1.003256\n", - " 0.914910\n", - " 2.048943\n", - " 0.089788\n", + " 0.1\n", + " 65417500\n", + " 450\n", + " 145372.2\n", + " 117000.0\n", + " 90200\n", + " 4613300\n", + " 219185.3\n", + " 0.268563\n", + " 0.404927\n", + " 0.166637\n", + " 8.522688\n", " \n", " \n", "\n", "" ], "text/plain": [ - " Average Minimum Maximum StdDev\n", - "Range \n", - "ToTensord 0.167288 0.086387 12.387309 0.241382\n", - "ColorJitter 87.391149 39.744603 69381.254333 1011.136551\n", - "ToNumpyd 1.012089 0.672818 3.133000 0.224033\n", - "RandFlipd 0.493790 0.038152 3.055832 0.454696\n", - "RandRotate90d 0.728797 0.109967 13.208778 0.757129\n", - "CastToTyped 0.842573 0.451225 1.752745 0.113464\n", - "RandZoomd 4.207215 0.165282 62.294807 7.274620\n", - "ScaleIntensityRanged 3.557721 2.868950 9.194038 0.552828\n", - "Activationsd 0.596121 0.533627 2.292575 0.072093\n", - "AsDiscreted 1.003256 0.914910 2.048943 0.089788" + " Time (%) Total Time (ns) Instances Avg (ns) \\\n", + "Range \n", + "ToTensord 0.0 54120900 450 120268.7 \n", + "Lambdad 0.0 2460800 50 49216.0 \n", + "GridSplitd 0.0 55917100 50 1118342.0 \n", + "TorchVisiond_ColorJitter 19.3 22742525900 450 50538946.4 \n", + "ToNumpyd 0.1 91848800 450 204108.4 \n", + "RandFlipd 0.0 39524100 450 87831.3 \n", + "RandRotate90d 0.1 59017500 450 131150.0 \n", + "CastToTyped 0.0 51677300 
450 114838.4 \n", + "RandZoomd 0.3 384269900 450 853933.1 \n", + "ScaleIntensityRanged 0.2 244892100 450 544204.7 \n", + "ToTensord_2 0.0 50674700 450 112610.4 \n", + "Activationsd 0.0 48966300 450 108814.0 \n", + "AsDiscreted 0.1 65417500 450 145372.2 \n", + "\n", + " Med (ns) Min (ns) Max (ns) StdDev (ns) \\\n", + "Range \n", + "ToTensord 100700.0 68500 721200 59828.4 \n", + "Lambdad 39450.0 28700 146100 23219.6 \n", + "GridSplitd 882450.0 685900 5798000 801880.3 \n", + "TorchVisiond_ColorJitter 36570950.0 18874200 202062000 36166736.1 \n", + "ToNumpyd 176450.0 128700 745700 87187.5 \n", + "RandFlipd 45950.0 7600 2748400 153536.8 \n", + "RandRotate90d 82250.0 17400 1050600 123950.6 \n", + "CastToTyped 101350.0 67500 1154000 60160.7 \n", + "RandZoomd 65400.0 21000 22487800 2212634.3 \n", + "ScaleIntensityRanged 441950.0 321800 8677700 541248.4 \n", + "ToTensord_2 95650.0 71300 613700 51643.2 \n", + "Activationsd 95900.0 75200 428700 44791.2 \n", + "AsDiscreted 117000.0 90200 4613300 219185.3 \n", + "\n", + " avg% std% min% max% \n", + "Range \n", + "ToTensord 0.222186 0.110528 0.126548 1.332357 \n", + "Lambdad 0.090922 0.042896 0.053021 0.269908 \n", + "GridSplitd 2.066044 1.481407 1.267143 10.711323 \n", + "TorchVisiond_ColorJitter 93.366500 66.815037 34.868515 373.292740 \n", + "ToNumpyd 0.377073 0.161072 0.237763 1.377619 \n", + "RandFlipd 0.162261 0.283646 0.014040 5.077440 \n", + "RandRotate90d 0.242289 0.228988 0.032145 1.940896 \n", + "CastToTyped 0.212154 0.111142 0.124701 2.131919 \n", + "RandZoomd 1.577570 4.087658 0.038796 41.544340 \n", + "ScaleIntensityRanged 1.005373 0.999911 0.594499 16.031329 \n", + "ToTensord_2 0.208038 0.095407 0.131721 1.133760 \n", + "Activationsd 0.201025 0.082748 0.138926 0.791988 \n", + "AsDiscreted 0.268563 0.404927 0.166637 8.522688 " ] }, "execution_count": 7, @@ -434,10 +565,16 @@ "\n", "# Get the entries for training transforms only (to avoid nested ranges)\n", "summary = summary.loc[transforms]\n", - "summary.columns = 
[c.replace(\" (ns)\", \"\") for c in summary.columns]\n", "\n", - "# Normalize each transform range with total average time (percentage of transfom time)\n", - "summary = summary[[\"Average\", \"Minimum\", \"Maximum\", \"StdDev\"]] / summary[\"Average\"].sum() * 100\n", + "# Nsys output column names are different in different versions, \n", + "# so we need to find the corresponding columns\n", + "avg_col = [c for c in summary.columns if c.startswith('Av')] \n", + "std_col = [c for c in summary.columns if c.startswith('Std')]\n", + "min_col = [c for c in summary.columns if c.startswith('Min')] \n", + "max_col = [c for c in summary.columns if c.startswith('Max')] \n", + "\n", + "# Normalize each transform range with total average time (percentage of transform time)\n", + "summary[[\"avg%\", \"std%\", 'min%', 'max%']] = summary[avg_col+std_col+min_col+max_col] / summary[avg_col].sum()[0] * 100\n", "summary" ] }, @@ -458,7 +595,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -471,8 +608,8 @@ "plt.style.use(\"fivethirtyeight\")\n", "plt.style.use(\"tableau-colorblind10\")\n", "axes = summary.plot.barh(\n", - " y=\"Average\",\n", - " xerr=\"StdDev\",\n", + " y=\"avg%\",\n", + " xerr=\"std%\",\n", " title=\"Average Time\",\n", " xlabel=\"\",\n", " fontsize=16,\n", @@ -502,7 +639,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -513,7 +650,7 @@ ], "source": [ "axes = summary.plot.barh(\n", - " y=[\"Average\", \"Minimum\", \"Maximum\"],\n", + " y=['avg%', \"min%\", \"max%\"],\n", " xlabel=\"\",\n", " fontsize=16,\n", " figsize=(15, 15),\n", @@ -552,7 +689,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.12" } }, "nbformat": 4, diff --git a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py index c6bb4c110e..2fcf6f0cbb 100644 --- a/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py +++ b/pathology/tumor_detection/torch/camelyon_train_evaluate_pytorch_gpu.py @@ -7,27 +7,28 @@ import monai import numpy as np -from monai.apps.pathology.data import PatchWSIDataset -from monai.data import DataLoader, load_decathlon_datalist +from monai.data import CSVDataset, DataLoader, PatchWSIDataset from monai.networks.nets import TorchVisionFCModel from monai.optimizers import Novograd from monai.transforms import ( Activations, AsDiscrete, CastToType, - CastToTypeD, + CastToTyped, Compose, CuCIM, + GridSplitd, + Lambdad, RandCuCIM, - RandFlipD, - RandRotate90D, - RandZoomD, - ScaleIntensityRangeD, + RandFlipd, + RandRotate90d, + RandZoomd, + ScaleIntensityRanged, ToCupy, - ToNumpyD, - TorchVisionD, + ToNumpyd, + TorchVisiond, ToTensor, - ToTensorD, + ToTensord, ) from monai.utils import first, set_determinism @@ -185,7 +186,7 @@ def main(cfg): fh.setLevel(logging.INFO) logger.addHandler(fh) - # Set TensorBoard summary writter + # Set TensorBoard summary writer writer = SummaryWriter(log_dir) # Save configs @@ -210,11 +211,28 @@ def main(cfg): preprocess_cpu_valid = None preprocess_gpu_valid = None if cfg["backend"] == "cucim": - preprocess_cpu_train = Compose([ToTensorD(keys="label")]) + preprocess_cpu_train = Compose( + [ + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], 
cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + ToTensord(keys="label"), + ] + ) preprocess_gpu_train = Compose( [ ToCupy(), - RandCuCIM(name="rand_color_jitter", prob=cfg["prob"], brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04), + RandCuCIM( + name="rand_color_jitter", + prob=1.0, + brightness=64.0 / 255.0, + contrast=0.75, + saturation=0.25, + hue=0.04, + ), RandCuCIM(name="rand_image_flip", prob=cfg["prob"], spatial_axis=-1), RandCuCIM(name="rand_image_rotate_90", prob=cfg["prob"], max_k=3, spatial_axis=(-2, -1)), CastToType(dtype=np.float32), @@ -223,7 +241,17 @@ def main(cfg): ToTensor(device=device), ] ) - preprocess_cpu_valid = Compose([ToTensorD(keys="label")]) + preprocess_cpu_valid = Compose( + [ + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + ToTensord(keys="label"), + ] + ) preprocess_gpu_valid = Compose( [ ToCupy(dtype=np.float32), @@ -234,24 +262,36 @@ def main(cfg): elif cfg["backend"] == "numpy": preprocess_cpu_train = Compose( [ - ToTensorD(keys=("image", "label")), - TorchVisionD( + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + ToTensord(keys=("image", "label")), + TorchVisiond( keys="image", name="ColorJitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04 ), - ToNumpyD(keys="image"), - RandFlipD(keys="image", prob=cfg["prob"], spatial_axis=-1), - RandRotate90D(keys="image", prob=cfg["prob"]), - CastToTypeD(keys="image", dtype=np.float32), - RandZoomD(keys="image", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1), - 
ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), - ToTensorD(keys="image"), + ToNumpyd(keys="image"), + RandFlipd(keys="image", prob=cfg["prob"], spatial_axis=-1), + RandRotate90d(keys="image", prob=cfg["prob"]), + CastToTyped(keys="image", dtype=np.float32), + RandZoomd(keys="image", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys="image"), ] ) preprocess_cpu_valid = Compose( [ - CastToTypeD(keys="image", dtype=np.float32), - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), - ToTensorD(keys=("image", "label")), + Lambdad(keys="label", func=lambda x: x.reshape((1, cfg["grid_shape"], cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), + grid=(cfg["grid_shape"], cfg["grid_shape"]), + size={"image": cfg["patch_size"], "label": 1}, + ), + CastToTyped(keys="image", dtype=np.float32), + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) else: @@ -265,41 +305,42 @@ def main(cfg): ] ) - # Create MONAI dataset - train_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="training", - base_dir=cfg["data_root"], - ) - valid_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="validation", - base_dir=cfg["data_root"], + # Create train dataset and dataloader + train_data_list = CSVDataset( + cfg["train_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) train_dataset = PatchWSIDataset( - data=train_json_info_list, - region_size=cfg["region_size"], - grid_shape=cfg["grid_shape"], - patch_size=cfg["patch_size"], + data=train_data_list, + 
patch_size=cfg["region_size"], + patch_level=0, transform=preprocess_cpu_train, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", - ) - valid_dataset = PatchWSIDataset( - data=valid_json_info_list, - region_size=cfg["region_size"], - grid_shape=cfg["grid_shape"], - patch_size=cfg["patch_size"], - transform=preprocess_cpu_valid, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) - - # DataLoaders train_dataloader = DataLoader( train_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=cfg["pin"] ) - valid_dataloader = DataLoader( - valid_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=cfg["pin"] - ) + + # Create validation dataset and dataloader + if not cfg["no_validate"]: + valid_data_list = CSVDataset( + cfg["valid_file"], + col_groups={"image": 0, "patch_location": [2, 1], "label": [3, 6, 9, 4, 7, 10, 5, 8, 11]}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), + ) + valid_dataset = PatchWSIDataset( + data=valid_data_list, + patch_size=cfg["region_size"], + patch_level=0, + transform=preprocess_cpu_valid, + reader="openslide" if cfg["use_openslide"] else "cuCIM", + ) + valid_dataloader = DataLoader( + valid_dataset, num_workers=cfg["num_workers"], batch_size=cfg["batch_size"], pin_memory=cfg["pin"] + ) # Get sample batch and some info first_sample = first(train_dataloader) @@ -314,7 +355,8 @@ def main(cfg): ) logging.info(f"Batch size: {cfg['batch_size']}") logging.info(f"[Training] number of batches: {len(train_dataloader)}") - logging.info(f"[Validation] number of batches: {len(valid_dataloader)}") + if not cfg["no_validate"]: + logging.info(f"[Validation] number of batches: {len(valid_dataloader)}") # ------------------------------------------------------------------------- # Deep Learning Model and Configurations # 
------------------------------------------------------------------------- @@ -352,7 +394,10 @@ def main(cfg): total_valid_time, total_train_time = 0.0, 0.0 t_start = time.perf_counter() - metric_summary = {"loss": np.Inf, "accuracy": 0, "best_epoch": 1} + if cfg["no_validate"]: + metric_summary = {} + else: + metric_summary = {"loss": np.Inf, "accuracy": 0, "best_epoch": 1} # Training/Validation Loop for _ in range(cfg["n_epochs"]): t_epoch = time.perf_counter() @@ -375,14 +420,16 @@ def main(cfg): ) if scheduler is not None: scheduler.step() - if cfg["save"]: + if not cfg["no_save"]: torch.save(model.state_dict(), os.path.join(log_dir, f"model_epoch_{train_counter['epoch']}.pt")) t_train = time.perf_counter() train_time = t_train - t_epoch total_train_time += train_time # Validation - if cfg["validate"]: + if cfg["no_validate"]: + logging.info(f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] Train time: {train_time:.1f}s") + else: valid_loss, valid_acc = validation( model, loss_func, @@ -407,8 +454,6 @@ def main(cfg): f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.3f}, " f"time: {t_valid - t_epoch:.1f}s (train: {train_time:.1f}s, valid: {valid_time:.1f}s)" ) - else: - logging.info(f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] Train time: {train_time:.1f}s") writer.flush() t_end = time.perf_counter() @@ -420,7 +465,7 @@ def main(cfg): logging.info(f"Metric Summary: {metric_summary}") # Save the best and final model - if cfg["validate"] is True: + if not cfg["no_validate"] and not cfg["no_save"]: copyfile( os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pt"), os.path.join(log_dir, "model_best.pt"), @@ -440,24 +485,18 @@ def main(cfg): def parse_arguments(): parser = ArgumentParser(description="Tumor detection on whole slide pathology images.") - parser.add_argument( - "--dataset", - type=str, - default="./dataset_0.json", - dest="dataset_json", - help="path to dataset json file", - 
) parser.add_argument( "--root", type=str, - default="/workspace/data/medical/pathology/", - dest="data_root", - help="path to root folder of images containing training folder", + default="/workspace/data/medical/pathology", + help="path to image folder containing training/validation", ) + parser.add_argument("--train-file", type=str, default="training.csv", help="path to training data file") + parser.add_argument("--valid-file", type=str, default="validation.csv", help="path to training data file") parser.add_argument("--logdir", type=str, default="./logs/", dest="logdir", help="log directory") parser.add_argument("--rs", type=int, default=256 * 3, dest="region_size", help="region size") - parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape (3x3)") + parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape e.g 3 means 3x3") parser.add_argument("--ps", type=int, default=224, dest="patch_size", help="patch size") parser.add_argument("--bs", type=int, default=64, dest="batch_size", help="batch size") parser.add_argument("--ep", type=int, default=4, dest="n_epochs", help="number of epochs") @@ -473,8 +512,8 @@ def parse_arguments(): parser.add_argument("--pretrain", action="store_true", help="activate Imagenet weights") parser.add_argument("--benchmark", action="store_true", help="activate Imagenet weights") - parser.add_argument("--save", action="store_true", help="save model at each epoch") - parser.add_argument("--validate", action="store_true", help="use optimized parameters") + parser.add_argument("--no-save", action="store_true", help="save model at each epoch") + parser.add_argument("--no-validate", action="store_true", help="use optimized parameters") parser.add_argument("--baseline", action="store_true", help="use baseline parameters") parser.add_argument("--optimized", action="store_true", help="use optimized parameters") parser.add_argument("-b", "--backend", type=str, dest="backend", 
help="backend for transforms") diff --git a/performance_profiling/pathology/train_evaluate_nvtx.py b/performance_profiling/pathology/train_evaluate_nvtx.py index dbe3d77130..344681a245 100644 --- a/performance_profiling/pathology/train_evaluate_nvtx.py +++ b/performance_profiling/pathology/train_evaluate_nvtx.py @@ -7,27 +7,28 @@ import monai import numpy as np -from monai.apps.pathology.data import PatchWSIDataset -from monai.data import DataLoader, load_decathlon_datalist +from monai.data import CSVDataset, DataLoader, PatchWSIDataset from monai.networks.nets import TorchVisionFCModel from monai.optimizers import Novograd from monai.transforms import ( Activations, AsDiscrete, CastToType, - CastToTypeD, + CastToTyped, Compose, CuCIM, + GridSplitd, + Lambdad, RandCuCIM, - RandFlipD, - RandRotate90D, - RandZoomD, - ScaleIntensityRangeD, + RandFlipd, + RandRotate90d, + RandZoomd, + ScaleIntensityRanged, ToCupy, - ToNumpyD, - TorchVisionD, + ToNumpyd, + TorchVisiond, ToTensor, - ToTensorD, + ToTensord, ) from monai.utils import first, set_determinism, Range @@ -78,20 +79,23 @@ def training( writer: SummaryWriter, print_step, ): + summary["epoch"] += 1 + model.train() n_steps = len(dataloader) iter_data = iter(dataloader) for step in range(n_steps): + summary["step"] += 1 with Range("Step"): with Range("Data Loading"): batch = next(iter_data) x = batch["image"].to(device) y = batch["label"].to(device) - if pre_process is not None: - x = pre_process(x) + if pre_process is not None: + x = pre_process(x) with autocast(enabled=amp): output = model(x) @@ -121,10 +125,6 @@ def training( f"Step: {step + 1}/{n_steps} -- " f"train_loss: {loss_data:.5f}, train_acc: {acc_data:.3f}" ) - - summary["step"] += 1 - - summary["epoch"] += 1 return summary @@ -212,41 +212,66 @@ def main(cfg): preprocess_cpu_valid = None preprocess_gpu_valid = None if cfg["backend"] == "cucim": - preprocess_cpu_train = Compose([ToTensorD(keys="label")]) + preprocess_cpu_train = Compose( + [ + 
Lambdad(keys="label", func=lambda x: x.reshape((1, *cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), grid=cfg["grid_shape"], size={"image": cfg["patch_size"], "label": 1} + ), + ToTensord(keys="label"), + ] + ) preprocess_gpu_train = Compose( [ Range()(ToCupy()), Range("ColorJitter")( - RandCuCIM(name="color_jitter", brightness=64.0 / 255.0, contrast=0.75, saturation=0.25, hue=0.04) + RandCuCIM( + name="rand_color_jitter", + prob=1.0, + brightness=64.0 / 255.0, + contrast=0.75, + saturation=0.25, + hue=0.04, + ) ), - Range("RandomFlip")(RandCuCIM(name="image_flip", apply_prob=cfg["prob"], spatial_axis=-1)), + Range("RandomFlip")(RandCuCIM(name="rand_image_flip", prob=cfg["prob"], spatial_axis=-1)), Range("RandomRotate90")( RandCuCIM(name="rand_image_rotate_90", prob=cfg["prob"], max_k=3, spatial_axis=(-2, -1)) ), Range()(CastToType(dtype=np.float32)), - Range("RandomZoom")(RandCuCIM(name="rand_zoom", min_zoom=0.9, max_zoom=1.1)), + Range("RandomZoom")(RandCuCIM(name="rand_zoom", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1)), Range("ScaleIntensity")( CuCIM(name="scale_intensity_range", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) ), Range()(ToTensor(device=device)), ] ) - preprocess_cpu_valid = Compose([ToTensorD(keys="label")]) - preprocess_gpu_valid = Compose( + preprocess_cpu_valid = Compose( [ - Range("ValidToCupyAndCast")(ToCupy(dtype=np.float32)), - Range("ValidScaleIntensity")( - CuCIM(name="scale_intensity_range", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) + Lambdad(keys="label", func=lambda x: x.reshape((1, *cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), grid=cfg["grid_shape"], size={"image": cfg["patch_size"], "label": 1} ), - Range("ValidToTensor")(ToTensor(device=device)), + ToTensord(keys="label"), + ] + ) + preprocess_gpu_valid = Compose( + [ + ToCupy(dtype=np.float32), + CuCIM(name="scale_intensity_range", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensor(device=device), ] ) elif cfg["backend"] == "numpy": 
preprocess_cpu_train = Compose( [ - Range()(ToTensorD(keys=("image", "label"))), + Lambdad(keys="label", func=lambda x: x.reshape((1, *cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), grid=cfg["grid_shape"], size={"image": cfg["patch_size"], "label": 1} + ), + Range()(ToTensord(keys=("image", "label"))), Range("ColorJitter")( - TorchVisionD( + TorchVisiond( keys="image", name="ColorJitter", brightness=64.0 / 255.0, @@ -255,24 +280,26 @@ def main(cfg): hue=0.04, ) ), - Range()(ToNumpyD(keys="image")), - Range("RandomFlip")(RandFlipD(keys="image", prob=cfg["prob"], spatial_axis=-1)), - Range("RandomRotate90")(RandRotate90D(keys="image", prob=cfg["prob"])), - Range()(CastToTypeD(keys="image", dtype=np.float32)), - Range("RandomZoom")(RandZoomD(keys="image", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1)), + Range()(ToNumpyd(keys="image")), + Range("RandomFlip")(RandFlipd(keys="image", prob=cfg["prob"], spatial_axis=-1)), + Range("RandomRotate90")(RandRotate90d(keys="image", prob=cfg["prob"])), + Range()(CastToTyped(keys="image", dtype=np.float32)), + Range("RandomZoom")(RandZoomd(keys="image", prob=cfg["prob"], min_zoom=0.9, max_zoom=1.1)), Range("ScaleIntensity")( - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) + ScaleIntensityRanged(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) ), - Range()(ToTensorD(keys="image")), + Range()(ToTensord(keys="image")), ] ) preprocess_cpu_valid = Compose( [ - Range("ValidCastType")(CastToTypeD(keys="image", dtype=np.float32)), - Range("ValidScaleIntensity")( - ScaleIntensityRangeD(keys="image", a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0) + Lambdad(keys="label", func=lambda x: x.reshape((1, *cfg["grid_shape"]))), + GridSplitd( + keys=("image", "label"), grid=cfg["grid_shape"], size={"image": cfg["patch_size"], "label": 1} ), - Range("ValidToTensor")(ToTensorD(keys=("image", "label"))), + CastToTyped(keys="image", dtype=np.float32), + ScaleIntensityRanged(keys="image", 
a_min=0.0, a_max=255.0, b_min=-1.0, b_max=1.0), + ToTensord(keys=("image", "label")), ] ) else: @@ -287,31 +314,32 @@ def main(cfg): ) # Create MONAI dataset - train_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="training", - base_dir=cfg["data_root"], - ) - valid_json_info_list = load_decathlon_datalist( - data_list_file_path=cfg["dataset_json"], - data_list_key="validation", - base_dir=cfg["data_root"], + train_data_list = CSVDataset( + cfg["train_file"], + col_groups={"image": 0, "location": [2, 1], "label": list(range(3, 12))}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "training/images", x + ".tif")), ) train_dataset = PatchWSIDataset( - data=train_json_info_list, - region_size=cfg["region_size"], - grid_shape=cfg["grid_shape"], - patch_size=cfg["patch_size"], + data=train_data_list, + size=cfg["region_size"], + level=0, transform=preprocess_cpu_train, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + reader="openslide" if cfg["use_openslide"] else "cuCIM", + ) + + valid_data_list = CSVDataset( + cfg["valid_file"], + col_groups={"image": 0, "location": [2, 1], "label": list(range(3, 12))}, + kwargs_read_csv={"header": None}, + transform=Lambdad("image", lambda x: os.path.join(cfg["root"], "validation/images", x + ".tif")), ) valid_dataset = PatchWSIDataset( - data=valid_json_info_list, - region_size=cfg["region_size"], - grid_shape=cfg["grid_shape"], - patch_size=cfg["patch_size"], + data=valid_data_list, + size=cfg["region_size"], + level=0, transform=preprocess_cpu_valid, - image_reader_name="openslide" if cfg["use_openslide"] else "cuCIM", + reader="openslide" if cfg["use_openslide"] else "cuCIM", ) # DataLoaders @@ -369,7 +397,7 @@ def main(cfg): # ------------------------------------------------------------------------- # Training/Evaluating # ------------------------------------------------------------------------- - 
train_counter = {"n_epochs": cfg["n_epochs"], "epoch": 1, "step": 1} + train_counter = {"n_epochs": cfg["n_epochs"], "epoch": 0, "step": 0} total_valid_time, total_train_time = 0.0, 0.0 t_start = time.perf_counter() @@ -427,7 +455,7 @@ def main(cfg): writer.add_scalar("valid/accuracy", valid_acc, train_counter["epoch"]) logging.info( - f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.2f}, " + f"[Epoch: {train_counter['epoch']}/{cfg['n_epochs']}] loss: {valid_loss:.3f}, accuracy: {valid_acc:.3f}, " f"time: {t_valid - t_epoch:.1f}s (train: {train_time:.1f}s, valid: {valid_time:.1f}s)" ) else: @@ -445,12 +473,12 @@ def main(cfg): # Save the best and final model if cfg["validate"] is True: copyfile( - os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pth"), - os.path.join(log_dir, "model_best.pth"), + os.path.join(log_dir, f"model_epoch_{metric_summary['best_epoch']}.pt"), + os.path.join(log_dir, "model_best.pt"), ) copyfile( - os.path.join(log_dir, f"model_epoch_{cfg['n_epochs']}.pth"), - os.path.join(log_dir, "model_final.pth"), + os.path.join(log_dir, f"model_epoch_{cfg['n_epochs']}.pt"), + os.path.join(log_dir, "model_final.pt"), ) # Final prints @@ -463,24 +491,13 @@ def main(cfg): def parse_arguments(): parser = ArgumentParser(description="Tumor detection on whole slide pathology images.") - parser.add_argument( - "--dataset", - type=str, - default="./data/dataset_0.json", - dest="dataset_json", - help="path to dataset json file", - ) - parser.add_argument( - "--root", - type=str, - default="data/", - dest="data_root", - help="path to root folder of images containing training folder", - ) + parser.add_argument("--root", type=str, default="./", help="path to image folder containing training/validation") + parser.add_argument("--train-file", type=str, default="training.csv", help="path to training data file") + parser.add_argument("--valid-file", type=str, default="validation.csv", help="path to 
validation data file") parser.add_argument("--logdir", type=str, default="./logs/", dest="logdir", help="log directory") parser.add_argument("--rs", type=int, default=256 * 3, dest="region_size", help="region size") - parser.add_argument("--gs", type=int, default=3, dest="grid_shape", help="image grid shape (3x3)") + parser.add_argument("--gs", type=int, default=(3, 3), nargs="+", dest="grid_shape", help="image grid shape (3x3)") parser.add_argument("--ps", type=int, default=224, dest="patch_size", help="patch size") parser.add_argument("--bs", type=int, default=64, dest="batch_size", help="batch size") parser.add_argument("--ep", type=int, default=4, dest="n_epochs", help="number of epochs") @@ -502,7 +519,7 @@ def parse_arguments(): parser.add_argument("--optimized", action="store_true", help="use optimized parameters") parser.add_argument("-b", "--backend", type=str, dest="backend", help="backend for transforms") - parser.add_argument("--cpu", type=int, default=10, dest="num_workers", help="number of workers") + parser.add_argument("--cpu", type=int, default=8, dest="num_workers", help="number of workers") parser.add_argument("--gpu", type=str, default="0", dest="gpu", help="which gpu to use") args = parser.parse_args() @@ -521,12 +538,12 @@ def parse_arguments(): config_dict["backend"] = "cucim" if config_dict["baseline"] is True: - config_dict["benchmark"] = True - config_dict["novograd"] = True + config_dict["benchmark"] = False + config_dict["novograd"] = False config_dict["pretrain"] = True - config_dict["cos"] = True + config_dict["cos"] = False config_dict["pin"] = False - config_dict["amp"] = True + config_dict["amp"] = False if config_dict["backend"] is None: config_dict["backend"] = "numpy"