# Train Notebook

**Notebook used to train models on Compute Canada Cluster**

## Imports

In [24]:
import sys
# Extend the module search path with the parent directory and several of its
# sub-folders so that the project imports further down in this cell can be resolved.
# NOTE(review): these are relative paths, so they depend on the kernel's working
# directory being the folder that contains this notebook — confirm on the cluster.
sys.path.append('../')
sys.path.append('../utils')
sys.path.append('../preprocessing')
sys.path.append('../instanciate_models')
sys.path.append('../models')

import os
import json
import torch
import wandb

from monai.losses import DiceCELoss

from utils.data_utils import convert_niigz_to_numpy, prepare_dataset_for_training_local
from utils.visualization import visualize_infered_labels
from preprocessing.data_loader import load_data, load_test_data
from instanciate_models.make_unet_resblock2 import make_unet_resblock2
from models.inference.inference import model_inference, download_model_wandb
from log_wandb import log_wandb_run

## Data Loaders

In [8]:
task_name = "Task02_Heart"

# paths
dataset_path = "../dataset"
task_folder_path = os.path.join(dataset_path, task_name)
dataset_json_path = os.path.join(task_folder_path, "dataset.json")
train_val_path = os.path.join(task_folder_path, "train_val")
test_path = os.path.join(task_folder_path, "test")

# load dataset.json file
# The existence check has to happen *before* opening the file, otherwise it can never fire.
assert os.path.exists(dataset_json_path), "dataset.json file does not exist"
with open(dataset_json_path, "r") as f:
    dataset_json = json.load(f)

# to start training, should have this structure:
# task_folder_path
#     ├── train_val
#     │   ├── image_001.nii.gz
#     │   ├── image_002.nii.gz
#     │   ├── ...
#     │   ├── label_001.nii.gz
#     │   ├── label_002.nii.gz
#     │   ├── ...
#     ├── test
#     │   ├── image_001.nii.gz
#     │   ├── image_002.nii.gz
#     │   ├── ...
#     │── dataset.json

# check if the task folder is ready for training
has_train_val = os.path.exists(train_val_path)
has_test = os.path.exists(test_path)

if not has_train_val and not has_test:
    # Nothing has been prepared yet: the raw imagesTr / labelsTr / imagesTs layout must be present.
    images_tr_path = os.path.join(task_folder_path, "imagesTr")
    labels_tr_path = os.path.join(task_folder_path, "labelsTr")
    images_ts_path = os.path.join(task_folder_path, "imagesTs")
    assert os.path.exists(images_tr_path), "imagesTr folder does not exist"
    assert os.path.exists(labels_tr_path), "labelsTr folder does not exist"
    assert os.path.exists(images_ts_path), "imagesTs folder does not exist"

    # List the training images once; the first (sorted) entry decides which format we are in.
    images_tr_files = sorted(os.listdir(images_tr_path))
    assert images_tr_files, "imagesTr folder is empty"
    first_image_name = images_tr_files[0]

    if first_image_name.endswith(".nii.gz"):
        print("Converting nii.gz to numpy...")
        convert_niigz_to_numpy(task_folder_path)
        print("Done")
    elif first_image_name.endswith(".npy"):
        # Already converted: make sure the file counts agree with the dataset description.
        assert len(images_tr_files)==dataset_json["numTraining"], "Number of training images does not match dataset.json"
        assert len(os.listdir(labels_tr_path))==dataset_json["numTraining"], "Number of training labels does not match dataset.json"
        assert len(os.listdir(images_ts_path))==dataset_json["numTest"], "Number of test images does not match dataset.json"
        print("Numpy files already exist")
    print("Preparing dataset for training...")
    prepare_dataset_for_training_local(task_folder_path)
    print("Done")
else:
    # At least one prepared folder exists; a half-prepared task folder is reported
    # explicitly instead of failing inside os.listdir.
    assert has_train_val, "train_val folder does not exist"
    assert has_test, "test folder does not exist"
    # train_val is expected to hold one image and one label per training case.
    assert len(os.listdir(train_val_path))==dataset_json["numTraining"]*2, "Number of training images and labels does not match dataset.json"
    assert len(os.listdir(test_path))==dataset_json["numTest"], "Number of test images does not match dataset.json"
    print("Task folder is already ready for training!")

Numpy files already exist
Preparing dataset for training...
Done


In [19]:
# data-loading settings for training
batch_size = 2
num_classes = len(dataset_json["labels"])  # one class per entry under "labels" in dataset.json
shuffle = True
normalize = True
resize = (128, 128, 128)
transform = None

# build the train / validation dataloaders for the current task folder
train_dataloader, val_dataloader = load_data(
    task_folder_path,
    batch_size=batch_size,
    num_classes=num_classes,
    shuffle=shuffle,
    normalize=normalize,
    resize=resize,
    transform=transform,
)

# first element of the first training sample, with a leading axis of size 1 added;
# reused further down as a dummy input for the model
input_example = train_dataloader.dataset[0][0].unsqueeze(0)
# per-sample shape, i.e. everything after the leading axis
input_shape = tuple(input_example.shape[1:])
print("input shape:", input_shape)
print("train dataloader len:", len(train_dataloader.dataset))
print("val dataloader len:", len(val_dataloader.dataset))

input shape: (1, 128, 128, 128)
train dataloader len: 16
val dataloader len: 4


## Training

In [17]:
# Log in to Weights & Biases (wandb) for this kernel session.
wandb.login()



True

In [20]:
# choose the compute device: first CUDA GPU when torch can see one, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# build the network, then move it onto the selected device
# NOTE(review): the meaning of the positional argument 0 is not visible from this notebook — confirm
model = make_unet_resblock2(0)
model = model.to(device)

# show the layer-by-layer structure of the model
print(model)

The device is cpu
UNet(
  (predict_softmax): Softmax(dim=1)
  (encoder): ConvEncoder(
    (conv_blocks): ModuleList(
      (0): ResConvBlock(
        (conv_block_1): Conv3DDropoutNormActivation(
          (convolution): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
          (dropout): Dropout3d(p=0, inplace=False)
          (activation): LeakyReLU(negative_slope=0.01)
          (normalization): InstanceNorm3d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        )
        (conv_block_2): Conv3DDropoutNormActivation(
          (convolution): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
          (dropout): Dropout3d(p=0, inplace=False)
          (activation): LeakyReLU(negative_slope=0.01)
          (normalization): InstanceNorm3d(32, eps=1e-05, momentum=0.1, affine=False, track_running_stats=False)
        )
        (conv_block_3): Conv3DDropoutNormActivation(
          (convolution): Conv3d(32, 32, kern

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


In [21]:
# Sanity check: push one example through the model and compare input/output shapes.
# Gradients are not needed for this check, so autograd is disabled to avoid
# building (and keeping alive through `output`) a graph for the full 3D volume.
with torch.no_grad():
    output = model(input_example.to(device))
print('input shape:', input_example.shape)
print('output shape unet:', output.shape)

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


input shape: torch.Size([1, 1, 128, 128, 128])
output shape unet: torch.Size([1, 2, 128, 128, 128])


In [23]:
# loss function: MONAI DiceCELoss with its default arguments
criterion = DiceCELoss()

# optimizer and learning-rate schedule
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-7)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.1, patience=20)

# name under which the run is logged: "<model class>_<task>"
run_name = f"{model.__class__.__name__}_{task_name}"

# launch training with wandb logging
# NOTE(review): `segmentation_ouput` is spelled exactly as the keyword is used here;
# it has to match the signature of log_wandb_run, which is defined outside this notebook.
log_wandb_run(
    model,
    train_dataloader,
    val_dataloader,
    batch_size=batch_size,
    num_classes=num_classes,
    num_epochs=3,
    patience=100,
    optimizer=optimizer,
    criterion=criterion,
    scheduler=scheduler,
    segmentation_ouput=True,
    run_name=run_name,
)

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


## Inference

In [26]:
# load model from wandb
# This rebinds `model`: from here on it refers to whatever download_model_wandb
# returns for a freshly built make_unet_resblock2(0), not to the network trained above.
# NOTE(review): the string arguments look like wandb entity, project, model name and
# version — confirm against download_model_wandb's signature.
# NOTE(review): unlike the training model, the result is not explicitly moved to
# `device` here — verify whether the helper or model_inference handles that.
model = download_model_wandb(make_unet_resblock2(0), "enzymes", "ift6759_project", "UNet", "v14")

wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


In [None]:
# where the predicted labels are written, and which test files they correspond to
output_folder = os.path.join("inference", task_name)
output_filenames = sorted(os.listdir(os.path.join(task_folder_path, "test")))
# keep the 3 characters that precede the last 4 characters of each file name
# NOTE(review): presumably the numeric case id in front of a 4-character extension
# (e.g. "001" in "image_001.npy") — confirm against the files in the test folder
output_filenames_idx = [filename[-7:-4] for filename in output_filenames]

# load test dataloader
# The loader settings are passed inline (as in the visualisation cell) instead of
# being assigned to batch_size / shuffle / normalize / resize / transform, so this
# cell no longer overwrites the hyperparameters used by the training cell.
test_dataloader = load_test_data(task_folder_path,
                                 batch_size=1,
                                 shuffle=False,
                                 normalize=True,
                                 resize=(128, 128, 128),
                                 transform=None)

# perform inference on model and save output labels
model_inference(model,
                test_dataloader,
                task_folder_path,
                task_name,
                output_folder,
                output_filenames_idx)

### Visualisation

In [None]:
# rebuild the test dataloader for display: one sample per batch, fixed order,
# with normalization, resizing and transforms all switched off
test_dataloader = load_test_data(
    task_folder_path,
    batch_size=1,
    shuffle=False,
    normalize=False,
    resize=None,
    transform=None,
)

# use the last entry (in sorted order) found inside the inference output folder
labels_path = os.path.join(
    output_folder,
    sorted(os.listdir(output_folder))[-1],
)
visualize_infered_labels(test_dataloader, labels_path, figsize=(5, 5))