In [1]:
import torch.nn as nn
import torch.nn.functional as F
import torch
import numpy as np
import sys
import re
import os
import torch.optim as optim
import time
import nibabel as nib
import matplotlib.pylab as plt
import math
from torch.utils.data import DataLoader
from tqdm import tqdm

from scipy import ndimage
from datetime import datetime
from glob import glob

In [2]:
# Let's see whether Nilearn is installed
try:
    import nilearn
except ImportError:
    # if not, install it using pip
    !pip install nilearn
from nilearn.image import resample_img

In [3]:
base_dir = "./"
raw_dataset_dir = "dataset/"
transformed_dataset_dir_path = "dataset/affine_transformed/"

In [4]:
is_colab = True
if is_colab:
    base_dir = "/content/drive/MyDrive/Colab Notebooks/"
    if not os.path.isdir(base_dir):
        from google.colab import drive
        drive.mount('/content/drive')

raw_dataset_dir = os.path.join(base_dir, raw_dataset_dir)
transformed_dataset_dir_path = os.path.join(base_dir, transformed_dataset_dir_path)

if os.path.isdir(raw_dataset_dir) and os.path.isdir(transformed_dataset_dir_path):
    print("dataset folder exists, OK")
else:
    raise Exception("check path for dataset:{} \n path for transformed dataset: {}"
                    .format(raw_dataset_dir, transformed_dataset_dir_path))


dataset folder exists, OK


In [5]:
print(base_dir)
sys.path.append(base_dir)
from classes.dataset_utils.toTorchDataset import ProcessedKit23TorchDataset
from classes.models import resnet_model_generator
from classes.config_class import ProjectModelResnetConfig
from classes.epoch_results import EpochResult

/content/drive/MyDrive/Colab Notebooks/


In [6]:
training_data = ProcessedKit23TorchDataset(train_data=True, test_size=0.25, dataset_dir =transformed_dataset_dir_path)
test_data = ProcessedKit23TorchDataset(train_data=False, test_size=0.25, dataset_dir =transformed_dataset_dir_path)

In [7]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0


In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [9]:
proj_config = ProjectModelResnetConfig(model_depth=50, no_cuda=False)
proj_resnet_model, _ = resnet_model_generator.generate_model(proj_config)

In [10]:
proj_config.set_net_model(proj_resnet_model)

In [11]:
criterion = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = optim.SGD(proj_config.nn_model.parameters(), lr=0.001, momentum=0.9, weight_decay=1e-3)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)
if not proj_config.no_cuda:
    criterion = criterion.cuda()

In [12]:
train_from_pretrained = True
epoch_res = EpochResult()
epoch_start = 0
if train_from_pretrained:
    print("loading from pretrained Med3D model")
    resnet10 = "pretrainedModel/resnet_10_23dataset.pth"
    resnet50 = "pretrainedModel/resnet_50_23dataset.pth"
    if proj_config.model_depth == 10:
        pretrained_w = os.path.join(base_dir, resnet10)
        proj_config.load_med3d_pretrain_weigth(pretrained_w)
    elif proj_config.model_depth == 50:
        pretrained_w = os.path.join(base_dir, resnet50)
        proj_config.load_med3d_pretrain_weigth(pretrained_w)
    else:
        raise Exception("Only depth 10 and 50 are used for now.")
else:
    # this continues from certain training points
    checkpoint_dir = "training_checkpoints/Model_resnet_50_epoch5.pth.tar"
    pretrained_w = os.path.join(base_dir, checkpoint_dir)
    checkpoint, epoch_res = proj_config.load_weight_from_epoch(pretrained_w)
    optimizer.load_state_dict(checkpoint['optimizer'])
    epoch_start = epoch_res.epoch_list[-1] + 1



loading from pretrained Med3D model


In [13]:
# data_loader = DataLoader(training_data, batch_size=proj_config.batch_size, shuffle=True, num_workers=proj_config.num_workers, pin_memory=proj_config.pin_memory)
data_loader = DataLoader(training_data, batch_size=2, shuffle=True, num_workers=proj_config.num_workers, pin_memory=proj_config.pin_memory)

In [14]:
if is_colab:
    proj_config.model_save_path = os.path.join(base_dir,"training_checkpoints/")

In [None]:
train_time_start = time.time()
batches_per_epoch = len(data_loader)

for epoch in range(epoch_start, proj_config.max_epoch):
    current_lr = scheduler.get_last_lr()
    running_loss = None
    print("current epoch={:5d} Learning Rate={}".format(epoch, current_lr))

    for batch_idx, batch_data  in enumerate(data_loader):
        imgs, segs = batch_data
        if not proj_config.no_cuda:
            imgs, segs = imgs.cuda(), segs.cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        y_preds = proj_config.nn_model(imgs.float())

        [n, _, z_size, y_size, x_size] = y_preds.shape

        resized_segs = np.zeros([n, z_size, y_size, x_size])
        for idx in range(n):
            seg = segs[idx][0]
            if not proj_config.no_cuda:
                seg = seg.to('cpu')
            [ori_z, ori_y, ori_x] = seg.shape
            scale = [z_size/ori_z, y_size/ori_y, x_size/ori_x]
            this_affine = np.array([[scale[0], 0, 0],[0, scale[1], 0],[0, 0, scale[2]]])
            resized_segs[idx] = ndimage.affine_transform(seg, this_affine, output_shape=resized_segs[idx].shape, cval=0)

        resized_segs = torch.tensor(resized_segs).to(torch.int64)
        if not proj_config.no_cuda:
            resized_segs = resized_segs.cuda()
        loss = criterion(y_preds, resized_segs)
        running_loss = loss.item()
        loss.backward()
        optimizer.step()


        total_processed_batches = (epoch - epoch_start) * batches_per_epoch + 1 + batch_idx
        avg_batch_time = (time.time() - train_time_start) / total_processed_batches
        if batch_idx % 25 == 0:
            print("Epoch:{} Batch:{} loss = {:.5f}, avg_batch_time = {:.5f}".format(epoch, batch_idx, running_loss, avg_batch_time))
    scheduler.step()
    epoch_res.append_result(epoch, running_loss, current_lr)
    model_checkpoint_path = proj_config.save_checkpoint_pathname(epoch, with_Datetime=False)
    torch.save({'epoch_list': epoch_res.epoch_list, 'loss_list': epoch_res.loss_list, 'lr_list': epoch_res.lr_list,
                'state_dict': proj_config.nn_model.state_dict(),'optimizer': optimizer.state_dict()},model_checkpoint_path, _use_new_zipfile_serialization=True)

print('Finished Training')

current epoch=    0 Learning Rate=[0.001]
Epoch:0 Batch:0 loss = 1.94046, avg_batch_time = 14.89788
Epoch:0 Batch:25 loss = 0.70820, avg_batch_time = 4.55481
Epoch:0 Batch:50 loss = 0.25120, avg_batch_time = 4.36809
Epoch:0 Batch:75 loss = 0.09838, avg_batch_time = 4.29674
Epoch:0 Batch:100 loss = 0.06968, avg_batch_time = 4.25866
Epoch:0 Batch:125 loss = 0.05650, avg_batch_time = 4.23861
Epoch:0 Batch:150 loss = 0.04549, avg_batch_time = 4.22377
Epoch:0 Batch:175 loss = 0.03829, avg_batch_time = 4.21484
current epoch=    1 Learning Rate=[0.00099]
Epoch:1 Batch:0 loss = 0.03950, avg_batch_time = 4.25714
Epoch:1 Batch:25 loss = 0.03361, avg_batch_time = 4.24719
Epoch:1 Batch:50 loss = 0.02986, avg_batch_time = 4.23764
Epoch:1 Batch:75 loss = 0.02656, avg_batch_time = 4.22968
Epoch:1 Batch:100 loss = 0.02520, avg_batch_time = 4.22631
Epoch:1 Batch:125 loss = 0.02523, avg_batch_time = 4.22128
Epoch:1 Batch:150 loss = 0.02088, avg_batch_time = 4.21915
Epoch:1 Batch:175 loss = 0.01853, avg_

In [None]:
# print(proj_config.nn_model)

In [None]:
!pwd

In [None]:
#Additional Info when using cuda
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

In [None]:
print(base_dir)
print(resnet10)
pretrained_w = os.path.join(base_dir, resnet10)
print(pretrained_w)