In [1]:
# Leave the lines below uncommented if you get:
# OMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Imports
import copy
import gc

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

In [9]:
from glomerulus import Glomerulus, Patch, KidneySampleDataset, get_glomeruli, generate_glomerulus_patches_multi, generate_random_patches_multi
from networks import CBAM_R2UNet_v2
from utils import read_tiff, dataset_label_mean
from losses import bce_weighted_dice_loss

In [3]:
# Sample IDs used for training; each ID maps to `<id>.tiff` (image) and
# `<id>.json` (labels) under `root_dir`.
train_image_names = [
    # 'afa5e8098',
    # '4ef6695ce',
    # 'c68fe75ea',
    '26dc41664',
    '095bf7a1f',
    '54f2eec69',
    '1e2425f28',
    'e79de561c',
    'cb2d976f4',
    'b9a3865fc',
    '8242609fa',
    '0486052bb',
    '2f6ecfcdf'
]

# Sample IDs held out for validation.
test_image_names = [
    'b2dc8411c',
    'aaa6a05cc'
]

root_dir = './images'


def _load_samples(image_names, desc):
    """Load the image and glomerulus labels for every ID in ``image_names``.

    Args:
        image_names: iterable of sample IDs (file stems under ``root_dir``).
        desc: label shown on the progress bar.

    Returns:
        ``(images, glomeruli)`` -- two lists of equal length where index ``i``
        of each list refers to the same sample. Samples whose image or label
        file fails to load are reported and skipped entirely.
    """
    images = []
    glomeruli = []
    for image_name in tqdm(image_names, desc=desc):
        image_path = os.path.join(root_dir, f'{image_name}.tiff')
        label_path = os.path.join(root_dir, f'{image_name}.json')
        try:
            image = read_tiff(image_path)
            labels = get_glomeruli(label_path, 'glomerulus')
        except Exception as err:
            # Catch Exception (not a bare except) so Ctrl-C still interrupts,
            # and report the cause instead of hiding it.
            print(f'Error reading {image_name}: {err!r}')
            continue
        # Append only after BOTH loads succeeded so the two lists can never
        # get out of step with each other.
        images.append(image)
        glomeruli.append(labels)
    return images, glomeruli


train_images, train_glomeruli = _load_samples(train_image_names, 'Loading Train Images')
test_images, test_glomeruli = _load_samples(test_image_names, 'Loading Test Images')

Loading Train Images:   0%|          | 0/10 [00:00<?, ?it/s]

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)


Loading Test Images:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# --- Experiment configuration -------------------------------------------

# Identifiers embedded in the checkpoint file names written during training.
model_name = 'CBAM_R2UNet_v2_rdn_data'
loss_name = 'Weighted_BCE_Dice'

# Sizes handed to the patch generators and to the model constructor.
patch_size = 512
model_resolution = 128

# Number of patches to generate per split, and the DataLoader batch size.
num_train_samples = 3000
num_val_samples = 200
batch_size = 100

# Tensor type that the training loop casts inputs and labels to.
dtype = torch.cuda.FloatTensor

In [5]:
# Build the train / validation patch lists. Each split combines patches from
# generate_glomerulus_patches_multi (85% of the requested count, truncated to
# an int) with patches from generate_random_patches_multi (the remainder).
n_train_glomerulus = int(num_train_samples * 0.85)
n_train_random = num_train_samples - n_train_glomerulus
n_val_glomerulus = int(num_val_samples * 0.85)
n_val_random = num_val_samples - n_val_glomerulus

# Training split (drawn from the training images).
train_glomerulus_patches = generate_glomerulus_patches_multi(
    patch_size=patch_size,
    num_patches=n_train_glomerulus,
    glomeruli_list=train_glomeruli,
    image_list=train_images,
)
train_random_patches = generate_random_patches_multi(
    patch_size=patch_size,
    num_patches=n_train_random,
    glomeruli_list=train_glomeruli,
    image_list=train_images,
)
train_patches = train_glomerulus_patches + train_random_patches

# Validation split (drawn from the held-out test images).
val_glomerulus_patches = generate_glomerulus_patches_multi(
    patch_size=patch_size,
    num_patches=n_val_glomerulus,
    glomeruli_list=test_glomeruli,
    image_list=test_images,
)
val_random_patches = generate_random_patches_multi(
    patch_size=patch_size,
    num_patches=n_val_random,
    glomeruli_list=test_glomeruli,
    image_list=test_images,
)
val_patches = val_glomerulus_patches + val_random_patches

# Wrap the patch lists in dataset objects for the DataLoaders.
train_dataset = KidneySampleDataset(train_patches)
val_dataset = KidneySampleDataset(val_patches)



  0%|          | 0/2550 [00:00<?, ?it/s]

  0%|          | 0/450 [00:00<?, ?it/s]

  0%|          | 0/170 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

In [6]:
# Both loaders use identical batching / worker settings, so define them once.
loader_options = dict(
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    prefetch_factor=1,
)
train_dataloader = DataLoader(train_dataset, **loader_options)
val_dataloader = DataLoader(val_dataset, **loader_options)

In [11]:
# Class-imbalance offset passed to the loss as the weights [1 - label_mean, label_mean].
# The value is hard-coded; presumably it is what the commented-out call below
# returned on an earlier run -- TODO confirm it still matches the current patches.
# label_mean = dataset_label_mean(train_dataset) # Offset for class imbalance
label_mean = 0.2964583969706049
print(label_mean)

0.2964583969706049


In [13]:
# Instantiate CBAM_R2UNet_v2 and move its parameters to the default CUDA device.
# NOTE(review): the two tuple arguments look like (internal resolution, input patch size) --
# confirm against the CBAM_R2UNet_v2 constructor in networks.py.
model = CBAM_R2UNet_v2((model_resolution, model_resolution), (patch_size, patch_size)).cuda()

In [14]:
# model.load_state_dict(torch.load('./models/CBAM_R2UNet_v2_Weighted_BCE_Dice_t1000_best_loss.npz'))

In [15]:
# Report the in-memory footprint of the model: bytes held by its parameters
# plus bytes held by its registered buffers, converted to MiB.
param_size = sum(p.nelement() * p.element_size() for p in model.parameters())
buffer_size = sum(b.nelement() * b.element_size() for b in model.buffers())

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 8.126MB


In [16]:
import torch.optim as optim
import torch.distributed as dist
import torch.multiprocessing as mp
from torch.nn.parallel import DistributedDataParallel as DDP

In [17]:
def train(model, train_dataloader, val_dataloader, label_mean, epochs=100):
    """Train ``model`` and keep the weights with the lowest validation loss.

    Uses Adam (lr=0.008, weight_decay=1e-6) with ``bce_weighted_dice_loss``.
    A checkpoint is written to ``./models`` after every epoch, and the
    best-validation-loss weights are written once training finishes.

    Relies on these notebook-level names: ``dtype``, ``model_name``,
    ``loss_name``, ``num_train_samples`` and ``batch_size``.

    Args:
        model: network to optimise (updated in place).
        train_dataloader: iterable yielding ``(inputs, labels)`` training batches.
        val_dataloader: iterable yielding ``(inputs, labels)`` validation batches.
        label_mean: class-imbalance offset; the loss receives the weights
            ``[1 - label_mean, label_mean]``.
        epochs: number of passes over ``train_dataloader``.

    Returns:
        ``(model, train_loss, val_loss)`` where the two loss lists hold one
        mean-per-batch loss value per epoch. ``model`` carries the weights of
        the LAST epoch, not necessarily the best ones.
    """
    gc.collect()
    torch.cuda.empty_cache()

    # Make sure the checkpoint directory exists before the first torch.save.
    os.makedirs('./models', exist_ok=True)
    checkpoint_prefix = f'./models/{model_name}_{loss_name}_t{num_train_samples}_b{batch_size}'

    best_weights = None
    best_loss = float('inf')

    train_loss = []
    val_loss = []

    criterion = bce_weighted_dice_loss
    optimizer = optim.Adam(model.parameters(), lr=0.008, weight_decay=1e-6)

    # Context manager guarantees the progress bar is closed, even on error.
    with tqdm(total=epochs, desc='Training') as pbar:
        for epoch in range(epochs):
            # ---- Train ----
            model.train()
            running_loss = 0

            for inputs, labels in train_dataloader:
                # Kept from the original loop to limit GPU memory pressure.
                # NOTE(review): per-batch collection slows training; consider
                # dropping it if memory allows.
                gc.collect()
                torch.cuda.empty_cache()
                # Move data to GPU
                inputs = inputs.type(dtype)
                labels = labels.type(dtype)
                optimizer.zero_grad()
                # Call the module itself (not .forward) so registered hooks run.
                outputs = model(inputs)

                loss = criterion(outputs, labels, [1 - label_mean, label_mean]).cuda()

                loss.backward()

                running_loss += loss.item()
                optimizer.step()

            train_epoch_loss = running_loss / len(train_dataloader)
            train_loss.append(train_epoch_loss)

            # ---- Validate ----
            model.eval()
            running_loss = 0
            with torch.no_grad():
                for inputs, labels in val_dataloader:
                    gc.collect()
                    torch.cuda.empty_cache()
                    # Move data to GPU
                    inputs = inputs.type(dtype)
                    labels = labels.type(dtype)
                    outputs = model(inputs)

                    loss = criterion(outputs, labels, [1 - label_mean, label_mean]).cuda()
                    running_loss += loss.item()
            val_epoch_loss = running_loss / len(val_dataloader)
            val_loss.append(val_epoch_loss)

            # Checkpoint after every epoch (the former "every 10 epochs" guard
            # was disabled in the original code).
            torch.save(model.state_dict(), f'{checkpoint_prefix}_{epoch + 1}.npz')

            # Keep track of best weights. state_dict() returns references to
            # the live parameter tensors, so it must be deep-copied; otherwise
            # later optimizer steps overwrite the "best" snapshot.
            if val_epoch_loss < best_loss:
                best_loss = val_epoch_loss
                best_weights = copy.deepcopy(model.state_dict())

            pbar.set_postfix({'Train Loss': train_epoch_loss, 'Val Loss': val_epoch_loss, 'Best Val': best_loss})
            pbar.update(1)

    # Save weights with best loss. Skipped when no epoch produced a usable
    # validation loss (epochs == 0, or every loss was NaN).
    if best_weights is not None:
        torch.save(best_weights, f'{checkpoint_prefix}_best_loss.npz')
    return model, train_loss, val_loss

In [None]:
# Run training for 200 epochs; train() returns the model together with the
# per-epoch training and validation loss lists.
model, train_loss, val_loss = train(model, train_dataloader, val_dataloader, label_mean, epochs=200)

Training:   0%|          | 0/200 [00:00<?, ?it/s]

In [None]:
import json

# Persist the per-epoch loss histories as JSON, using the same file-name
# prefix as the model checkpoints. (The original f-strings were missing the
# closing brace after the batch-size field, which is a SyntaxError.)
loss_log_prefix = f'./models/{model_name}_{loss_name}_t{num_train_samples}_b{batch_size}'

with open(f'{loss_log_prefix}_train_loss.json', 'w') as f:
    json.dump(train_loss, f, indent=2)

with open(f'{loss_log_prefix}_val_loss.json', 'w') as f:
    json.dump(val_loss, f, indent=2)

In [None]:
# Plot the loss curves. The x-axis is derived from the recorded histories
# rather than a hard-coded epoch count, so the cell keeps working when the
# number of training epochs changes.
plt.plot(np.arange(len(train_loss)), train_loss, label='Training Loss')
plt.plot(np.arange(len(val_loss)), val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(loc="upper right")
plt.show()