In [1]:
import pandas as pd
import os
from tqdm import tqdm
from sklearn.model_selection import train_test_split

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
#from torchvision.transforms import transforms

from lib.cfg import *
from lib.dataloader import get_calcification_data_index, DatasetThyroid
from lib.model import Unet

In [2]:
# Dataloader init
# Load the calcification index table and hold out 20% of its rows for testing.
# The split is drawn over positional row indices with a fixed random_state so
# the same train/test partition is produced on every run.
df_data_index = get_calcification_data_index()

idx_train, idx_test = train_test_split(
    list(range(len(df_data_index))),
    test_size=0.2,
    random_state=0
)

print('Data train size: ', len(idx_train))
print('Data test size: ', len(idx_test))
df_data_index_train = df_data_index.iloc[idx_train]
df_data_index_test = df_data_index.iloc[idx_test]

# NOTE(review): image_transform / mask_transform are not defined in this
# notebook; presumably they come from `from lib.cfg import *` — confirm.
# NOTE(review): the last DatasetThyroid argument is True only for the training
# set; presumably it toggles augmentation — confirm in lib.dataloader.

# ~3G RAM per image
batch_size_train = 2
dataloader_train = DataLoader(
    DatasetThyroid(df_data_index_train, image_transform, mask_transform, True),
    batch_size=batch_size_train,
    shuffle=True,
    num_workers=2
)

# Evaluation loaders are deliberately NOT shuffled: the test loss is averaged
# per batch and the last test batch is partial, so a random order would make
# the reported loss vary from run to run for the same weights.
batch_size_test = 4
dataloader_test = DataLoader(
    DatasetThyroid(df_data_index_test, image_transform, mask_transform, False),
    batch_size=batch_size_test,
    shuffle=False,
    num_workers=2
)

# Loader over the full index table (train + test rows), used for inference.
dataloader_all = DataLoader(
    DatasetThyroid(df_data_index, image_transform, mask_transform, False),
    batch_size=batch_size_train,
    shuffle=False,
    num_workers=2
)

Data train size:  694
Data test size:  174


In [3]:
# U-Net init
# Create the network on the GPU and restore previously saved weights from disk.
# NOTE(review): Unet(3, 1) presumably means 3 input channels and 1 output
# channel — confirm against lib.model.
unet = Unet(3, 1).cuda()
unet.load_state_dict(state_dict=torch.load(f='./unet_calcification.pth'))

# Binary cross-entropy computed on raw logits, with the positive class
# weighted by 1.7 relative to the negative class.
class_weights = torch.tensor(data=[1.7]).cuda()
criterion = nn.BCEWithLogitsLoss(pos_weight=class_weights)

# Adam over every parameter of the network.
optimizer = optim.Adam(params=unet.parameters(), lr=1e-3)

In [4]:
# Train
# Each epoch performs one optimisation pass over dataloader_train and then
# measures the loss on dataloader_test. Both losses are reported as the mean
# of the per-batch losses.
epoch_start = 0
num_epochs = 6

for epoch in range(epoch_start, epoch_start+num_epochs):
    print('Epoch {}/{}'.format(epoch, epoch_start+num_epochs - 1))
    print('-' * 10)

    # Train loss
    # Put the model in training mode so mode-dependent layers (dropout,
    # batch-norm, if the network has any) behave as intended while optimising.
    unet.train()
    train_loss = 0
    train_step = 0
    # len(DataLoader) is the integer batch count (rounded up for a partial last
    # batch); dividing the dataset size by the batch size gave tqdm a float.
    for img, mask, _, _ in tqdm(dataloader_train, total=len(dataloader_train)):
        train_step += 1
        img = img.cuda()
        mask = mask.cuda()
        optimizer.zero_grad()
        mask_pred = unet(img)
        loss = criterion(mask_pred, mask)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_loss_avg = train_loss/train_step
    print("Epoch %d | Train Loss: %0.3f" % (epoch, train_loss_avg))

    # Test loss
    # Evaluation mode plus no_grad: no parameter updates, no autograd graph,
    # and mode-dependent layers switch to their inference behaviour.
    unet.eval()
    test_loss = 0
    test_step = 0
    with torch.no_grad():
        for img, mask, _, _ in tqdm(dataloader_test, total=len(dataloader_test)):
            test_step += 1
            img = img.cuda()
            mask = mask.cuda()
            mask_pred = unet(img)
            loss = criterion(mask_pred, mask)
            test_loss += loss.item()
    test_loss_avg = test_loss/test_step
    print("Epoch %d | Test Loss: %0.3f" % (epoch, test_loss_avg))
    


  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 0/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.04it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 0 | Train Loss: 0.103


44it [00:15,  2.92it/s]                          
  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 0 | Test Loss: 0.014
Epoch 1/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.04it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 1 | Train Loss: 0.011


44it [00:15,  2.91it/s]                          
  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 1 | Test Loss: 0.006
Epoch 2/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.04it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 2 | Train Loss: 0.007


44it [00:15,  2.91it/s]                          
  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 2 | Test Loss: 0.005
Epoch 3/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.05it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 3 | Train Loss: 0.006


44it [00:15,  2.92it/s]                          
  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 3 | Test Loss: 0.004
Epoch 4/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.04it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 4 | Train Loss: 0.005


44it [00:15,  2.91it/s]                          
  0%|          | 0/347.0 [00:00<?, ?it/s]

Epoch 4 | Test Loss: 0.003
Epoch 5/5
----------


100%|██████████| 347/347.0 [02:49<00:00,  2.04it/s]
  0%|          | 0/43.5 [00:00<?, ?it/s]

Epoch 5 | Train Loss: 0.005


44it [00:15,  2.91it/s]                          

Epoch 5 | Test Loss: 0.003





# Predict mask and calculate features

In [9]:
import numpy as np
from skimage import measure

In [15]:
# Run the network over every image and derive per-image calcification
# features from the predicted mask restricted to the ground-truth lesion mask.
# Result: df_cal_features, one row per image, sorted by 'pid'.
df_cal_features = []
# Kept because the original cell reassigns this notebook-level name; the loop
# below no longer relies on it and reads the real size of each batch instead.
batch_size_test = 2

# Scale applied to region axis lengths measured in pixels.
# NOTE(review): presumably converts pixels to a physical unit (500 units over
# 512 px, then /10) — confirm the intended unit.
rescaling_factor = 500/512/10

# Inference only: switch mode-dependent layers (dropout / batch-norm, if any)
# to their evaluation behaviour.
unet.eval()

# len(DataLoader) is the integer number of batches, which is what tqdm expects.
for image, _, lesion_mask_gt_batch, img_id in tqdm(dataloader_all, total=len(dataloader_all)):
    with torch.no_grad():
        mask_pred_batch = unet(image.cuda())

    # Iterate over the samples actually present; the last batch may be smaller
    # than the configured batch size.
    for i in range(mask_pred_batch.shape[0]):
        mask_pred = mask_pred_batch[i].squeeze().cpu().numpy()
        lesion_mask_gt = lesion_mask_gt_batch[i].squeeze()

        # Zero out predictions outside the lesion region.
        mask_pred_roi = mask_pred*lesion_mask_gt.numpy()

        # NOTE(review): training uses BCEWithLogitsLoss, so the network output
        # looks like raw logits; a 0.5 cut-off on logits is not a 0.5
        # probability cut-off. Left unchanged — confirm this is intended.
        # The binary mask takes its shape from the prediction rather than a
        # hard-coded 512x512.
        cal_mask_binary = (mask_pred_roi >= 0.5).astype(np.uint8)

        # 8-connected components (connectivity=2 in 2-D) and their properties.
        connected_region = measure.label(cal_mask_binary, connectivity=2)
        region_props = measure.regionprops(connected_region)

        cal_length = np.array(
            [props.major_axis_length*rescaling_factor for props in region_props]
        )
        # Regions whose rescaled major-axis length is below 2 count as "micro".
        cal_micro = cal_length[cal_length < 2]

        if len(cal_micro):
            micro_length_min = cal_micro.min()
            micro_length_mean = cal_micro.mean()
        else:
            # np.float was removed from NumPy (1.24); np.nan is the same value.
            micro_length_min = np.nan
            micro_length_mean = np.nan

        obj = {
            'pid': int(img_id[i]),
            'count_all': len(cal_length),
            # Key spelling ('mirco') kept as-is: it becomes a column name that
            # later code may reference.
            'count_mirco': len(cal_micro),
            'micro_length_min': micro_length_min,
            'micro_length_mean': micro_length_mean
        }

        df_cal_features.append(obj)

df_cal_features = pd.DataFrame(df_cal_features)
df_cal_features = df_cal_features.sort_values(['pid'])

100%|██████████| 434/434.0 [01:20<00:00,  5.40it/s]
