# Lung Nodule Detector Model

## My first pass using  UNet-like architecture [Link](https://www.researchgate.net/figure/Convolutional-neural-network-CNN-architecture-based-on-UNET-Ronneberger-et-al_fig2_323597886)

This model is trained using LUNA 16 dataset. The preprocessing procedure will be uploaded in a later date

I used Deep-Learning-with-PyTorch book ( Eli Stevens and Luca Antiga) as reference, but I used different data preprocessing, data augmentation, model architecture, a learning rate schedule, Dice loss, as well as weighted BCE Loss.

- Initialize model + data loader
- data augmentation
- visualizing data together with labeled data. (plot it out or use ipywidget)
- Pass the batches into model
- calculate train loss + back propagation
- calculate validation loss + record params
- save weights and print out all info..
- Visualize prediction compared to labels.
- Get the candidates of training data that the model is doing poorly against (False positive candidates) and to later train on that data to reduce false positive rate.  

In [1]:
# Reset VRAM
from numba import cuda 
device = cuda.get_current_device()
device.reset()

In [1]:
# I use Line messenger to send me loss updates while working on my full time job!
from parinya import LINE
line =  LINE("")

In [3]:
import torch
import torch.nn as nn
from torchvision import datasets
# from torchvision import transforms
from torch.utils.data import DataLoader
import os
import glob
import numpy as np
import time
import datetime
from ipywidgets import interact
import matplotlib.pyplot as plt
from skimage.transform import rotate, resize
from random import randint
import shutil

how to define a dataloader https://www.kaggle.com/dhananjay3/image-segmentation-from-scratch-in-pytorch

- Note to self: glob.glob is important else will not get all the files
- Note: the torchvision augmentation doesn't support ndarray, so I tried using PIL - Image.fromarray(image) - still causing error. I decided to use skimage instead

In [6]:
class DataLoaderImg(torch.utils.data.Dataset):
    def __init__(self, folder_path="train3", random_rotation=None, get_path=False):
        super(DataLoaderImg, self).__init__()
        self.img_files = glob.glob(os.path.join(folder_path, '*.npy'))
        self.random_rotation = random_rotation
        self.get_path = get_path

    def __getitem__(self, index):
            img_path = self.img_files[index]
            data = np.load(img_path)
            image = data[0]
            label = data[1]
            if self.random_rotation:
                rand_angle = (randint(-10,10))
                image = rotate(image, rand_angle, resize=True)
                label = rotate(label, rand_angle, resize=True)
                image = resize(image, (32,32,32))  #เนื่องจากพอ rotate แล้ว shape จะเปลี่ยน
                label = resize(label, (32,32,32))
            if self.get_path:
                return torch.from_numpy(image).float().unsqueeze(0), torch.from_numpy(label).float().unsqueeze(0), img_path
            return torch.from_numpy(image).float().unsqueeze(0), torch.from_numpy(label).float().unsqueeze(0)
                                    
    def __len__(self):
        return len(self.img_files)

In [7]:
train_dataset = DataLoaderImg(folder_path = "test5", random_rotation = False)
valid_dataset = DataLoaderImg(folder_path = "valid5", random_rotation = False)


In [8]:
batch_size = 32
num_workers = 0

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True, num_workers=num_workers)
valid_loader = DataLoader(valid_dataset, batch_size = batch_size, shuffle=True, num_workers=num_workers)

In [9]:
len(train_loader)

42

In [10]:
# for (i,l) in train_loader:
#     print(i.shape)

# Visualizing data with labeling

In [11]:
def find_marked(display_label):
    layer_i = []
    for i in range(display_label.shape[0]):
        test_mask = display_label[i, :, :]
        if np.sum(test_mask)>0:
            layer_i.append(i)
    if len(layer_i)>0:
        return layer_i[int(len(layer_i)/2)]
    else:
        return int(display_label.shape[0]/2)

In [12]:
def plot_ct_scan_with_labels(loader, plot_size=50, cmap=plt.cm.gray):
    """accepts train_loader"""
    data = next(iter(loader))
    paths = data[2]
    display_labels = data[1].squeeze().detach().cpu().numpy()
    display_images = data[0].squeeze().detach().cpu().numpy() #1 batch of display_image 32,32,32,32
    f, plots = plt.subplots(int(display_images[0].shape[0] / 4) , 4, figsize=(plot_size, plot_size))
    f.suptitle('Label', fontsize=50, y=0.92)
    for img in range(0, display_images.shape[0]): #batch_size
        each_path = paths[img]
        each_label = display_labels[img]
        each_image = display_images[img]
        marked = find_marked(each_label)
        print(each_path)
        plots[int((img / 4)), int(img % 4)].imshow(each_image[marked,:,:], cmap="gray")
        label =  np.ma.masked_where((each_label < 0.05), each_label)
        plots[int((img / 4)), int(img % 4)].imshow(label[marked, :, :],cmap="hsv", alpha=0.25) 
        plots[int((img / 4)), int(img % 4)].axis('off')
        plots[int((img / 4)), int(img % 4)].set_title(str(each_path))        

if False:
    display_dataset = DataLoaderImg(folder_path = "train5",random_rotation = True, get_path=True)
    display_loader = DataLoader(display_dataset, batch_size = 32, shuffle=True, num_workers=0)
    plot_ct_scan_with_labels (display_loader)    


In [13]:
# display_dataset = DataLoaderImg(folder_path = "false_cand",random_rotation = True, get_path=True)
# display_loader = DataLoader(display_dataset, batch_size = 32, shuffle=True, num_workers=0)
# data = next(iter(display_loader))
# index = 0 #choose 0-31

In [14]:
# print(data[2][index])
# display_label = data[1][index].squeeze().detach().cpu().numpy()
# display_image = data[0][index].squeeze().detach().cpu().numpy() #1 batch

# def explore_3dimage(layer=find_marked(display_label)):
#     plt.figure(figsize=(10, 5))
#     plt.imshow(display_image[layer, :, :], cmap='gray')
#     label =  np.ma.masked_where((display_label < 0.05), display_label)
#     plt.imshow(label[layer, :, :], cmap="hsv", alpha=0.1);  #label อยู่ตรงนี้นะ
#     plt.title('Label', fontsize=20)
#     plt.axis('off')
#     return layer

# interact(explore_3dimage, layer=(0, display_image.shape[0]))
    
# index += 1 


This is my first project outside of a course
This time I'll be implementing 3d-conv in pytorch using U-net-like architecture

https://www.researchgate.net/figure/Convolutional-neural-network-CNN-architecture-based-on-UNET-Ronneberger-et-al_fig2_323597886

note to self: 
- If you didn't add super init -> will cause error model called before init (when defining LunaModel class)! (it creates a proxy for that subclass?)

In [15]:
train_on_gpu = torch.cuda.is_available()
device = torch.device("cuda" if train_on_gpu else "cpu")
print('GPU_available :',train_on_gpu)
print(torch.cuda.get_device_name(torch.cuda.current_device()))

GPU_available : True
GeForce RTX 2070 with Max-Q Design


In [16]:
class LunaBlockDown(nn.Module):
    def __init__(self, in_channels, conv_channels):
        super(LunaBlockDown, self).__init__()
        self.conv1 = nn.Conv3d(
            in_channels, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(
            conv_channels, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu2 = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool3d(2,2)
        self.batchnorm = nn.BatchNorm3d(conv_channels)
        
        
    def forward(self, input_batch):
        block_out = self.conv1(input_batch)
        block_out = self.relu1(block_out)
        block_out = self.conv2(block_out)
        block_out = self.batchnorm(block_out)
        block_out = self.relu2(block_out)
        
        return self.maxpool(block_out), block_out     

class LunaBlockUp(nn.Module):
    def __init__(self, in_channels, conv_channels):
        super(LunaBlockUp, self).__init__()
        self.t_conv_layer = nn.ConvTranspose3d(
            in_channels, in_channels,kernel_size =2, stride=2, padding=0, bias = False)
        self.conv1 = nn.Conv3d(
            in_channels, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(
            conv_channels, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu2 = nn.ReLU(inplace=True)
        self.batchnorm = nn.BatchNorm3d(conv_channels)
        
# output size: (32-1)*s -2p+k = 64

    def forward(self, input_batch):
        block_out = self.t_conv_layer(input_batch)
        block_out = self.conv1(block_out)
        block_out = self.batchnorm(block_out)
        block_out = self.relu1(block_out)
        block_out = self.conv2(block_out)
        block_out = self.batchnorm(block_out)
        block_out = self.relu2(block_out)
        
        return block_out
        

In [17]:
class LunaModel(nn.Module):
    def __init__(self, in_channels=1, conv_channels=32):
        super(LunaModel, self).__init__()
        self.block1 = LunaBlockDown(in_channels, conv_channels) 
        self.block2 = LunaBlockDown(conv_channels, conv_channels * 2) 
        self.block3 = LunaBlockDown(conv_channels * 2,conv_channels * 4) 
        self.block4 = LunaBlockUp(conv_channels * 4, conv_channels * 4)
        self.block5 = LunaBlockUp(conv_channels * 8, conv_channels*2)    
        self.block6 = LunaBlockUp(conv_channels*4, conv_channels)  
        self.conv1 = nn.Conv3d(
            conv_channels*2, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv3d(
            conv_channels, conv_channels, kernel_size=3, padding=1, bias=True)
        self.relu2 = nn.ReLU(inplace=True)
        self.conv3 = nn.Conv3d(
            conv_channels, in_channels, kernel_size=1, padding=0, bias=True)
        self.batchnorm = nn.BatchNorm3d(conv_channels)
        self.dropout = nn.Dropout(p=0.2, inplace=True)

    
    def forward(self, input_batch):
        block_out, layer1 = self.block1(input_batch) #128x128x20
        block_out, layer2 = self.block2(block_out) #64x64x10
        block_out, layer3 = self.block3(block_out) #32x32x5
        block_out = self.block4(block_out)
        block_out = torch.cat((block_out, layer3), dim=1)
        block_out = self.dropout(block_out)
        block_out = self.block5(block_out)
        block_out = torch.cat((block_out, layer2), dim=1)
        block_out = self.dropout(block_out)
        block_out = self.block6(block_out)
        block_out = torch.cat((block_out, layer1), dim=1)
        block_out = self.dropout(block_out)
        block_out = self.conv1(block_out)
        block_out = self.batchnorm(block_out)
        block_out = self.relu1(block_out)
        block_out = self.conv2(block_out)
        block_out = self.batchnorm(block_out)
        block_out = self.relu2(block_out)
        block_out = self.conv3(block_out)
#         no batchnorm for last layer
        
#         return torch.sigmoid(block_out)    #if use dice_loss
        return block_out     #if use BCEWithLogitsLoss
    
    def _init_weights(self):
        for m in self.modules():
            if type(m) in {
                nn.Conv3d,
                nn.ConvTranspose3d
            }:nn.init.kaiming_normal_(
                m.weight.data, a=0, mode='fan_out', nonlinearity='relu', 
            ) 
            if m.bias is not None:
                fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(m.weight.data) 
                bound = 1 / math.sqrt(fan_out) 
                nn.init.normal_(m.bias, -bound, bound)

                
model = LunaModel()
# move everything to gpu
if train_on_gpu:
    model.cuda()
model
    
    
        

LunaModel(
  (block1): LunaBlockDown(
    (conv1): Conv3d(1, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv3d(32, 32, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (relu2): ReLU(inplace=True)
    (maxpool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (batchnorm): BatchNorm3d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (block2): LunaBlockDown(
    (conv1): Conv3d(32, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (relu1): ReLU(inplace=True)
    (conv2): Conv3d(64, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (relu2): ReLU(inplace=True)
    (maxpool): MaxPool3d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (batchnorm): BatchNorm3d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (block3): LunaBlockDown(
    (conv1): Conv3d(64, 128, kernel_size=(3, 3,

In [18]:
pytorch_total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(pytorch_total_params)


3305217


In [19]:
# Soft dice loss https://www.jeremyjordan.me/semantic-segmentation/

In [20]:
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable

# Note: soft dice loss not dice loss
class diceloss(torch.nn.Module):
    """
    Compute mean dice coefficient over all abnormality classes.

    Args:
        y_true (tensor): shape: (num_classes, x_dim, y_dim, z_dim)
        y_pred (tensor): tensor of predictions for all classes.
                                    shape: (num_classes, x_dim, y_dim, z_dim)
        axis (tuple): spatial axes to sum over when computing numerator and
                      denominator of dice coefficient.
                      Hint: pass this as the 'axis' argument to the K.sum
                            and K.mean functions.
        epsilon (float): small constant add to numerator and denominator to
                        avoid divide by 0 errors.
    Returns:
        dice_coefficient (float): computed value of dice coefficient.     
    """
    def init(self):
        super(diceLoss, self).init()
    def forward(self,y_true, y_pred, epsilon=0.00001):
        axis = tuple(range(1, len(y_pred.shape)-1)) 
        dice_numerator = torch.sum(y_pred*y_true,axis=axis)*2 + epsilon
        dice_denominator = torch.sum(y_true**2,axis=axis) + torch.sum(y_pred**2,axis=axis) + epsilon
        dice_loss = 1-torch.mean(dice_numerator/dice_denominator)
        return dice_loss
    


In [21]:
# compute confusion matrix
def compute_CM (pred, label, threshold=0.5):
    """Only accepts numpy.."""
    threshold = 0.5
    pred[pred >= threshold] = 1.0
    pred[pred < threshold] = 0.0
    pred = pred.detach().cpu().numpy()
    label = label.detach().cpu().numpy()
    tp = np.sum((pred == 1) & (label==1))
    tn = np.sum((pred == 0) & (label==0))
    fp = np.sum((pred == 1) & (label==0))
    fn = np.sum((pred == 0) & (label==1))
    accuracy = ((tp+tn)/(tp+tn+fp+fn))
    sensitivity = ((tp/(tp+fn)))
    specificity = (tn/(tn+fp))
    ppv = (tp/(tp+fp))
    return round(accuracy*100, 2),round(sensitivity*100, 2),round(specificity*100, 2),round(ppv*100, 2)


In [22]:
# Rectified Adam https://www.kaggle.com/dhananjay3/image-segmentation-from-scratch-in-pytorch
# RAdam better than Adam? https://medium.com/@lessw/new-state-of-the-art-ai-optimizer-rectified-adam-radam-5d854730807b
# note to self learning rate decay of 0.94 is too fast since my dataset is small (10 epochs = 0.50) -> adjusted to 0.972

In [23]:
num_epochs = 150
lr         = 1*1e-3

use_learning_rate_decay = True #set to false when use LR finder
if use_learning_rate_decay == True:
    optimizer = optim.Adam(model.parameters(), lr=lr)
    LRdecayRate = 0.972
    my_lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=LRdecayRate)
else:
    optimizer = optim.Adam(model.parameters())
        
# criterion = diceloss()
pos_weight = torch.ones([1]).to(device)*70 # 1 class and weight = 70, the number of neg_px is about 70 times that of pos_px in my training dataset
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)

### Find optimal learning rate with : https://github.com/davidtvs/pytorch-lr-finder
*cylical learning rate vs adaptive learning rate?*

cyclical -> believes difficulty in minimizing the loss arises from "saddle points" rather than poor local minima.
Cyclical Learning Rates for Training Neural Networks by Leslie N. Smith https://arxiv.org/abs/1506.01186

The code from the link used cyclical learning rate to find optimal lr...
while I will just use simple leanring rate decay..

In [25]:
if use_learning_rate_decay == False:
    from torch.utils.data import DataLoader
    from torch_lr_finder import LRFinder

    model.load_state_dict(torch.load('save/save43.pt'))
    lr_finder = LRFinder(model, optimizer, criterion, device="cuda")
    lr_finder.range_test(valid_loader, end_lr=5*1e-5, num_iter=200, step_mode="exp")
    lr_finder.plot()
    lr_finder.reset()
    

    

### Optimal learning rate 
The optimal learning rate from code above is about 5*1e-4
![Optimal learning rate](model4lr.jpg)


# Start Training!

In [26]:
def print_and_send_line(text):
    try:
        line.sendtext(str(text))
    except:
        pass
    print(str(text))
    

In [28]:
model.load_state_dict(torch.load('save/save60.pt'))
print_and_send_line("Starting new training session")

losses = []

# valid_loss_min = np.Inf
valid_loss_min = 6  #for BCE loss(w=70)
# valid_loss_min = 2.3 #for Dice loss
CM_min = 141 #confusion matrix
print_every = 15
clip = 1
save_number = 1
total_time = time.time()
for e in range(num_epochs):
    start_time = time.time()
    train_loss = 0.0
    train_loss_small = 0.0
    valid_loss = 0.0
    
    ################### # train the model # ###################
    model.train() 
    for batch_i,(i, l) in enumerate(train_loader,1):
        optimizer.zero_grad()
        i, l = i.to(device), l.to(device)
        pred = model(i)
        loss = criterion(pred,l)
        loss.backward()
#         apply gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm = clip)
        optimizer.step()
        train_loss_small += loss.item()*i.size(0)
        train_loss += train_loss_small
        if batch_i%print_every == 0:
            print("Epoch: {}/{}, train_loss: {}".format(e+1,num_epochs, train_loss_small))
            try:
                line.sendtext("Epoch: {}/{}, train_loss: {}".format(e+1,num_epochs, train_loss_small))
            except:
                pass
            train_loss_small = 0.0
            print("--- %s seconds ---" % (time.time() - start_time))
            
    ###################### # validate the model # ###################### 
    model.eval()
    for (i,l) in valid_loader:
        i, l = i.to(device), l.to(device)
        pred = model(i)
        loss = criterion(pred,l)
        valid_loss += loss.item()*i.size(0)   
    
    my_lr_scheduler.step()
    train_loss = train_loss/len(train_loader.dataset)
    valid_loss = valid_loss/len(valid_loader.dataset)
    losses.append((train_loss, valid_loss))    
    accuracy, sensitivity, specificity, ppv = compute_CM(pred,l)
    
    print_and_send_line("Epoch: {}/{}, train_loss: {}, Validation_loss: {:.6f} ".format(e+1,num_epochs, train_loss, valid_loss))
    print_and_send_line("Accuracy: {}%, Sensitivity: {}%, Specificity: {}%, , PPV: {}%".format(accuracy, sensitivity, specificity, ppv))
    print_and_send_line("--- %s seconds ---" % (time.time() - start_time))   
    
    if valid_loss <= valid_loss_min:
        while os.path.isfile('save/save{}.pt'.format(save_number)):
            save_number += 1
        print_and_send_line ("val loss min decreased, Saving model...{}".format(save_number))
        torch.save(model.state_dict(), 'save/save{}.pt'.format(save_number))
        valid_loss_min = valid_loss
        
    CM =  sensitivity + ppv
    if CM > CM_min:
        while os.path.isfile('save/save{}.pt'.format(save_number)):
            save_number += 1
        print_and_send_line ("sum of params in confusion matrix increased, Saving model...{}".format(save_number))
        torch.save(model.state_dict(), 'save/save{}.pt'.format(save_number))
        CM_min = CM

back_up = "save/" + datetime.datetime.now().strftime("%d-%m-%Y_%H-%M") + ".pt"
torch.save(model.state_dict(), back_up)
print_and_send_line("Finished Training, Total time elapsed: ".format(time.time() - start_time))


Starting new training session
Epoch: 1/150, train_loss: 56.101011753082275
--- 14.523430585861206 seconds ---
Epoch: 1/150, train_loss: 19.294410973787308
--- 27.851077556610107 seconds ---
Epoch: 1/150, train_loss: 0.7109025748463782, Validation_loss: 1.359023 
Accuracy: 99.67%, Sensitivity: 39.09%, Specificity: 99.99%, , PPV: 96.12%
--- 42.92162847518921 seconds ---
val loss min decreased, Saving model...61
Epoch: 2/150, train_loss: 16.00044561177492
--- 13.394595623016357 seconds ---
Epoch: 2/150, train_loss: 6.078459993004799
--- 26.784590005874634 seconds ---
Epoch: 2/150, train_loss: 0.18504487616997778, Validation_loss: 1.182263 
Accuracy: 98.87%, Sensitivity: 0.0%, Specificity: 99.91%, , PPV: 0.0%
--- 41.83538341522217 seconds ---
val loss min decreased, Saving model...62
Epoch: 3/150, train_loss: 5.974815353751183
--- 13.397319793701172 seconds ---
Epoch: 3/150, train_loss: 8.684455767273903
--- 26.815130472183228 seconds ---
Epoch: 3/150, train_loss: 0.12205749487673695, Vali

  
  app.launch_new_instance()


Epoch: 5/150, train_loss: 0.08618957686491988, Validation_loss: 1.676223 
Accuracy: 100.0%, Sensitivity: nan%, Specificity: 100.0%, , PPV: nan%
--- 42.14767909049988 seconds ---
Epoch: 6/150, train_loss: 4.149849571287632
--- 13.699477195739746 seconds ---
Epoch: 6/150, train_loss: 2.9803215451538563
--- 27.190603256225586 seconds ---
Epoch: 6/150, train_loss: 0.06651183249936862, Validation_loss: 2.215629 
Accuracy: 96.9%, Sensitivity: 31.44%, Specificity: 99.98%, , PPV: 98.72%
--- 42.37597608566284 seconds ---
Epoch: 7/150, train_loss: 5.058951169252396
--- 13.49903130531311 seconds ---
Epoch: 7/150, train_loss: 5.3347784876823425
--- 26.969959259033203 seconds ---
Epoch: 7/150, train_loss: 0.07555288080422377, Validation_loss: 2.395963 
Accuracy: 99.15%, Sensitivity: 48.86%, Specificity: 99.98%, , PPV: 96.98%
--- 42.1494677066803 seconds ---
sum of params in confusion matrix increased, Saving model...63
Epoch: 8/150, train_loss: 5.453981753438711
--- 13.490033864974976 seconds ---
E

KeyboardInterrupt: 

In [None]:
# torch.save(model.state_dict(), 'save/save39.pt')

# Visualing Results

### loss

In [None]:
fig, ax = plt.subplots()
losses = np.array(losses)
plt.plot(losses.T[0], label='Train_loss', alpha=0.5)
plt.plot(losses.T[1], label='Valid_loss', alpha=0.5)
plt.title("Losses")
plt.legend()


### Preds in comparison to labels

In [None]:
model.load_state_dict(torch.load('save/save19.pt'))
# display_dataset = DataLoaderImg(folder_path = "test5",random_rotation = True, get_path=True)
display_dataset = DataLoaderImg(folder_path = "test5",random_rotation = True, get_path=True)
display_loader = DataLoader(display_dataset, batch_size = 50, shuffle=True, num_workers=0)
model.eval()
def plot_ct_scan_with_labels_and_pred(loader, plot_size=50, cmap=plt.cm.gray):
    """accepts train_loader"""
    data = next(iter(loader))
    i = data[0].to(device)
    pred = model(i).squeeze().detach().cpu().numpy()
    display_labels = data[1].squeeze().detach().cpu().numpy()
    display_images = i.squeeze().detach().cpu().numpy() #1 batch of display_image 32,32,32,32
    paths = data[2]

    f, plots = plt.subplots(int(display_images.shape[0] / 2) , 4, figsize=(plot_size, plot_size))
    f.suptitle('Red = Label, Yellow = Prediction', fontsize=50, y=0.92, x=0.2)
    for img in range(0, display_images.shape[0]): #batch_size
        each_path = paths[img]
        each_label = display_labels[img]
        each_image = display_images[img]
        each_pred = pred[img]
        marked = find_marked(each_label)
        print(each_path)
        
        plots[int((img / 2)), int(img % 2)*2].imshow(each_image[marked,:,:], cmap="gray")
        plots[int((img / 2)), (int(img % 2)*2)+1].imshow(each_image[marked,:,:], cmap="gray")
        label =  np.ma.masked_where((each_label < 0.05), each_label)
        pred_mask = np.ma.masked_where((each_pred < 0.05), each_pred)
        plots[int((img / 2)), int(img % 2)*2].imshow(label[marked, :, :],cmap="hsv", alpha=0.5) 
        plots[int((img / 2)), (int(img % 2)*2)+1].imshow(pred_mask[marked,:,:], cmap="Wistia", alpha=1.0)
        plots[int((img / 2)), int(img % 2)*2].axis('off')
        plots[int((img / 2)), (int(img % 2)*2)+1].axis('off')
        plots[int((img / 2)), int(img % 2)*2].set_title(str(each_path))        
        plt.subplots_adjust(wspace=0, hspace=0.2, left=0, right=0.4)


plot_ct_scan_with_labels_and_pred (display_loader)  

In [None]:
index = 31 #ใส่เลข 0-31

display_pred = pred[index].squeeze().detach().cpu().numpy()
display_label = l[index].squeeze().detach().cpu().numpy()
display_image = i[index].squeeze().detach().cpu().numpy()

In [None]:
def explore_3dimage(layer=find_marked(display_label)):
    plt.figure(figsize=(10, 5))
    plt.imshow(display_image[layer, :, :], cmap='gray');
    global display_pred
    mask = np.ma.masked_where((display_pred < 0.05), display_pred)
    plt.imshow(mask[layer, :, :], cmap="Wistia", alpha=0.5);   #mask อยู่ตรงนี้นะ
    label =  np.ma.masked_where((display_label < 0.05), display_label)
    plt.imshow(label[layer, :, :], cmap="hsv", alpha=0.5);  #label อยู่ตรงนี้นะ
    plt.title('Label', fontsize=20)
    plt.axis('off')
    return layer

interact(explore_3dimage, layer=(0, display_image.shape[0]))

# Find false positive candidates

Find the file containing the false positive candidates and move it to another location.

In [None]:
def find_false_cands(loader, plot_size=50, cmap=plt.cm.gray):
    """accepts train_loader"""
    data = next(iter(loader))
    i = data[0].to(device)
    pred = model(i).squeeze().detach().cpu().numpy()
    display_labels = data[1].squeeze().detach().cpu().numpy()
    display_images = i.squeeze().detach().cpu().numpy() #1 batch of display_image 32,32,32,32
    paths = data[2]
    all_paths = []
    for img in range(0, display_images.shape[0]): #batch_size
        each_path = paths[img]
        each_label = display_labels[img]
        each_image = display_images[img]
        each_pred = pred[img]
        marked = find_marked(each_pred)

        all_paths.append(each_path) #will delete files that are not moved
        for x in range(each_pred.shape[0]):
            test_mask = each_pred[x, :, :]
#             print(np.sum(test_mask))
            if np.sum(test_mask)>-15500:
                try:
                    shutil.move(each_path, 'train5-2')
                except:
                    pass
    for y in all_paths:
        try:
            os.remove(y)
        except:
            pass
        
# for i in range (0,1000):
#     model.load_state_dict(torch.load('save/save22.pt'))
#     display_dataset = DataLoaderImg(folder_path = "false_cand",random_rotation = True, get_path=True)
#     display_loader = DataLoader(display_dataset, batch_size = 50, shuffle=True, num_workers=0)
#     model.eval()
#     all_paths = find_false_cands (display_loader)  