## Install Libraries

In [1]:
!pip install torchsummary
!pip install torchgeometry
!pip install segmentation-models-pytorch



## Import Libraries

In [2]:
import os
import pandas as pd
import numpy as np
import cv2
from torchvision.io import read_image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split, DataLoader
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from torchvision.transforms import ToTensor
from PIL import Image
import os

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision 
from torchvision import transforms
from torchinfo import summary
import timm
import segmentation_models_pytorch as smp
import wandb



In [3]:
!nvidia-smi -L

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-025592e3-86c8-d06f-3f41-0b97fec51bd9)


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## Read data

In [5]:
images_path = "/kaggle/input/bkai-igh-neopolyp/train/train/"
image_path = []
TRAIN_DIR = '/kaggle/input/bkai-igh-neopolyp/train/train'
for root, dirs, files in os.walk(TRAIN_DIR):
    for file in files:
        path = os.path.join(root,file)
        image_path.append(path)
        
len(image_path)

1000

In [6]:
mask_path = []
TRAIN_MASK_DIR = '/kaggle/input/bkai-igh-neopolyp/train_gt/train_gt'
for root, dirs, files in os.walk(TRAIN_MASK_DIR):
    for file in files:
        path = os.path.join(root,file)
        mask_path.append(path)
        
len(mask_path)

1000

In [7]:
class DatasetCustom(Dataset):
    def __init__(self, img_dir, label_dir, resize=None, transform=None):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.resize = resize
        self.transform = transform
        self.images = os.listdir(self.img_dir)

    def __len__(self):
        return len(self.images)
    
    def read_mask(self, mask_path):
        image = cv2.imread(mask_path)
        image = cv2.resize(image, self.resize, interpolation=cv2.INTER_AREA)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

        lower_red1 = np.array([0, 100, 20])
        upper_red1 = np.array([10, 255, 255])
        lower_red2 = np.array([160,100,20])
        upper_red2 = np.array([179,255,255])
        
        lower_mask_red = cv2.inRange(image, lower_red1, upper_red1)
        upper_mask_red = cv2.inRange(image, lower_red2, upper_red2)
        
        red_mask = lower_mask_red + upper_mask_red
        red_mask[red_mask != 0] = 1

        green_mask = cv2.inRange(image, (36, 25, 25), (70, 255, 255))
        green_mask[green_mask != 0] = 2

        full_mask = cv2.bitwise_or(red_mask, green_mask)
        full_mask = np.expand_dims(full_mask, axis=-1) 
        full_mask = full_mask.astype(np.uint8)
        
        return full_mask

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.images[idx])
        label_path = os.path.join(self.label_dir, self.images[idx])
        image = cv2.imread(img_path)  #  BGR
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # Convert to RGB
        label = self.read_mask(label_path)  
        image = cv2.resize(image, self.resize, interpolation=cv2.INTER_AREA)
        if self.transform:
            image = self.transform(image)
            
        return image, label

In [8]:
img_resize = (320, 320)

In [9]:
dataset = DatasetCustom(img_dir= TRAIN_DIR,
                             label_dir= TRAIN_MASK_DIR,
                             resize= img_resize,
                             transform = None)

## Augmentation

In [10]:
class AugmentDataset(Dataset):
    def __init__(self, dataset, transform=None, length_multiplier=1):
        self.dataset = dataset
        self.transform = transform
        self.length_multiplier = length_multiplier
        
    def __getitem__(self, idx):
        actual_idx = idx % len(self.dataset)
        image, label = self.dataset[actual_idx]
        if self.transform:
            transformed = self.transform(image=image, mask=label)
            image = transformed['image'].float()
            label = transformed['mask'].float()
            label = label.permute(2, 0, 1)
        return image, label

    def __len__(self):
        return len(self.dataset) * self.length_multiplier

In [11]:
train_transformation = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomGamma (gamma_limit=(70, 130), eps=None, always_apply=False, p=0.2),
    A.RGBShift(p=0.3, r_shift_limit=10, g_shift_limit=10, b_shift_limit=10),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

val_transformation = A.Compose([
    A.Normalize(mean=(0.485, 0.456, 0.406),std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

In [12]:
# Define the split sizes
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size

# Define the batch size
batch_size = 32
# Randomly split the dataset
generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=generator)

train_dataset = AugmentDataset(train_dataset, transform=train_transformation, length_multiplier=2)
val_dataset = AugmentDataset(val_dataset, transform=val_transformation)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

## Define Model

In [13]:
import segmentation_models_pytorch as smp

model = smp.UnetPlusPlus(
    encoder_name="resnet34",        
    encoder_weights="imagenet",     
    in_channels=3,                  
    classes=3     
)

In [14]:
color_dict= {0: (0, 0, 0),
             1: (255, 0, 0),
             2: (0, 255, 0)}
def mask_to_rgb(mask, color_dict):
    output = np.zeros((mask.shape[0], mask.shape[1], 3))

    for k in color_dict.keys():
        output[mask==k] = color_dict[k]

    return np.uint8(output)    

In [15]:
del dataset
del train_dataset
del val_dataset

## Train

Define Dice loss function

In [16]:
import torch.nn.functional as F

class DiceLoss(nn.Module):
    def __init__(self, num_classes=3, epsilon=1e-6):
        super(DiceLoss, self).__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon

    def forward(self, outputs, targets):
        outputs = F.softmax(outputs, dim=1)
        targets_one_hot = F.one_hot(targets, num_classes=self.num_classes).permute(0, 3, 1, 2).float()

        # Flatten the tensors
        outputs_flat = outputs.contiguous().view(-1, self.num_classes)
        targets_flat = targets_one_hot.contiguous().view(-1, self.num_classes)

        intersection = torch.sum(outputs_flat * targets_flat, dim=0)
        union = torch.sum(outputs_flat + targets_flat, dim=0)

        dice_loss = 1 - (2 * intersection + self.epsilon) / (union + self.epsilon)
        mean_dice_loss = dice_loss.mean()

        return mean_dice_loss

In [17]:
from tqdm import tqdm

model.to(device)
criterion_ce = nn.CrossEntropyLoss()
criterion_dice = DiceLoss(num_classes=3)
alpha = 0.5  # Weight for balancing the two losses
best_val_loss = 999

Make use of learning rate scheduler

In [18]:
import torch.optim as optim

# Assuming you've already defined your optimizer
learning_rate = 0.0001
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Initialize the scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    mode='min',                 # We want to minimize the validation loss
    factor=0.5,                 # Reduce LR by a factor of 0.5
    patience=2,                 # Number of epochs with no improvement after which LR will be reduced
    verbose=True,               # Print a message when LR is updated
    threshold=0.0001,           # Threshold for measuring the new optimum
    threshold_mode='rel',       # Mode for threshold
    cooldown=0,                 # Number of epochs to wait before resuming normal operation after LR has been reduced
    min_lr=1e-6                 # Lower bound on the learning rate
)

In [19]:
import wandb

wandb.login(
    # set the wandb project where this run will be logged
#     project= "PolypSegment", 
    key = "e02f7703b40a2b3e0ab4801d4cb1d86b3b7327a6",
)

wandb.init(
    project="PolypSegment",
    config={
        "init_learning_rate": learning_rate,
        "batch_size": batch_size,
        "optimizer": "Adam",
        "loss_functions": ["CrossEntropy", "Dice"],
        "alpha": alpha
    }
)

# Add the model to WandB
wandb.watch(model, log="all")

[34m[1mwandb[0m: Currently logged in as: [33mtu7pham7[0m ([33mhustcollab[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


[]

In [28]:
num_epochs = 25
epoch_bar = tqdm(total=num_epochs, desc='Total Progress')

for epoch in range(num_epochs):
    model.train()
    train_loss = 0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        labels = labels.squeeze(dim=1).long()
        outputs = model(images)
    
        loss_ce = criterion_ce(outputs, labels)
        loss_dice = criterion_dice(outputs, labels)
        loss = alpha * loss_ce + (1 - alpha) * loss_dice

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        
#         del images, labels, outputs, loss
#         torch.cuda.empty_cache()
        
    model.eval()
    with torch.no_grad():
        val_loss = 0
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device)
            labels = labels.squeeze(dim=1).long()
            
            outputs = model(images)

            val_loss += criterion_dice(outputs.float(),labels.long()).item()
            
#             del images, labels, outputs
#             torch.cuda.empty_cache()
            
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {val_loss/len(val_loader):.10f}")
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = { 
            'epoch': epoch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'loss': val_loss,
        }
        save_path = f'model.pth'
        torch.save(checkpoint, save_path)
        
    scheduler.step(val_loss)
    epoch_bar.update(1)
    wandb.log({'Val_loss': val_loss/len(val_loader),'Train_loss': train_loss/len(train_loader)})
epoch_bar.close()

Total Progress:   0%|          | 0/25 [00:00<?, ?it/s]

Epoch [1/25], Loss: 0.0645475425


Total Progress:   4%|▍         | 1/25 [01:45<42:14, 105.60s/it]

Epoch [2/25], Loss: 0.0536414395


Total Progress:   8%|▊         | 2/25 [03:29<40:06, 104.64s/it]

Epoch [3/25], Loss: 0.0452362081


Total Progress:  12%|█▏        | 3/25 [05:14<38:20, 104.58s/it]

Epoch [4/25], Loss: 0.0418191334


Total Progress:  16%|█▌        | 4/25 [06:59<36:42, 104.87s/it]

Epoch [5/25], Loss: 0.0373047348


Total Progress:  20%|██        | 5/25 [08:44<34:59, 104.99s/it]

Epoch [6/25], Loss: 0.0334124264


Total Progress:  24%|██▍       | 6/25 [10:29<33:13, 104.90s/it]

Epoch [7/25], Loss: 0.0328275489


Total Progress:  28%|██▊       | 7/25 [12:13<31:25, 104.73s/it]

Epoch [8/25], Loss: 0.0294407021


Total Progress:  32%|███▏      | 8/25 [13:58<29:40, 104.75s/it]

Epoch [9/25], Loss: 0.0284969024


Total Progress:  36%|███▌      | 9/25 [15:43<27:54, 104.68s/it]

Epoch [10/25], Loss: 0.0275730642


Total Progress:  40%|████      | 10/25 [17:28<26:12, 104.81s/it]

Epoch [11/25], Loss: 0.0240441484


Total Progress:  48%|████▊     | 12/25 [20:56<22:38, 104.52s/it]

Epoch [12/25], Loss: 0.0246858831
Epoch [13/25], Loss: 0.0231482735


Total Progress:  52%|█████▏    | 13/25 [22:41<20:52, 104.42s/it]

Epoch [14/25], Loss: 0.0228800215


Total Progress:  56%|█████▌    | 14/25 [24:24<19:06, 104.23s/it]

Epoch [15/25], Loss: 0.0220962231


Total Progress:  60%|██████    | 15/25 [26:09<17:22, 104.28s/it]

Epoch [16/25], Loss: 0.0198844164


Total Progress:  68%|██████▊   | 17/25 [29:38<13:56, 104.50s/it]

Epoch [17/25], Loss: 0.0243116168


Total Progress:  72%|███████▏  | 18/25 [31:22<12:09, 104.17s/it]

Epoch [18/25], Loss: 0.0207303856
Epoch [19/25], Loss: 0.0181293575


Total Progress:  80%|████████  | 20/25 [34:50<08:40, 104.10s/it]

Epoch [20/25], Loss: 0.0182054473


Total Progress:  84%|████████▍ | 21/25 [36:33<06:55, 103.92s/it]

Epoch [21/25], Loss: 0.0204908113
Epoch [22/25], Loss: 0.0160256995


Total Progress:  92%|█████████▏| 23/25 [40:01<03:27, 103.78s/it]

Epoch [23/25], Loss: 0.0161220210


Total Progress:  96%|█████████▌| 24/25 [41:44<01:43, 103.63s/it]

Epoch [24/25], Loss: 0.0192296287


Total Progress: 100%|██████████| 25/25 [43:27<00:00, 104.32s/it]

Epoch [25/25], Loss: 0.0223425322
Epoch 00030: reducing learning rate of group 0 to 5.0000e-05.





In [29]:
# model = smp.UnetPlusPlus(
#     encoder_name="resnet34",        
#     encoder_weights="imagenet",     
#     in_channels=3,                  
#     classes=3     
# )
checkpoint = torch.load('/kaggle/working/model.pth')
model.load_state_dict(checkpoint['model'])
model.to(device)

UnetPlusPlus(
  (encoder): ResNetEncoder(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=Tru

In [30]:
!mkdir prediction

mkdir: cannot create directory ‘prediction’: File exists


## Get Testset Prediction

In [31]:
model.eval()
for i in os.listdir("/kaggle/input/bkai-igh-neopolyp/test/test"):
    img_path = os.path.join("/kaggle/input/bkai-igh-neopolyp/test/test", i)
    ori_img = cv2.imread(img_path)
    ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2RGB)
    ori_h = ori_img.shape[0]
    ori_w = ori_img.shape[1]
    img = cv2.resize(ori_img, img_resize, interpolation=cv2.INTER_AREA)
    transformed = val_transformation(image=img)
    input_img = transformed["image"]
    input_img = input_img.unsqueeze(0).to(device)
    with torch.no_grad():
        output_mask = model(input_img).squeeze(0).cpu().numpy().transpose(1, 2, 0)
    mask = cv2.resize(output_mask, (ori_w, ori_h), interpolation=cv2.INTER_CUBIC)
    mask = np.argmax(mask, axis=2)
    mask_rgb = mask_to_rgb(mask, color_dict)
    mask_rgb = cv2.cvtColor(mask_rgb, cv2.COLOR_RGB2BGR)
    cv2.imwrite("prediction/{}".format(i), mask_rgb)
    # Clear variables to free memory
    del img, input_img, output_mask, mask, mask_rgb
    torch.cuda.empty_cache()

In [32]:
def rle_to_string(runs):
    return ' '.join(str(x) for x in runs)

def rle_encode_one_mask(mask):
    pixels = mask.flatten()
    pixels[pixels > 225] = 255
    pixels[pixels <= 225] = 0
    use_padding = False
    if pixels[0] or pixels[-1]:
        use_padding = True
        pixel_padded = np.zeros([len(pixels) + 2], dtype=pixels.dtype)
        pixel_padded[1:-1] = pixels
        pixels = pixel_padded
    rle = np.where(pixels[1:] != pixels[:-1])[0] + 2
    if use_padding:
        rle = rle - 1
    rle[1::2] = rle[1::2] - rle[:-1:2]
    
    return rle_to_string(rle)

def rle2mask(mask_rle, shape=(3,3)):
    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0:][::2], s[1:][::2])]
    starts -= 1
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape).T

def mask2string(dir):
    strings = []
    ids = []
    ws, hs = [[] for i in range(2)]
    for image_id in os.listdir(dir):
        id = image_id.split('.')[0]
        path = os.path.join(dir, image_id)
        print(path)
        img = cv2.imread(path)[:,:,::-1]
        h, w = img.shape[0], img.shape[1]
        for channel in range(2):
            ws.append(w)
            hs.append(h)
            ids.append(f'{id}_{channel}')
            string = rle_encode_one_mask(img[:,:,channel])
            strings.append(string)
    r = {
        'ids': ids,
        'strings': strings,
    }
    return r


MASK_DIR_PATH = '/kaggle/working/prediction'
dir = MASK_DIR_PATH
res = mask2string(dir)
df = pd.DataFrame(columns=['Id', 'Expected'])
df['Id'] = res['ids']
df['Expected'] = res['strings']

df.to_csv(r'output.csv', index=False)

/kaggle/working/prediction/39d6aad6bb0170a40ed32deef71fbe08.jpeg
/kaggle/working/prediction/8fa8625605da2023387fd56c04414eaa.jpeg
/kaggle/working/prediction/6b83ef461c2a337948a41964c1d4f50a.jpeg
/kaggle/working/prediction/0398846f67b5df7cdf3f33c3ca4d5060.jpeg
/kaggle/working/prediction/7330398846f67b5df7cdf3f33c3ca4d5.jpeg
/kaggle/working/prediction/0619ebebe9e9c9d00a4262b4fe4a5a95.jpeg
/kaggle/working/prediction/1ad4f13ccf1f4b331a412fc44655fb51.jpeg
/kaggle/working/prediction/3425b976973f13dd311a65d2b46d0a60.jpeg
/kaggle/working/prediction/e1797c77826f9a7021bab9fc73303988.jpeg
/kaggle/working/prediction/80c643782707d7c359e27888daefee82.jpeg
/kaggle/working/prediction/ad43fe2cd066b9fdbc3bbc04a3afe1f1.jpeg
/kaggle/working/prediction/63b8318ecf467d7ad048df39beb17636.jpeg
/kaggle/working/prediction/d694539ef2424a9218697283baa3657e.jpeg
/kaggle/working/prediction/30c2f4fc276ed9f178dc2f4af6266509.jpeg
/kaggle/working/prediction/5e8f14e1e0ae936de314f2d95e6c487f.jpeg
/kaggle/working/predictio

## Inference

In [25]:
# from torch.jit import load
# model = UNet()
# optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

# checkpoint = torch.load(pretrained_path)

In [26]:
# optimizer.load_state_dict(checkpoint['optimizer'])

In [27]:
# from collections import OrderedDict
# new_state_dict = OrderedDict()
# for k, v in checkpoint['model'].items():
#     name = k[7:] # remove `module.`
#     new_state_dict[name] = v
# # load params
# model.load_state_dict(new_state_dict)