In [1]:
%pip install -q segmentation-models-pytorch
%pip install -q timm

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pandas as pd
import numpy as np
import cv2
from torchvision.io import read_image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, random_split, DataLoader, Subset
import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import os

import timm
import torch
import torch.nn as nn
import torch.optim as optim
import segmentation_models_pytorch as smp
import wandb
import torch.optim as optim
from torch.amp import autocast, GradScaler

  check_for_updates()


In [3]:
# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [4]:
class TestDataset(Dataset):
    """Unlabelled image folder used at inference time.

    Args:
        img_dir: Directory whose entries are all treated as image files.
        resize: Optional ``(width, height)`` passed to ``cv2.resize``.
            ``None`` keeps the original resolution.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``["image"]``.
    """

    def __init__(self, img_dir="path/to/data", resize = None, transform=None):
        self.img_dir = img_dir
        self.transform = transform
        self.resize = resize
        # os.listdir returns entries in arbitrary order; sort so that sample
        # indices are reproducible between runs.
        self.images = sorted(os.listdir(self.img_dir))

    def __len__(self):
        """Return the number of entries found in ``img_dir``."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load one image.

        Returns:
            tuple: ``(image, height, width, stem)`` where ``height``/``width``
            are the dimensions of the file *before* resizing and ``stem`` is
            the file name without its extension.

        Raises:
            OSError: if OpenCV cannot decode the file.
        """
        img_path = os.path.join(self.img_dir, self.images[idx])
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"cv2.imread could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        height, width = image.shape[:2]
        if self.resize is not None:
            image = cv2.resize(image, self.resize, interpolation=cv2.INTER_AREA)
        if self.transform:
            image = self.transform(image=image)["image"]
        # Strip the real extension instead of assuming it is 5 characters long
        # (".jpeg"); ".jpg"/".png" files previously lost part of their id.
        stem = os.path.splitext(os.path.basename(img_path))[0]
        return image, height, width, stem

In [5]:
# Size handed to TestDataset(resize=...), i.e. the dsize given to cv2.resize.
img_resize = (256, 256)

# The remaining values are not referenced by the inference cells below
# (the test loader uses batch_size=1) — presumably carried over from training.
batch_size = 16
learning_rate = 6.25e-5
alpha = 0.5  # Weight of cross entropy loss
class_weights = [0.05, 0.25, 0.70]

In [6]:
# Inference-time preprocessing: per-channel normalisation followed by
# conversion to a tensor. No augmentation is applied.
val_transformation = A.Compose(
    [
        A.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
        ToTensorV2(),
    ]
)
class UnNormalize(object):
    """Invert a per-channel ``(x - mean) / std`` normalisation.

    Args:
        mean: Sequence with one mean per channel.
        std: Sequence with one standard deviation per channel.
    """

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """
        Args:
            tensor (Tensor): Normalised image of size (C, H, W) or a batch of
                size (B, C, H, W).
        Returns:
            Tensor: A new tensor holding ``tensor * std + mean``. The input is
            left untouched (the previous implementation modified it in place,
            so calling it twice on the same tensor silently corrupted it).
        """
        if not tensor.is_floating_point():
            # Integer tensors cannot hold the fractional result.
            tensor = tensor.float()
        # Shape (C, 1, 1) broadcasts over both (C, H, W) and (B, C, H, W).
        mean = torch.as_tensor(self.mean, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
        std = torch.as_tensor(self.std, dtype=tensor.dtype, device=tensor.device).view(-1, 1, 1)
        # The normalize code -> t.sub_(m).div_(s)
        return tensor * std + mean
    
# Inverse of the Normalize step in `val_transformation` (same mean/std values).
unorm = UnNormalize(
    mean=(0.485, 0.456, 0.406),
    std=(0.229, 0.224, 0.225),
)

In [50]:
# Test images, resized and normalised the same way for every ensemble member.
test_dataset = TestDataset(
    img_dir="/kaggle/input/bkai-igh-neopolyp/test/test",
    resize=img_resize,
    transform=val_transformation,
)
# One image per batch: the inference loop calls .item() on the per-batch
# height/width and indexes name[0].
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=1)

In [8]:
class DoubleConv(nn.Module):
    """Two consecutive 3x3 convolution -> batch-norm -> LeakyReLU stages.

    Padding of 1 keeps the spatial size; only the channel count changes
    (``in_channels`` -> ``out_channels`` -> ``out_channels``).
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        stages = []
        # First stage maps in -> out channels, second stage maps out -> out.
        for stage_in in (in_channels, out_channels):
            stages += [
                nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.LeakyReLU(inplace=True),
            ]
        # Attribute name and layer order determine the state_dict keys.
        self.double_conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply both convolution stages to ``x``."""
        return self.double_conv(x)
    
    
class DownBlock(nn.Module):
    """Encoder step: a DoubleConv followed by 2x2 max pooling."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.double_conv = DoubleConv(in_channels, out_channels)
        self.down_sample = nn.MaxPool2d(2)

    def forward(self, x):
        """Return ``(pooled, features)``.

        ``features`` is the DoubleConv output before pooling and ``pooled`` is
        the same tensor after max pooling.
        """
        features = self.double_conv(x)
        pooled = self.down_sample(features)
        return pooled, features

    
class UpBlock(nn.Module):
    """Decoder step: upsample, concatenate with a skip tensor, then DoubleConv.

    Args:
        in_channels: Channel count of the concatenated tensor fed to DoubleConv.
        out_channels: Channel count produced by the block.
        up_sample_mode: ``'conv_transpose'`` for a learned 2x upsampling; any
            other value selects bilinear ``nn.Upsample``.
    """

    def __init__(self, in_channels, out_channels, up_sample_mode):
        super().__init__()
        if up_sample_mode != 'conv_transpose':
            self.up_sample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)
        else:
            # The transposed conv keeps its channel count. That count is
            # in_channels minus 2*out_channels when in_channels == 4*out_channels,
            # otherwise in_channels minus out_channels (presumably the channels
            # contributed by the skip tensor — verify against the callers).
            subtracted = out_channels * 2 if out_channels * 4 == in_channels else out_channels
            up_channels = in_channels - subtracted
            self.up_sample = nn.ConvTranspose2d(up_channels, up_channels, kernel_size=2, stride=2)
        self.double_conv = DoubleConv(in_channels, out_channels)

    def forward(self, down_input, skip_input):
        """Upsample ``down_input``, join it with ``skip_input`` on dim 1 and convolve."""
        upsampled = self.up_sample(down_input)
        merged = torch.cat([upsampled, skip_input], dim=1)
        return self.double_conv(merged)
# ResUnet
class PolypModel(nn.Module):
    """U-Net style decoder on top of a timm ``resnet152`` feature extractor.

    Args:
        out_classes: Number of output channels (segmentation classes).
        up_sample_mode: Forwarded to every ``UpBlock``.
        pretrained: Forwarded to ``timm.create_model``. Pass ``False`` when the
            weights are restored from a checkpoint right afterwards, to avoid
            downloading the backbone weights for nothing.

    ``forward`` returns per-pixel class probabilities: a softmax over dim 1 is
    applied inside the model.
    """

    def __init__(self, out_classes=3, up_sample_mode='conv_transpose', pretrained=True):
        super().__init__()
        self.out_classes = out_classes
        self.backbone = timm.create_model("resnet152", pretrained=pretrained, features_only=True)
        # The four DownBlocks below are never called in forward(). They are
        # kept because removing them would change the module's state_dict keys.
        self.down_conv1 = DownBlock(64, 128)
        self.down_conv2 = DownBlock(256, 512)
        self.down_conv3 = DownBlock(512, 1024)
        self.down_conv4 = DownBlock(1024, 2048)
        self.up_sample_mode = up_sample_mode
        self.block_neck = DoubleConv(2048, 1024)
        self.block_up1 = UpBlock(1024+1024, 512,self.up_sample_mode)
        self.block_up2 = UpBlock(512+512, 256,self.up_sample_mode)
        self.block_up3 = UpBlock(256+256, 128,self.up_sample_mode)
        self.block_up4 = UpBlock(128+64, 64,self.up_sample_mode)
        self.conv_last = nn.Conv2d(64, out_classes, kernel_size=1)
        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear")
        self.final_activation = nn.Softmax(dim=1)

    def forward(self, x):
        """Return class probabilities of shape (B, out_classes, H', W')."""
        # Five backbone feature maps, shallowest (x1) to deepest (x5).
        x1, x2, x3, x4, x5 = self.backbone(x)
        x = self.block_neck(x5) # x (B, 1024, 8, 8) — presumably for 256x256 input
        x = self.block_up1(x, x4)
        x = self.block_up2(x, x3)
        x = self.block_up3(x, x2)
        x = self.block_up4(x, x1)
        x = self.conv_last(x)
        # One extra 2x bilinear upsampling after the last decoder block.
        x = self.upsample(x)
        x = self.final_activation(x)
        return x

In [36]:
# Ensemble members; the loading cells below append to this list.
# Re-running this cell empties the list, so those cells must be re-run afterwards.
models = []

In [10]:
# Custom ResNet-152 U-Net: build the architecture, then restore its weights.
model = PolypModel()
# map_location lets a checkpoint saved from a GPU run load on a CPU-only
# machine as well, matching how the fold checkpoints are loaded.
checkpoint = torch.load('/kaggle/input/model-polyp/colorization_model.pth', map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model = model.to(device)
model.eval()
models.append(model)

model.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

  checkpoint = torch.load('/kaggle/input/model-polyp/colorization_model.pth')


In [37]:
# UNet++ with a ResNeSt-200e encoder.
model = smp.UnetPlusPlus(
    encoder_name="timm-resnest200e",
    # Every weight is overwritten by the checkpoint below, so skip the
    # ImageNet download that the default encoder_weights would trigger.
    encoder_weights=None,
    in_channels=3,
    classes=3,
)
# map_location lets a GPU-saved checkpoint load on a CPU-only machine too.
checkpoint = torch.load('/kaggle/input/unetplusplus-resnest/model.pth', map_location=device)
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model.eval()
models.append(model)

  checkpoint = torch.load('/kaggle/input/unetplusplus-resnest/model.pth')


In [28]:
# model = smp.UnetPlusPlus(
#     encoder_name="resnet50",        
#     # encoder_weights="imagenet",     
#     in_channels=3,                  
#     classes=3     
# )
# checkpoint = torch.load('/kaggle/input/unet/pytorch/default/2/resnet50.pth')
# model.load_state_dict(checkpoint['model'])
# model = model.to(device)
# model.eval()
# models.append(model)

  checkpoint = torch.load('/kaggle/input/unet/pytorch/default/2/resnet50.pth')


In [38]:
# UNet++ with a ResNet-34 encoder (single checkpoint, not one of the folds).
model = smp.UnetPlusPlus(
    encoder_name="resnet34",
    # Every weight is overwritten by the checkpoint below, so skip the
    # ImageNet download that the default encoder_weights would trigger.
    encoder_weights=None,
    in_channels=3,
    classes=3,
)
# map_location lets a GPU-saved checkpoint load on a CPU-only machine too.
checkpoint = torch.load('/kaggle/input/unet/pytorch/default/2/model.pth', map_location=device)
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model.eval()
models.append(model)

  checkpoint = torch.load('/kaggle/input/unet/pytorch/default/2/model.pth')


In [41]:
# UNet++ with a ResNet-152 encoder.
model = smp.UnetPlusPlus(
    encoder_name="resnet152",
    # Every weight is overwritten by the checkpoint below, so skip the
    # ImageNet download that the default encoder_weights would trigger.
    encoder_weights=None,
    in_channels=3,
    classes=3,
)
# map_location lets a GPU-saved checkpoint load on a CPU-only machine too.
checkpoint = torch.load('/kaggle/input/unetplusplus-resnet152/model.pth', map_location=device)
model.load_state_dict(checkpoint['model'])
model = model.to(device)
model.eval()
models.append(model)

  checkpoint = torch.load('/kaggle/input/unetplusplus-resnet152/model.pth')


In [54]:
import torch
import segmentation_models_pytorch as smp

# `device` is defined once near the top of the notebook and reused here.

# Number of folds
n_splits = 5  # Adjust based on your cross-validation

# Architecture settings; they have to match the ones used during training,
# otherwise load_state_dict will reject the checkpoints.
model_params = dict(
    encoder_name='resnet34',   # Same encoder as in training
    encoder_weights=None,      # Weights come from the fold checkpoints
    in_channels=3,
    classes=3,                 # Number of segmentation classes
)

# One checkpoint per fold; file names are 1-based (model_fold_1 ... model_fold_N).
for fold in range(n_splits):
    model_path = f'/kaggle/input/unetplusplus-resnet34/model_fold_{fold + 1}.pth'
    fold_state = torch.load(model_path, map_location=device)
    model = smp.UnetPlusPlus(**model_params)
    model.load_state_dict(fold_state)
    model.to(device)
    model.eval()  # Inference only
    models.append(model)

print(f'Loaded {len(models)} models for ensembling.')

  model.load_state_dict(torch.load(model_path, map_location=device))


Loaded 8 models for ensembling.


In [55]:
# Sanity check: number of models currently held in the ensemble list.
len(models)

8

In [49]:
# Class index -> colour triple written into the predicted mask
# (mask_to_rgb treats the triples as channel values in order).
color_dict = {
    0: (0, 0, 0),
    1: (255, 0, 0),
    2: (0, 255, 0),
}
def mask_to_rgb(mask, color_dict):
    """Paint a 2-D label map with the colours given in ``color_dict``.

    Args:
        mask: Array whose first two dimensions are (rows, cols) and whose
            values are class labels.
        color_dict: Mapping ``label -> 3-element colour``.

    Returns:
        ``uint8`` array of shape (rows, cols, 3). Labels missing from
        ``color_dict`` stay (0, 0, 0).
    """
    rows, cols = mask.shape[0], mask.shape[1]
    canvas = np.zeros((rows, cols, 3))
    for label, colour in color_dict.items():
        canvas[mask == label] = colour
    return canvas.astype(np.uint8)

In [43]:
import cv2
import numpy as np

def Smoothed_img(image):
    """Blur a colour-coded mask and snap every pixel back to a pure colour.

    The image is blurred with a 15x15 Gaussian kernel, then each pixel becomes
    ``[0, 255, 0]`` if its channel 1 exceeds 100, else ``[0, 0, 255]`` if its
    channel 2 exceeds 100, else ``[0, 0, 0]``. The input array is not modified.

    Args:
        image: 3-channel image; the caller passes BGR data, so channel 2 is red.

    Returns:
        New array with the same shape and dtype as the blurred image.
    """
    blurred = cv2.GaussianBlur(image, (15, 15), 0)
    is_red = blurred[:, :, 2] > 100
    is_green = blurred[:, :, 1] > 100

    # Start from all-black; green is written last so it wins over red when
    # both channels are above the threshold.
    cleaned = np.zeros_like(blurred)
    cleaned[is_red] = [0, 0, 255]
    cleaned[is_green] = [0, 255, 0]
    return cleaned

In [15]:
# Output folder for the predicted masks. exist_ok=True makes the cell safe to
# re-run (a plain `mkdir` errors when the folder already exists) and avoids
# depending on a shell.
os.makedirs("prediction", exist_ok=True)

In [56]:
# cv2.imwrite returns False instead of raising when the target folder is
# missing, so make sure it exists before predicting anything.
os.makedirs("prediction", exist_ok=True)

with torch.no_grad():
    for image, h, w, name in test_loader:
        batch = image.to(device)  # moved to the device once, shared by all models
        probs_list = []
        for model in models:
            outputs = model(batch)
            # PolypModel already ends with a softmax, whereas the smp models
            # were built without an activation and return raw logits. Bring
            # everything to probabilities so the average mixes like with like.
            if not isinstance(model, PolypModel):
                outputs = torch.softmax(outputs, dim=1)
            # Drop only the batch dimension (the loader uses batch_size=1).
            probs_list.append(outputs.squeeze(0).cpu().numpy())

        # Average the class probabilities, then pick the best class per pixel.
        avg_probs = np.mean(probs_list, axis=0)
        class_map = np.argmax(avg_probs, axis=0)
        rgb_mask = mask_to_rgb(class_map, color_dict)

        # Resize back to the original resolution; cv2 expects (width, height).
        new_size = (w.item(), h.item())
        resized_image = cv2.resize(rgb_mask, new_size, interpolation=cv2.INTER_CUBIC)
        # mask_to_rgb produced RGB, cv2.imwrite expects BGR.
        resized_image = cv2.cvtColor(resized_image, cv2.COLOR_RGB2BGR)
        resized_image = Smoothed_img(resized_image)

        out_path = f"prediction/{name[0]}.png"
        if not cv2.imwrite(out_path, resized_image):
            raise OSError(f"cv2.imwrite failed for {out_path}")
        

In [57]:
def rle_to_string(runs):
    """Join the items of ``runs`` into one space-separated string."""
    return ' '.join(map(str, runs))

def rle_encode_one_mask(mask):
    """Run-length encode one mask channel as a ``"start length ..."`` string.

    The mask is flattened in row-major order and binarised (values above 225
    count as foreground). Starts are 1-based positions in the flattened array.

    Args:
        mask: Non-empty numeric array; it is not modified (a flattened copy is used).

    Returns:
        Space-separated string of alternating start / length values.
    """
    binary = mask.flatten()
    binary[binary > 225] = 255
    binary[binary <= 225] = 0

    # A run touching either end has no value change to mark its boundary, so
    # the array is framed with one zero on each side in that case.
    touches_edge = bool(binary[0] or binary[-1])
    if touches_edge:
        framed = np.zeros([len(binary) + 2], dtype=binary.dtype)
        framed[1:-1] = binary
        binary = framed

    # Index of each value change, converted to a 1-based position; the extra
    # leading zero shifts positions by one, hence the smaller offset.
    offset = 1 if touches_edge else 2
    runs = np.where(binary[1:] != binary[:-1])[0] + offset
    # Every second entry is a run end: turn it into a run length.
    runs[1::2] = runs[1::2] - runs[:-1:2]

    return rle_to_string(runs)

def rle2mask(mask_rle, shape=(3,3)):
    """Decode a ``"start length ..."`` RLE string into a binary mask.

    Args:
        mask_rle: Space-separated, alternating 1-based starts and run lengths.
        shape: Two-element shape used to reshape the flat buffer.

    Returns:
        ``uint8`` array of ones and zeros: the buffer reshaped to ``shape``
        and then transposed.
    """
    tokens = mask_rle.split()
    first = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based
    length = np.asarray(tokens[1::2], dtype=int)
    last = first + length

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(first, last):
        flat[begin:end] = 1
    return flat.reshape(shape).T

def mask2string(dir):
    """RLE-encode the first two colour channels of every mask image in a folder.

    Each file is read with OpenCV and its channels are reversed (BGR -> RGB),
    so channel 0 is red and channel 1 is green. Two rows are produced per
    file, with ids ``<stem>_0`` and ``<stem>_1``.

    Args:
        dir: Directory containing only mask images.

    Returns:
        dict with two parallel lists: ``'ids'`` and ``'strings'``.

    Raises:
        OSError: if a directory entry cannot be decoded as an image.
    """
    strings = []
    ids = []
    # Sorted so the row order of the result is reproducible; os.listdir
    # returns entries in arbitrary order.
    for file_name in sorted(os.listdir(dir)):
        # splitext keeps ids that themselves contain dots intact
        # (split('.')[0] would cut them at the first dot).
        image_id = os.path.splitext(file_name)[0]
        path = os.path.join(dir, file_name)
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise OSError(f"cv2.imread could not read mask image: {path}")
        img = bgr[:, :, ::-1]
        for channel in range(2):
            ids.append(f'{image_id}_{channel}')
            strings.append(rle_encode_one_mask(img[:, :, channel]))
    return {
        'ids': ids,
        'strings': strings,
    }


# Folder holding the predicted masks written by the inference loop
# (assumes the notebook's working directory is /kaggle/working — TODO confirm
# when running outside Kaggle).
MASK_DIR_PATH = '/kaggle/working/prediction'

# Pass the path directly instead of rebinding the builtin name `dir` at
# module level, which would break any later call to dir().
res = mask2string(MASK_DIR_PATH)
df = pd.DataFrame({'Id': res['ids'], 'Expected': res['strings']})

df.to_csv(r'output.csv', index=False)