In [None]:
# Install the Kaggle CLI and write the API credentials file at /root/.kaggle/kaggle.json.
!pip install -q kaggle
import os
# NOTE(review): credentials are hardcoded in the notebook; prefer reading them from
# environment variables or getpass so a real key is never saved with the notebook.
user = 'hyeongihong' # Your user
key = '*' # Your key

# Create the .kaggle directory only when /root does not contain it yet
# (assumes the notebook runs as root so that ~ resolves to /root — TODO confirm).
if '.kaggle' not in os.listdir('/root'):
    !mkdir ~/.kaggle
# Create the file and make it writable, write the JSON credentials,
# then restrict it to owner read/write (mode 600).
!touch /root/.kaggle/kaggle.json
!chmod 666 /root/.kaggle/kaggle.json
with open('/root/.kaggle/kaggle.json', 'w') as f:
    f.write('{"username":"%s","key":"%s"}' % (user, key))
!chmod 600 /root/.kaggle/kaggle.json


In [None]:
# Download the data of the "bkai-igh-neopolyp" competition with the Kaggle CLI.
!kaggle competitions download -c bkai-igh-neopolyp

Downloading bkai-igh-neopolyp.zip to /content
100% 314M/314M [00:15<00:00, 21.1MB/s]
100% 314M/314M [00:15<00:00, 20.7MB/s]


In [None]:
# Extract the downloaded archive into /content/dataset/.
!unzip /content/bkai-igh-neopolyp.zip -d /content/dataset/

Archive:  /content/bkai-igh-neopolyp.zip
  inflating: /content/dataset/sample_submission.csv  
  inflating: /content/dataset/test/test/019410b1fcf0625f608b4ce97629ab55.jpeg  
  inflating: /content/dataset/test/test/02fa602bb3c7abacdbd7e6afd56ea7bc.jpeg  
  inflating: /content/dataset/test/test/0398846f67b5df7cdf3f33c3ca4d5060.jpeg  
  inflating: /content/dataset/test/test/05734fbeedd0f9da760db74a29abdb04.jpeg  
  inflating: /content/dataset/test/test/05b78a91391adc0bb223c4eaf3372eae.jpeg  
  inflating: /content/dataset/test/test/0619ebebe9e9c9d00a4262b4fe4a5a95.jpeg  
  inflating: /content/dataset/test/test/0626ab4ec3d46e602b296cc5cfd263f1.jpeg  
  inflating: /content/dataset/test/test/0a0317371a966bf4b3466463a3c64db1.jpeg  
  inflating: /content/dataset/test/test/0a5f3601ad4f13ccf1f4b331a412fc44.jpeg  
  inflating: /content/dataset/test/test/0af3feff05dec1eb3a70b145a7d8d3b6.jpeg  
  inflating: /content/dataset/test/test/0fca6a4248a41e8db8b4ed633b456aaa.jpeg  
  inflating: /content/dat

In [None]:
# Install timm; the next cell imports DropPath and register_model from it.
# NOTE(review): the version is not pinned, so a re-run may install a different release.
!pip install timm

Collecting timm
  Downloading timm-0.9.7-py3-none-any.whl (2.2 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/2.2 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/2.2 MB[0m [31m3.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m2.2/2.2 MB[0m [31m36.2 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.18.0-py3-none-any.whl (301 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m35.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 M

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F

from timm.models.layers import DropPath
from timm.models.registry import register_model

class DWConv(nn.Module):
    """Depthwise 3x3 convolution applied to a token sequence.

    The (B, N, C) sequence is viewed as a (B, C, H, W) feature map, convolved
    one channel at a time, and flattened back to (B, N, C).
    """

    def __init__(self, dim):
        super().__init__()
        # groups == channels makes this a depthwise convolution.
        self.dwconv = nn.Conv2d(dim, dim, kernel_size=3, stride=1, padding=1, groups=dim)

    def forward(self, x, H, W):
        batch, _, channels = x.shape
        feature_map = x.transpose(1, 2).view(batch, channels, H, W)
        tokens = self.dwconv(feature_map).flatten(2)
        return tokens.transpose(1, 2)

class Mlp(nn.Module):
    """Feed-forward block: linear expansion, depthwise conv + GELU, linear projection back."""

    def __init__(self, in_features, hidden_features):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.dwconv = DWConv(hidden_features)
        self.fc2 = nn.Linear(hidden_features, in_features)

    def forward(self, x, H, W):
        hidden = self.fc1(x)
        # H and W let the depthwise conv view the token sequence as a 2-D map.
        hidden = self.dwconv(hidden, H, W)
        return self.fc2(F.gelu(hidden))

class Attention(nn.Module):
    """Multi-head self-attention with optional spatial reduction of keys/values.

    When ``sr_ratio > 1`` the keys and values are computed from a feature map
    downsampled by a strided convolution (kernel = stride = ``sr_ratio``)
    followed by LayerNorm; the queries always use the full token sequence.
    """

    def __init__(self, dim, num_heads, sr_ratio):
        super().__init__()
        assert dim % num_heads == 0, f"dim {dim} should be divided by num_heads {num_heads}."

        head_dim = dim // num_heads
        self.num_heads = num_heads
        self.scale = head_dim ** (-0.5)
        self.q = nn.Linear(dim, dim)
        self.kv = nn.Linear(dim, dim * 2)
        self.proj = nn.Linear(dim, dim)
        self.sr_ratio = sr_ratio
        if sr_ratio > 1:
            self.sr = nn.Conv2d(dim, dim, kernel_size=sr_ratio, stride=sr_ratio)
            self.norm = nn.LayerNorm(dim)

    def forward(self, x, H, W):
        B, N, C = x.shape
        heads = self.num_heads
        head_dim = C // heads

        # (B, N, C) -> (B, heads, N, head_dim)
        q = self.q(x).reshape(B, N, heads, head_dim).permute(0, 2, 1, 3)

        # Keys/values come from the spatially reduced tokens when reduction is on.
        kv_source = x
        if self.sr_ratio > 1:
            reduced = self.sr(x.permute(0, 2, 1).reshape(B, C, H, W))
            kv_source = self.norm(reduced.reshape(B, C, -1).permute(0, 2, 1))

        # (B, M, 2C) -> (2, B, heads, M, head_dim)
        kv = self.kv(kv_source).reshape(B, -1, 2, heads, head_dim).permute(2, 0, 3, 1, 4)
        k, v = kv[0], kv[1]

        weights = ((q @ k.transpose(-2, -1)) * self.scale).softmax(dim=-1)
        merged = (weights @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj(merged)

class Block(nn.Module):
    """Pre-norm transformer block: attention and MLP, each in a residual branch with stochastic depth."""

    def __init__(self, dim, num_heads, mlp_ratio, drop_path, sr_ratio):
        super().__init__()
        hidden_dim = int(dim * mlp_ratio)
        self.norm1 = nn.LayerNorm(dim, eps=1e-6)
        self.attn = Attention(dim, num_heads=num_heads, sr_ratio=sr_ratio)
        # A zero rate disables stochastic depth, so a plain identity is used instead.
        if drop_path > 0.:
            self.drop_path = DropPath(drop_path)
        else:
            self.drop_path = nn.Identity()
        self.norm2 = nn.LayerNorm(dim, eps=1e-6)
        self.mlp = Mlp(in_features=dim, hidden_features=hidden_dim)

    def forward(self, x, H, W):
        attn_out = self.attn(self.norm1(x), H, W)
        x = x + self.drop_path(attn_out)
        mlp_out = self.mlp(self.norm2(x), H, W)
        return x + self.drop_path(mlp_out)

class OverlapPatchEmbed(nn.Module):
    """Overlapping patch embedding: a strided convolution followed by LayerNorm over channels.

    ``forward`` returns the token sequence (B, H*W, embed_dim) together with the
    spatial size (H, W) of the convolution output.
    """

    def __init__(self, patch_size, stride, in_chans, embed_dim):
        super().__init__()
        half = patch_size // 2
        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=stride, padding=(half, half))
        self.norm = nn.LayerNorm(embed_dim)

    def forward(self, x):
        feature_map = self.proj(x)
        H, W = feature_map.shape[2:]
        tokens = self.norm(feature_map.flatten(2).transpose(1, 2))
        return tokens, H, W

class PVT(nn.Module):
    """Four-stage pyramid vision transformer backbone.

    Each stage is an overlapping patch embedding, a stack of transformer
    blocks and a LayerNorm; its output is reshaped to a (B, C, H, W) map
    that feeds the next stage.

    Args:
        embed_dims: channel width of each of the four stages.
        mlp_ratios: MLP expansion ratio of each stage.
        depths: number of transformer blocks in each stage.
        snapshot: checkpoint path loaded by :meth:`initialize`.
        sr_ratios: spatial-reduction ratio of the attention in each stage.
    """

    def __init__(self, embed_dims, mlp_ratios, depths, snapshot, sr_ratios=(8, 4, 2, 1)):
        super().__init__()
        self.depths       = depths
        self.snapshot     = snapshot
        # patch_embed
        self.patch_embed1 = OverlapPatchEmbed(patch_size=7, stride=4, in_chans=3,             embed_dim=embed_dims[0])
        self.patch_embed2 = OverlapPatchEmbed(patch_size=3, stride=2, in_chans=embed_dims[0], embed_dim=embed_dims[1])
        self.patch_embed3 = OverlapPatchEmbed(patch_size=3, stride=2, in_chans=embed_dims[1], embed_dim=embed_dims[2])
        self.patch_embed4 = OverlapPatchEmbed(patch_size=3, stride=2, in_chans=embed_dims[2], embed_dim=embed_dims[3])

        # transformer encoder
        dpr   = [x.item() for x in torch.linspace(0, 0.1, sum(depths))]  # stochastic depth decay rule
        heads = (1, 2, 5, 8)

        def make_stage(stage, first_block):
            # Blocks of one stage; each takes its own slice of the drop-path schedule.
            return nn.ModuleList([
                Block(dim=embed_dims[stage], num_heads=heads[stage], mlp_ratio=mlp_ratios[stage],
                      drop_path=dpr[first_block + i], sr_ratio=sr_ratios[stage])
                for i in range(depths[stage])
            ])

        cur = 0
        self.block1 = make_stage(0, cur)
        self.norm1  = nn.LayerNorm(embed_dims[0], eps=1e-6)

        cur += depths[0]
        self.block2 = make_stage(1, cur)
        self.norm2  = nn.LayerNorm(embed_dims[1], eps=1e-6)

        cur += depths[1]
        self.block3 = make_stage(2, cur)
        self.norm3  = nn.LayerNorm(embed_dims[2], eps=1e-6)

        cur += depths[2]
        self.block4 = make_stage(3, cur)
        self.norm4  = nn.LayerNorm(embed_dims[3], eps=1e-6)

    @torch.jit.ignore
    def no_weight_decay(self):
        # NOTE(review): this class defines no parameters with these names; the set
        # is returned unchanged for callers that expect the method.
        return {'pos_embed1', 'pos_embed2', 'pos_embed3', 'pos_embed4', 'cls_token'}  # has pos_embed may be better

    def _run_stage(self, x, patch_embed, blocks, norm):
        """Run one stage and return its output as a contiguous (B, C, H, W) map."""
        B = x.shape[0]
        tokens, H, W = patch_embed(x)
        for blk in blocks:
            tokens = blk(tokens, H, W)
        return norm(tokens).reshape(B, H, W, -1).permute(0, 3, 1, 2).contiguous()

    def forward(self, x):
        """Return the feature maps of the four stages, shallowest first."""
        out1 = self._run_stage(x,    self.patch_embed1, self.block1, self.norm1)
        out2 = self._run_stage(out1, self.patch_embed2, self.block2, self.norm2)
        out3 = self._run_stage(out2, self.patch_embed3, self.block3, self.norm3)
        out4 = self._run_stage(out3, self.patch_embed4, self.block4, self.norm4)
        return out1, out2, out3, out4

    def initialize(self):
        """Load the pretrained snapshot; keys that do not match are ignored (strict=False)."""
        # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
        state = torch.load(self.snapshot, map_location='cpu')
        self.load_state_dict(state, strict=False)


def pvt_v2_b2():
    """Build the PVT backbone with the b2 widths/depths and its pretrained snapshot path."""
    config = dict(
        embed_dims=[64, 128, 320, 512],
        mlp_ratios=[8, 8, 4, 4],
        depths=[3, 4, 6, 3],
        snapshot='../pretrain/pvt_v2_b2.pth',
    )
    return PVT(**config)


In [None]:
IMAGE_WIDTH = 256
IMAGE_HEIGHT = 256

def read_mask(mask_path):
    """Load a colour-coded mask image and convert it to a class-index map.

    Pixels whose hue falls in the red ranges become class 2, pixels in the
    green range become class 1, and everything else stays 0.

    Args:
        mask_path: path of the mask image.

    Returns:
        ``np.uint8`` array of shape (IMAGE_HEIGHT, IMAGE_WIDTH) holding 0, 1 or 2.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image = cv2.imread(mask_path)
    if image is None:
        # cv2.imread reports failure by returning None; without this check the
        # error only surfaces later as an opaque cv2.resize assertion.
        raise FileNotFoundError(f"Could not read mask image: {mask_path}")
    image = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # lower boundary RED color range values; Hue (0 - 10)
    lower1 = np.array([0, 100, 20])
    upper1 = np.array([10, 255, 255])

    # upper boundary RED color range values; Hue (160 - 180)
    lower2 = np.array([160,100,20])
    upper2 = np.array([179,255,255])

    lower_mask = cv2.inRange(image, lower1, upper1)
    upper_mask = cv2.inRange(image, lower2, upper2)

    # The two hue ranges are disjoint, so the sum never has both terms set at once.
    red_mask = lower_mask + upper_mask
    red_mask[red_mask != 0] = 2

    # boundary GREEN color range values; Hue (36 - 70)
    green_mask = cv2.inRange(image, (36, 25, 25), (70, 255,255))
    green_mask[green_mask != 0] = 1

    full_mask = cv2.bitwise_or(red_mask, green_mask)
    full_mask = full_mask.astype(np.uint8)
    return full_mask

In [None]:
def decode_mask(pred_mask):
    """Turn a 2-D class-index mask into a 3-channel colour image.

    Class 0 maps to black, class 1 to [0, 255, 0] (green) and class 2 to
    [255, 0, 0] (red); any other value is left black.

    Args:
        pred_mask: 2-D array of class indices.

    Returns:
        ``np.uint8`` array of shape (H, W, 3).
    """
    # The original spelled the dtype ``np.unit8``, which raised AttributeError on every call.
    decoded_mask = np.zeros((pred_mask.shape[0], pred_mask.shape[1], 3), dtype=np.uint8)
    decoded_mask[pred_mask == 0] = [0, 0, 0]
    decoded_mask[pred_mask == 1] = [0, 255, 0] ## Green
    decoded_mask[pred_mask == 2] = [255, 0, 0] ## Red

    return decoded_mask

In [None]:
def train_img_mask_transform(transform, image, mask):
    """Apply ``transform`` to deep copies of an image/mask pair.

    The inputs are copied first so the caller's objects are never modified.
    Returns the "image" and "mask" entries of the transform's result.
    """
    result = transform(image=copy.deepcopy(image), mask=copy.deepcopy(mask))
    return result["image"], result["mask"]


In [None]:
def mosaic_augmentation(piecies, size):
    """Tile four (image, mask) pairs into one ``size`` x ``size`` mosaic.

    The pieces are assigned to the four quadrants in random order and each is
    resized to fill its quadrant. Returns ``(mosaic_img, mosaic_mask)``, both
    uint8 arrays of shape (size, size, 3).
    """
    h = w = size
    mosaic_img = np.zeros((h, w, 3), dtype=np.uint8)
    mosaic_mask = np.zeros((h, w, 3), dtype=np.uint8)

    # The split point is fixed at the centre of the canvas.
    cx, cy = w // 2, h // 2

    # (row slice, column slice, (width, height) handed to cv2.resize) for
    # top-left, top-right, bottom-left and bottom-right.
    quadrants = [
        (slice(None, cy), slice(None, cx), (cx, cy)),
        (slice(None, cy), slice(cx, None), (w - cx, cy)),
        (slice(cy, None), slice(None, cx), (cx, h - cy)),
        (slice(cy, None), slice(cx, None), (w - cx, h - cy)),
    ]

    order = [0, 1, 2, 3]
    random.shuffle(order)
    for (rows, cols, dsize), index in zip(quadrants, order):
        piece_image, piece_mask = piecies[index][0], piecies[index][1]
        mosaic_img[rows, cols] = cv2.resize(piece_image, dsize)
        mosaic_mask[rows, cols] = cv2.resize(piece_mask, dsize)

    return mosaic_img, mosaic_mask

In [None]:
def rand_bbox(size, lam):
    """Sample a random box covering roughly ``1 - lam`` of the image area.

    Args:
        size: shape sequence; ``size[0]`` is read as H and ``size[1]`` as W.
        lam: mix ratio; each box side is ``sqrt(1 - lam)`` of the image side.

    Returns:
        ``(bbx1, bby1, bbx2, bby2)`` clipped to the image, with x measured
        along the width and y along the height.
    """
    H, W = size[0], size[1]
    cut_rat = np.sqrt(1. - lam)
    half_w = np.int64(W * cut_rat) // 2
    half_h = np.int64(H * cut_rat) // 2

    # Draw the box centre: x first, then y.
    cx = np.random.randint(W)
    cy = np.random.randint(H)

    left, right = np.clip(cx - half_w, 0, W), np.clip(cx + half_w, 0, W)
    top, bottom = np.clip(cy - half_h, 0, H), np.clip(cy + half_h, 0, H)

    return left, top, right, bottom


def cutmix_augmentation(image1, mask1, image2, mask2):
    """Paste a random rectangle of (image2, mask2) into copies of (image1, mask1).

    The mix ratio is drawn from Beta(1, 1) and clipped to [0.2, 0.8]; the
    rectangle comes from ``rand_bbox`` applied to the shape of ``image1``.

    Args:
        image1, mask1: target pair; copied, never modified.
        image2, mask2: source pair; only read.

    Returns:
        ``(image, mask)``: the copies of the target pair with the box replaced.
    """
    # Only the targets are written to, so only they need copying.
    i1, m1 = copy.deepcopy(image1), copy.deepcopy(mask1)
    lam = np.clip(np.random.beta(1.0, 1.0), 0.2, 0.8)
    bbx1, bby1, bbx2, bby2 = rand_bbox(i1.shape, lam)

    # rand_bbox measures x along the width and y along the height, so the
    # y-range indexes rows (axis 0) and the x-range indexes columns (axis 1).
    # The original used them the other way round, which transposed the box.
    i1[bby1:bby2, bbx1:bbx2] = image2[bby1:bby2, bbx1:bbx2]
    m1[bby1:bby2, bbx1:bbx2] = mask2[bby1:bby2, bbx1:bbx2]

    return i1, m1


def spatially_exclusive_pasting(image, mask, alpha=0.7, iterations=10):
    """Blend copies of the masked object into other locations of the same image.

    The object is the bounding box of the pixels whose grayscale mask value is
    exactly 1. For up to ``iterations`` rounds the position with the smallest
    value in a random priority map is tried as the top-left corner of a paste;
    positions that leave the image or collide with existing mask pixels are
    discarded.

    Args:
        image: source image (assumed H x W x 3 — TODO confirm).
        mask: 3-channel mask; it is converted with ``COLOR_BGR2GRAY``.
        alpha: weight of the existing content; the pasted patch gets ``1 - alpha``.
        iterations: number of paste attempts.

    Returns:
        ``(target_image, target_mask)``, both modified copies of the inputs.
        If no mask pixel equals 1, the unmodified copies are returned.
    """
    target_image, target_mask = copy.deepcopy(image), copy.deepcopy(mask)
    L_gray = cv2.cvtColor(target_mask, cv2.COLOR_BGR2GRAY)

    hs, ws = np.where(L_gray == 1)
    # Test emptiness by size. ``.any()`` on index arrays is False whenever every
    # hit lies in row 0 or column 0, which was mistaken for "no object".
    if hs.size == 0:
        # Return the same (image, mask) pair as the normal exit so callers can
        # always unpack; the original returned the mask alone here.
        return target_image, target_mask

    # np.where yields inclusive indices while slices are exclusive, hence the +1.
    # Without it the last row/column was dropped and one-pixel-thick objects
    # produced an empty patch.
    he, we = hs.max() + 1, ws.max() + 1
    hs, ws = hs.min(), ws.min()

    # NOTE(review): these are views into the target arrays, not copies, so a
    # paste that overlaps the source box also changes later patches.
    Lf_gray = L_gray[hs:he, ws:we]
    If = target_image[hs:he, ws:we]
    Lf_color = target_mask[hs:he, ws:we]

    # Random priority map; positions on the object itself are never chosen.
    M = np.random.rand(*target_image.shape[:2])
    M[L_gray == 1] = float('inf')

    height, width = he - hs, we - ws

    for _ in range(iterations):
        row, col = np.unravel_index(M.argmin(), M.shape)
        candidate_area = (slice(row, row + height), slice(col, col + width))

        # The patch would stick out of the image: drop this corner only.
        if candidate_area[0].stop > target_image.shape[0] or candidate_area[1].stop > target_image.shape[1]:
            M[row, col] = float('inf')
            continue

        # The destination collides with the patch pixel-wise: drop the whole area.
        if np.any(L_gray[candidate_area] & Lf_gray):
            M[candidate_area] = float('inf')
            continue

        target_image[candidate_area] = alpha * target_image[candidate_area] + (1 - alpha) * If
        target_mask[candidate_area] = alpha * target_mask[candidate_area] + (1 - alpha) * Lf_color
        L_gray[candidate_area] = cv2.cvtColor(target_mask[candidate_area], cv2.COLOR_BGR2GRAY)

        M[candidate_area] = float('inf')

        # Smooth the priority map with a 3x3 mean filter before the next round.
        kernel = np.ones((3, 3), np.float32) / 9
        M = cv2.filter2D(M, -1, kernel)

    return target_image, target_mask

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Fusion(nn.Module):
    """Project the three deepest feature maps to 64 channels and fuse them by element-wise product.

    ``x1`` is accepted by ``forward`` but not used; ``x3`` and ``x4`` are
    bilinearly resized to the spatial size of ``x2`` before the product.
    """

    def __init__(self, channels):
        super().__init__()
        self.linear2 = self._projection(channels[1])
        self.linear3 = self._projection(channels[2])
        self.linear4 = self._projection(channels[3])

    @staticmethod
    def _projection(in_channels):
        # 1x1 convolution to 64 channels followed by batch normalisation.
        return nn.Sequential(nn.Conv2d(in_channels, 64, kernel_size=1, bias=False), nn.BatchNorm2d(64))

    def forward(self, x1, x2, x3, x4):
        p2 = self.linear2(x2)
        p3 = self.linear3(x3)
        p4 = self.linear4(x4)
        target_size = p2.size()[2:]
        p4 = F.interpolate(p4, size=target_size, mode='bilinear')
        p3 = F.interpolate(p3, size=target_size, mode='bilinear')
        return p2 * p3 * p4

    def initialize(self):
        weight_init(self)

class WeakPolyp(nn.Module):
    """Segmentation network: a backbone, the ``Fusion`` neck and a 1x1 conv producing one logit map.

    Args:
        cfg: configuration object. ``cfg.backbone`` selects 'res2net50' or
            'pvt_v2_b2'. ``cfg.mode`` is 'train' (run ``weight_init``) or
            'test' (load the weights stored at ``cfg.snapshot``).

    Raises:
        ValueError: if ``cfg.backbone`` or ``cfg.mode`` is not supported.
    """

    def __init__(self, cfg):
        super(WeakPolyp, self).__init__()
        if cfg.backbone=='res2net50':
            self.backbone = Res2Net50()
            channels      = [256, 512, 1024, 2048]
        elif cfg.backbone=='pvt_v2_b2':
            self.backbone = pvt_v2_b2()
            channels      = [64, 128, 320, 512]
        else:
            # Previously an unknown name fell through and failed below with an
            # unbound ``channels`` variable.
            raise ValueError("Unsupported backbone: %r (expected 'res2net50' or 'pvt_v2_b2')" % (cfg.backbone,))

        self.fusion       = Fusion(channels)
        self.linear       = nn.Conv2d(64, 1, kernel_size=1)

        ## initialize
        if cfg.mode=='train':
            weight_init(self)
        elif cfg.mode=='test':
            # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
            self.load_state_dict(torch.load(cfg.snapshot, map_location='cpu'))
        else:
            raise ValueError("Unsupported mode: %r (expected 'train' or 'test')" % (cfg.mode,))

    def forward(self, x):
        """Return the single-channel logit map at the resolution of the fused features."""
        x1,x2,x3,x4 = self.backbone(x)
        pred        = self.fusion(x1,x2,x3,x4)
        pred        = self.linear(pred)
        return pred


In [None]:

import math
import torch
import torch.nn as nn
import torch.nn.functional as F

class Bottle2neck(nn.Module):
    """Res2Net bottleneck: 1x1 conv, a cascade of 3x3 convs over channel groups, 1x1 conv, residual add.

    The output of the first 1x1 conv is split into ``scale`` groups of
    ``width`` channels. ``nums`` of them go through their own 3x3 conv; with
    ``stype='normal'`` each group is first added to the previous group's
    output. The remaining group is passed through unchanged ('normal') or
    average-pooled ('stage').
    """
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, baseWidth=26, scale=4, stype='normal'):
        super().__init__()
        width = int(math.floor(planes * (baseWidth / 64.0)))
        self.conv1 = nn.Conv2d(inplanes, width * scale, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width * scale)
        self.nums = scale - 1 if scale != 1 else 1
        if stype == 'stage':
            self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, padding=1)
        self.convs = nn.ModuleList([
            nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, bias=False)
            for _ in range(self.nums)
        ])
        self.bns = nn.ModuleList([nn.BatchNorm2d(width) for _ in range(self.nums)])
        self.conv3 = nn.Conv2d(width * scale, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.downsample = downsample
        self.stype = stype
        self.scale = scale
        self.width = width

    def forward(self, x):
        stem = F.relu(self.bn1(self.conv1(x)), inplace=True)
        groups = torch.split(stem, self.width, 1)

        pieces = []
        sp = None
        for i, (conv, bn) in enumerate(zip(self.convs, self.bns)):
            # 'stage' blocks process each group on its own; 'normal' blocks chain them.
            if i == 0 or self.stype == 'stage':
                sp = groups[i]
            else:
                sp = sp + groups[i]
            sp = F.relu(bn(conv(sp)), inplace=True)
            pieces.append(sp)

        # The last group bypasses the 3x3 convolutions.
        if self.scale != 1:
            if self.stype == 'normal':
                pieces.append(groups[self.nums])
            elif self.stype == 'stage':
                pieces.append(self.pool(groups[self.nums]))

        out = pieces[0] if len(pieces) == 1 else torch.cat(pieces, 1)
        out = self.bn3(self.conv3(out))

        identity = x if self.downsample is None else self.downsample(x)
        return F.relu(out + identity, inplace=True)


class Res2Net(nn.Module):
    """Res2Net backbone built from ``Bottle2neck`` blocks.

    Args:
        layers: number of blocks in each of the four stages.
        snapshot: checkpoint path loaded by :meth:`initialize`.
        baseWidth: base width passed to every block.
        scale: number of channel groups passed to every block.

    ``forward`` returns the outputs of the four stages, shallowest first.
    """

    def __init__(self, layers, snapshot, baseWidth=26, scale=4):
        super(Res2Net, self).__init__()
        self.inplanes  = 64
        self.snapshot  = snapshot
        self.baseWidth = baseWidth
        self.scale     = scale
        # Stem: three 3x3 convolutions, the first with stride 2.
        self.conv1     = nn.Sequential(
                                nn.Conv2d(3, 32, 3, 2, 1, bias=False),
                                nn.BatchNorm2d(32),
                                nn.ReLU(inplace=True),
                                nn.Conv2d(32, 32, 3, 1, 1, bias=False),
                                nn.BatchNorm2d(32),
                                nn.ReLU(inplace=True),
                                nn.Conv2d(32, 64, 3, 1, 1, bias=False)
                            )
        self.bn1    = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(Bottle2neck, 64, layers[0])
        self.layer2 = self._make_layer(Bottle2neck, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(Bottle2neck, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(Bottle2neck, 512, layers[3], stride=2)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage: a 'stage' block (with a shortcut projection when needed) plus ``blocks - 1`` normal blocks."""
        downsample = None
        # A projection on the shortcut is needed whenever resolution or channel count changes.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True, count_include_pad=False),
                nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=1, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers        = [block(self.inplanes, planes, stride, downsample=downsample, stype='stage', baseWidth=self.baseWidth, scale=self.scale)]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, baseWidth=self.baseWidth, scale=self.scale))
        return nn.Sequential(*layers)

    def forward(self, x):
        out1 = F.relu(self.bn1(self.conv1(x)), inplace=True)
        out1 = F.max_pool2d(out1, kernel_size=3, stride=2, padding=1)
        out2 = self.layer1(out1)
        out3 = self.layer2(out2)
        out4 = self.layer3(out3)
        out5 = self.layer4(out4)
        return out2, out3, out4, out5

    def initialize(self):
        """Load the pretrained snapshot; keys that do not match are ignored (strict=False)."""
        # map_location='cpu' makes loading independent of the device the checkpoint was saved from.
        state = torch.load(self.snapshot, map_location='cpu')
        self.load_state_dict(state, strict=False)


def Res2Net50():
    """Build the Res2Net with stage depths 3-4-6-3 and its pretrained snapshot path."""
    stage_blocks = [3, 4, 6, 3]
    snapshot = '/content/drive/MyDrive/WeakPolyp-main/source/pretrain/res2net50_v1b_26w_4s-3cf99910.pth'
    return Res2Net(stage_blocks, snapshot)


In [None]:

import os
import cv2
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch import ToTensorV2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset

class TrainData(Dataset):
    """Training split: the first ``testsplit`` entries listed in ``cfg.train_image``.

    Each item is ``(image, mask)``: the normalised RGB image resized to
    352x352 with random flips/rotations, and a float mask of shape
    (3, 352, 352) whose three channels all hold the same binary foreground.

    Args:
        cfg: configuration providing ``train_image`` and ``train_mask`` directories.
    """

    def __init__(self, cfg):
        self.samples   = []
        testsplit = 700
        # NOTE(review): os.listdir returns entries in arbitrary order, so which
        # files land in this split depends on the file system.
        data = os.listdir(cfg.train_image+'/')[:testsplit]
        for name in data:
            image = cfg.train_image + '/' + name
            mask = cfg.train_mask + '/' + name
            self.samples.append((image, mask))

        self.transform = A.Compose([
            A.Normalize(),
            A.Resize(352, 352),
            A.HorizontalFlip(p=0.5),
            A.VerticalFlip(p=0.5),
            A.RandomRotate90(p=0.5),
            ToTensorV2()
        ])

    def __getitem__(self, idx):
        image_name, mask_name = self.samples[idx]
        image, mask           = cv2.imread(image_name), cv2.imread(mask_name)
        if image is None or mask is None:
            # cv2.imread returns None on failure instead of raising.
            raise FileNotFoundError('Could not read sample: %s / %s' % (image_name, mask_name))
        image                 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # The masks of this dataset are colour-coded (red / green regions), so a
        # single colour channel does not contain the whole foreground; the blue
        # channel that training reads as ``mask[:, 0]`` was empty, which gave
        # loss_dice == 1 and loss_sc == NaN. Treat a pixel as foreground when ANY
        # channel is bright and copy that binary mask into all three channels.
        # Black/white masks produce exactly the same result as before.
        foreground            = (mask > 128).any(axis=2, keepdims=True)
        mask                  = np.repeat(foreground, 3, axis=2).astype(np.float32)
        pair                  = self.transform(image=image, mask=mask)
        return pair['image'], pair['mask'].permute(2,0,1)

    def __len__(self):
        return len(self.samples)


class TestData(Dataset):
    """Held-out split: every entry after the first ``testsplit`` listed in ``cfg.train_image``.

    Each item is ``(image, mask, mask_path)``: the normalised RGB image
    resized to 320x320, a float binary mask of shape (320, 320) and the path
    of the mask file.

    Args:
        cfg: configuration providing ``train_image`` (used for the file
            listing) and ``test_image`` / ``test_mask`` (used to build paths).
    """

    def __init__(self, cfg):
        self.samples  = []
        self.cfg      = cfg

        testsplit = 700
        # NOTE(review): the names come from cfg.train_image but the paths use
        # cfg.test_image / cfg.test_mask; this only works if they point at the
        # same files — confirm against the config.
        data = os.listdir(cfg.train_image+'/')[testsplit:]

        for name in data:
            image = cfg.test_image + '/' + name
            mask = cfg.test_mask + '/' + name
            self.samples.append((image, mask))
        print('Test Data: %s,   Test Samples: %s'%(cfg.test_image, len(self.samples)))

        self.transform = A.Compose([
            A.Normalize(),
            A.Resize(320, 320),
            ToTensorV2()
        ])

    def __getitem__(self, idx):
        image_name, mask_name = self.samples[idx]
        image, mask           = cv2.imread(image_name), cv2.imread(mask_name)
        if image is None or mask is None:
            # cv2.imread returns None on failure instead of raising.
            raise FileNotFoundError('Could not read sample: %s / %s' % (image_name, mask_name))
        image                 = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Read the mask in colour and mark a pixel as foreground when ANY channel
        # is bright. Thresholding the grayscale conversion at 128 dropped pure red
        # regions, whose gray value is about 76. Black/white masks give the same
        # result as before.
        mask                  = np.float32((mask > 128).any(axis=2))
        pair                  = self.transform(image=image, mask=mask)
        return pair['image'], pair['mask'], mask_name

    def __len__(self):
        return len(self.samples)

def clip_gradient(optimizer, grad_clip):
    """Clamp every existing gradient held by the optimizer to [-grad_clip, grad_clip], in place."""
    all_params = (p for group in optimizer.param_groups for p in group['params'])
    for param in all_params:
        # Parameters that took no part in the backward pass have no gradient.
        if param.grad is None:
            continue
        param.grad.data.clamp_(-grad_clip, grad_clip)

def weight_init(module):
    """Initialise the direct children of ``module``, descending into ``nn.Sequential``.

    Conv2d and Linear layers get Kaiming-normal weights (fan_in, relu) and zero
    biases. BatchNorm2d / InstanceNorm2d get unit weights and zero biases.
    ReLU / PReLU are skipped. Any other child must provide an ``initialize()``
    method, which is called. The name of every visited child is printed.
    """
    for name, child in module.named_children():
        print('initialize: '+name)

        if isinstance(child, (nn.Conv2d, nn.Linear)):
            nn.init.kaiming_normal_(child.weight, mode='fan_in', nonlinearity='relu')
            if child.bias is not None:
                nn.init.zeros_(child.bias)
            continue

        if isinstance(child, (nn.BatchNorm2d, nn.InstanceNorm2d)):
            # Affine parameters are absent when the layer was built with affine=False.
            if child.weight is not None:
                nn.init.ones_(child.weight)
            if child.bias is not None:
                nn.init.zeros_(child.bias)
            continue

        if isinstance(child, nn.Sequential):
            weight_init(child)
            continue

        if isinstance(child, (nn.ReLU, nn.PReLU)):
            continue

        # Everything else is expected to know how to initialise itself.
        child.initialize()


def preprocess(path_src):
    """Resize a dataset to 352x352 and derive bounding-box masks from its ground truth.

    For every frame under ``path_src + '/Frame/<folder>'`` the frame, its
    grayscale GT mask and a box mask (the filled bounding rectangles of the
    mask's external contours) are written under the destination root, which is
    ``path_src`` with '/SUN-SEG/' replaced by '/SUN-SEG-Processed/'.
    """
    print('process', path_src)
    path_dst = path_src.replace('/SUN-SEG/', '/SUN-SEG-Processed/')
    frame_root = path_src+'/Frame'
    for folder in os.listdir(frame_root):
        print(folder)
        src_frames = frame_root+'/'+folder
        src_masks  = path_src+'/GT/'+folder
        for name in os.listdir(src_frames):
            mask_name = name.replace('.jpg', '.png')

            image = cv2.imread(src_frames+'/'+name)
            image = cv2.resize(image, (352,352), interpolation=cv2.INTER_LINEAR)
            mask  = cv2.imread(src_masks+'/'+mask_name, cv2.IMREAD_GRAYSCALE)
            # Nearest-neighbour keeps the mask values unblended.
            mask  = cv2.resize(mask, (352, 352), interpolation=cv2.INTER_NEAREST)

            contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
            box = np.zeros_like(mask)
            for x, y, w, h in map(cv2.boundingRect, contours):
                box[y:y+h, x:x+w] = 255

            outputs = (
                (path_dst+'/Frame/'+folder, name,      image),
                (path_dst+'/GT/'   +folder, mask_name, mask),
                (path_dst+'/Box/'  +folder, mask_name, box),
            )
            for out_dir, out_name, array in outputs:
                os.makedirs(out_dir, exist_ok=True)
                cv2.imwrite(out_dir+'/'+out_name, array)

In [None]:

import sys
import logging
import numpy as np
from tqdm import tqdm
from datetime import datetime
# Do not write .pyc files, and put the parent directory first on the module search path.
sys.dont_write_bytecode = True
sys.path.insert(0, '../')


In [None]:
# Use when a CUDA out-of-memory error occurs:
# clear the cache

import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [None]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

class Train:
    """Training driver for ``WeakPolyp``.

    Builds the model, the training loader and an SGD optimizer whose backbone
    parameters use a tenth of the head learning rate. :meth:`forward` runs the
    training loop and :meth:`evaluate` is called after every epoch.

    Args:
        cfg: configuration providing ``log_path``, ``backbone``, ``batch_size``,
            ``num_workers``, ``lr``, ``weight_decay``, ``epoch``, ``clip`` and
            ``test_image``, plus whatever ``WeakPolyp`` and ``TrainData`` read.
    """

    def __init__(self, cfg):
        ## parameter
        self.cfg        = cfg
        self.logger     = SummaryWriter(cfg.log_path)
        logging.basicConfig(level=logging.INFO, filename=cfg.log_path+'/train.log', filemode='a', format='[%(asctime)s | %(message)s]', datefmt='%I:%M:%S')
        ## model
        self.model      = WeakPolyp(cfg).cuda()
        self.model.train()
        ## data
        self.data       = TrainData(cfg)
        self.loader     = DataLoader(dataset=self.data, batch_size=cfg.batch_size, shuffle=True, num_workers=cfg.num_workers)
        ## optimizer
        base, head      = [], []
        for name, param in self.model.named_parameters():
            if 'backbone' in name:
                base.append(param)
            else:
                head.append(param)
        self.optimizer = torch.optim.SGD([{'params':base, 'lr':0.1*cfg.lr}, {'params':head, 'lr':cfg.lr}], momentum=0.9, weight_decay=cfg.weight_decay, nesterov=True)
        self.max_dice   = 0


    def forward(self):
        """Run the training loop for ``cfg.epoch`` epochs, evaluating after each one."""
        sizes          = [256, 288, 320, 352, 384, 416, 448]
        global_step    = 0
        scaler         = torch.cuda.amp.GradScaler()
        for epoch in range(self.cfg.epoch):
            # Halve the learning rate of the first (backbone) parameter group.
            if epoch in [3, 6, 9, 12]:
                self.optimizer.param_groups[0]['lr'] *= 0.5
            for i, (image, mask) in enumerate(self.loader):
                with torch.cuda.amp.autocast():
                    ## pred 1
                    image, mask    = image.cuda(), mask.cuda()
                    size1          = int(np.random.choice(sizes))
                    image1         = F.interpolate(image, size=size1, mode='bilinear')
                    pred1          = self.model(image1)
                    pred1          = F.interpolate(pred1, size=352, mode='bilinear')
                    ## pred 2
                    size2          = int(np.random.choice(sizes))
                    image2         = F.interpolate(image, size=size2, mode='bilinear')
                    pred2          = self.model(image2)
                    pred2          = F.interpolate(pred2, size=352, mode='bilinear')
                    ## loss_sc: the two predictions should agree inside the annotated region
                    loss_sc        = (torch.sigmoid(pred1)-torch.sigmoid(pred2)).abs()
                    inside         = mask[:,0:1]==1
                    if inside.any():
                        loss_sc    = loss_sc[inside].mean()
                    else:
                        # The mean of an empty selection is NaN and would poison the
                        # total loss; use a zero that stays attached to the graph.
                        loss_sc    = loss_sc.sum()*0
                    ## M2B transformation
                    pred           = torch.cat([pred1, pred2], dim=0)
                    mask           = torch.cat([mask, mask], dim=0)
                    predW, predH   = pred.max(dim=2, keepdim=True)[0], pred.max(dim=3, keepdim=True)[0]
                    pred           = torch.minimum(predW, predH)
                    pred, mask     = pred[:,0], mask[:,0]
                    ## loss_ce + loss_dice
                    loss_ce        = F.binary_cross_entropy_with_logits(pred, mask)
                    pred           = torch.sigmoid(pred)
                    inter          = (pred*mask).sum(dim=(1,2))
                    union          = (pred+mask).sum(dim=(1,2))
                    loss_dice      = 1-(2*inter/(union+1)).mean()
                    loss           = loss_ce + loss_dice + loss_sc

                ## backward
                self.optimizer.zero_grad()
                scaler.scale(loss).backward()
                # Unscale first so the clipping threshold applies to the true gradients.
                scaler.unscale_(self.optimizer)
                clip_gradient(self.optimizer, self.cfg.clip)
                scaler.step(self.optimizer)
                scaler.update()

                global_step += 1
                self.logger.add_scalar('lr'  , self.optimizer.param_groups[0]['lr'], global_step=global_step)
                self.logger.add_scalars('loss', {'ce':loss_ce.item(), 'dice':loss_dice.item(), 'sc':loss_sc.item()}, global_step=global_step)
                ## print loss
                if global_step % 20 == 0:
                    print('{} epoch={:03d}/{:03d}, step={:04d}/{:04d}, loss_ce={:0.4f}, loss_dice={:0.4f}, loss_sc={:0.4f}'.format(datetime.now(), epoch, self.cfg.epoch, i, len(self.loader), loss_ce.item(), loss_dice.item(), loss_sc.item()))
            self.evaluate(epoch)

    def evaluate(self, epoch):
        """Log mean per-sample Dice / IoU and save the weights when Dice improves."""
        self.model.eval()
        with torch.no_grad():
            # NOTE(review): this scores the TrainData split (with its random
            # flips), not a held-out set.
            data = TrainData(self.cfg)
            loader = DataLoader(dataset=data, batch_size=32, shuffle=False, num_workers=self.cfg.num_workers)
            dice, iou, cnt = 0, 0, 0
            for image, label in tqdm(loader):
                image = image.cuda().float()
                # Channel 0 of the mask is the binary target the training loop uses.
                label = label.cuda()[:, 0] > 0.5
                B, C, H, W = image.shape

                pred = self.model(image)
                pred = F.interpolate(pred, size=(H, W), mode='bilinear')
                # The model emits one logit per pixel: logit > 0 means sigmoid > 0.5.
                # argmax over that single channel is always 0, i.e. an empty prediction.
                pred = pred[:, 0] > 0

                inter = (pred & label).sum(dim=(1, 2)).float()
                total = pred.sum(dim=(1, 2)).float() + label.sum(dim=(1, 2)).float()
                # Per-sample scores are summed here and averaged over cnt below.
                dice += ((2 * inter + 1) / (total + 1)).sum().item()
                iou += ((inter + 1) / (total - inter + 1)).sum().item()
                cnt += B
            # Guard against an empty loader.
            mean_dice = dice / cnt if cnt else 0.0
            mean_iou = iou / cnt if cnt else 0.0
            logging.info('epoch=%-8d | dice=%.4f | iou=%.4f | path=%s' % (epoch, mean_dice, mean_iou, self.cfg.test_image))

        if mean_dice > self.max_dice:
            self.max_dice = mean_dice
            torch.save(self.model.state_dict(), self.cfg.backbone + '/model.pth')
        self.model.train()

class Config:
    """Dataset paths and hyper-parameters for one backbone.

    Creating an instance also creates the ``<backbone>/log`` directory.
    Hyper-parameters (mode, epoch, batch_size, lr, num_workers, weight_decay,
    clip) are only set for 'res2net50' and 'pvt_v2_b2'; any other backbone
    name leaves them undefined.
    """
    def __init__(self, backbone):
        ## set the backbone type
        self.backbone       =  backbone
        ## set the path of training dataset
        self.train_image    = '/content/dataset/train/train'
        self.train_mask     = '/content/dataset/train_gt/train_gt'
        ## set the path of testing dataset
        ## (the mask path used to be assigned to test_image a second time,
        ##  which overwrote the image path and never defined test_mask)
        ## NOTE(review): both test paths point at the training folders - confirm.
        self.test_image     = '/content/dataset/train/train'
        self.test_mask      = '/content/dataset/train_gt/train_gt'

        ## set the path of logging
        self.log_path       = self.backbone+'/log'
        os.makedirs(self.log_path, exist_ok=True)

        ## keep unchanged
        if self.backbone=='res2net50':
            self.mode           = 'train'
            self.epoch          = 30
            self.batch_size     = 16
            self.lr             = 0.1
            self.num_workers    = 4
            self.weight_decay   = 1e-3
            self.clip           = 0.5
        elif self.backbone=='pvt_v2_b2':
            self.mode           = 'train'
            self.epoch          = 30
            self.batch_size     = 16
            self.lr             = 0.1
            self.num_workers    = 4
            self.weight_decay   = 1e-4
            self.clip           = 1000

## training
# Set CUDA_VISIBLE_DEVICES to '0' so CUDA is restricted to the first GPU.
os.environ ["CUDA_VISIBLE_DEVICES"] = '0'
# Build the res2net50 configuration (this also creates its log directory),
# wrap it in a Train object and run its forward() training entry point.
Train(Config('res2net50')).forward()


initialize: backbone
initialize: fusion
initialize: linear2
initialize: 0
initialize: 1
initialize: linear3
initialize: 0
initialize: 1
initialize: linear4
initialize: 0
initialize: 1
initialize: linear
2023-10-13 08:16:19.797336 epoch=000/030, step=0019/0044, loss_ce=0.0372, loss_dice=1.0000, loss_sc=nan
2023-10-13 08:16:34.154303 epoch=000/030, step=0039/0044, loss_ce=0.0114, loss_dice=1.0000, loss_sc=nan


  5%|▍         | 1/22 [00:05<01:56,  5.54s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])
torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


  9%|▉         | 2/22 [00:05<00:50,  2.52s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 14%|█▎        | 3/22 [00:06<00:30,  1.59s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 18%|█▊        | 4/22 [00:06<00:20,  1.14s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 23%|██▎       | 5/22 [00:11<00:43,  2.56s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 32%|███▏      | 7/22 [00:12<00:21,  1.42s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 36%|███▋      | 8/22 [00:13<00:16,  1.15s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])
torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 41%|████      | 9/22 [00:18<00:28,  2.19s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 45%|████▌     | 10/22 [00:18<00:19,  1.64s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 50%|█████     | 11/22 [00:18<00:14,  1.28s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 55%|█████▍    | 12/22 [00:19<00:10,  1.03s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 59%|█████▉    | 13/22 [00:23<00:17,  1.90s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 64%|██████▎   | 14/22 [00:23<00:11,  1.46s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 68%|██████▊   | 15/22 [00:24<00:08,  1.17s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 73%|███████▎  | 16/22 [00:24<00:05,  1.05it/s]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 77%|███████▋  | 17/22 [00:30<00:11,  2.35s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 82%|████████▏ | 18/22 [00:30<00:07,  1.77s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 86%|████████▋ | 19/22 [00:31<00:04,  1.36s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 91%|█████████ | 20/22 [00:31<00:02,  1.08s/it]

torch.Size([32, 3, 352, 352]) torch.Size([32, 1, 352, 352])


 95%|█████████▌| 21/22 [00:32<00:01,  1.14s/it]

torch.Size([28, 3, 352, 352]) torch.Size([28, 1, 352, 352])


100%|██████████| 22/22 [00:33<00:00,  1.51s/it]


2023-10-13 08:17:25.696105 epoch=001/030, step=0015/0044, loss_ce=0.0087, loss_dice=1.0000, loss_sc=nan


KeyboardInterrupt: ignored

In [None]:
import shutil
# Copy the saved checkpoint to a second location
# (presumably a mounted Google Drive folder - verify the mount exists).
shutil.copy(
    src='/content/res2net50/model.pth',
    dst='/content/drive/MyDrive/Weak_Polyp_model.pth',
)

In [None]:
# Create the folder that receives the predicted masks. exist_ok makes the cell
# safe to re-run, and the absolute path is the one the inference cell writes to
# and the RLE cell reads from.
os.makedirs('/content/submissions', exist_ok=True)

In [None]:
# Instantiate WeakPolyp with the res2net50 configuration, then move it to the GPU.
model = WeakPolyp(Config('res2net50'))
model = model.cuda()
# Switch to inference mode; being the last expression, its result is the cell output.
model.eval()

In [None]:
# Directory whose files the inference cell lists and runs the model on.
TEST_DIR = '/content/dataset/test/test/'

In [None]:
def read_image(image_path, size=(256, 256)):
    """Load an image as a float32 RGB array scaled to [0, 1].

    Args:
        image_path: Path of the image file to read.
        size: Target size handed to ``cv2.resize`` as (width, height).
            Defaults to the previously hard-coded (256, 256).

    Returns:
        ``np.float32`` array of the resized RGB image with values in [0, 1].

    Raises:
        OSError: If OpenCV cannot read the file (missing or not an image).
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising.
    if image is None:
        raise OSError(f'cv2.imread could not read image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, size)
    return (image / 255.0).astype(np.float32)

In [None]:
from torchvision import transforms
from PIL import Image



In [None]:
# Load the trained weights into the WeakPolyp model built earlier, switch it to
# inference mode and make sure it lives on the GPU.
model.load_state_dict(torch.load('/content/res2net50/model.pth'))
model.eval()
model.cuda()

# Colour painted for each predicted class id, given in RGB order.
colors = np.array([[0, 0, 0], [0, 255, 0], [255, 0, 0]])

# Input pipeline, built once instead of on every loop iteration.
preprocess = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

# Collect the test image paths.
test_images = [os.path.join(TEST_DIR, f) for f in os.listdir(TEST_DIR)]

# Predict a mask for every test image and save it as a PNG.
for image_path in test_images:
    # Always save as .png, whatever the source extension is.
    save_name = os.path.splitext(os.path.basename(image_path))[0] + '.png'

    # Load and preprocess the image; force 3 channels and close the file afterwards.
    with Image.open(image_path) as img:
        img = img.convert('RGB')
        width, height = img.size  # PIL reports (width, height)
        x = preprocess(img).unsqueeze(0).cuda()  # add the batch dimension, move to GPU

    # Run the model and bring the logits back to the source resolution, the same
    # way evaluate() resizes predictions before comparing them with the labels.
    with torch.no_grad():
        output = model(x)
        output = F.interpolate(output, size=(height, width), mode='bilinear')

    # Turn the per-pixel class ids into an RGB colour mask.
    pred = torch.argmax(output, dim=1).squeeze(0).cpu().numpy()
    rgb = np.zeros((*pred.shape, 3), dtype=np.uint8)
    for label, color in enumerate(colors):
        rgb[pred == label] = color

    # cv2.imwrite expects BGR channel order; without this conversion the red
    # class ends up in the blue channel and is dropped by the RLE step, which
    # only encodes the red and green channels.
    save_path = os.path.join('/content/submissions', save_name)
    if not cv2.imwrite(save_path, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)):
        # imwrite reports failure (e.g. missing folder) only via its return value.
        raise OSError(f'cv2.imwrite could not write mask: {save_path}')

In [None]:
import numpy as np
import pandas as pd
import cv2
import os

def rle_to_string(runs):
    """Render the run-length values as one space-separated string."""
    return ' '.join(map(str, runs))

def rle_encode_one_mask(mask):
    """Run-length encode one mask channel as a 'start length start length ...' string.

    The mask is flattened in row-major order, every positive value counts as
    foreground, and starts are 1-based. An all-zero mask yields an empty string.
    """
    flat = mask.flatten()  # flatten() copies, so the caller's array is untouched
    flat[flat > 0] = 255

    # A run touching either end has no value change to mark its boundary, so
    # surround the data with zeros in that case.
    touches_border = bool(flat[0] or flat[-1])
    if touches_border:
        padded = np.zeros([len(flat) + 2], dtype=flat.dtype)
        padded[1:-1] = flat
        flat = padded

    # Index of every value change, converted to a 1-based position; the leading
    # pad shifts everything by one, hence the smaller offset.
    offset = 1 if touches_border else 2
    runs = np.where(flat[1:] != flat[:-1])[0] + offset
    # Changes alternate start/end: turn each end position into a run length.
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return rle_to_string(runs)

def rle2mask(mask_rle, shape=(3,3)):
    '''
    Decode a run-length string back into a binary mask.

    mask_rle: string of "start length" pairs, starts being 1-based
    shape: dimensions the flat buffer is reshaped to before it is transposed
    Returns a uint8 numpy array, 1 - mask, 0 - background
    '''
    tokens = mask_rle.split()
    # Even positions hold the starts (made 0-based), odd positions the lengths.
    starts = np.asarray(tokens[0::2], dtype=int) - 1
    ends = starts + np.asarray(tokens[1::2], dtype=int)

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, stop in zip(starts, ends):
        flat[begin:stop] = 1
    return flat.reshape(shape).T

def mask2string(dir):
    """Run-length encode the red and green channels of every mask in a folder.

    Args:
        dir: Directory containing the mask images.

    Returns:
        dict with two index-aligned lists: 'ids' holds ``<name>_<channel>``
        (name = file name up to its first dot, channel 0 = red, 1 = green)
        and 'strings' holds the matching RLE strings.

    Raises:
        OSError: If an entry of the directory cannot be read as an image.
    """
    ## mask --> string
    ids, strings = [], []
    # os.listdir order is arbitrary; sort so the output rows are reproducible.
    for image_id in sorted(os.listdir(dir)):
        name = image_id.split('.')[0]
        path = os.path.join(dir, image_id)
        bgr = cv2.imread(path)
        # cv2.imread returns None instead of raising when it cannot decode a file.
        if bgr is None:
            raise OSError(f'cv2.imread could not read mask: {path}')
        img = bgr[:, :, ::-1]  # BGR -> RGB
        for channel in range(2):
            ids.append(f'{name}_{channel}')
            strings.append(rle_encode_one_mask(img[:, :, channel]))
    return {
        'ids': ids,
        'strings': strings,
    }


# Encode every predicted mask and write the submission table to output.csv.
MASK_DIR_PATH = '/content/submissions'
dir = MASK_DIR_PATH
res = mask2string(dir)

# One row per (image, channel): its id and the RLE string for that channel.
df = pd.DataFrame({'Id': res['ids'], 'Expected': res['strings']})
df.to_csv(r'output.csv', index=False)

/content/submissions/6f4d4987ea3b4bae5672a230194c5a08.png
/content/submissions/e9082ea2c193ac8d551c149b60f29653.png
/content/submissions/4e8bfb905b78a91391adc0bb223c4eaf.png
/content/submissions/fcd6da15fc656702fa602bb3c7abacdb.png
/content/submissions/15fc656702fa602bb3c7abacdbd7e6af.png
/content/submissions/3dd311a65d2b46d0a6085835c525af63.png
/content/submissions/6679bff55177a34fc01019eec999fd84.png
/content/submissions/13dd311a65d2b46d0a6085835c525af6.png
/content/submissions/4417fda8019410b1fcf0625f608b4ce9.png
/content/submissions/26679bff55177a34fc01019eec999fd8.png
/content/submissions/60a633a8d5b2b2b55157b7781e2c706c.png
/content/submissions/1c0e9082ea2c193ac8d551c149b60f29.png
/content/submissions/3425b976973f13dd311a65d2b46d0a60.png
/content/submissions/559c7e610b1531871f2fd85a04faeeb2.png
/content/submissions/d5060a633a8d5b2b2b55157b7781e2c7.png
/content/submissions/8eb5a9a8a8d7fcc9df8e5ad89d284483.png
/content/submissions/1ad4f13ccf1f4b331a412fc44655fb51.png
/content/submi