# 전체 실행

## Import

In [None]:
# Mount Google Drive inside the Colab runtime so the dataset folder is
# reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Root folder of the dataset on the mounted Drive. Other cells build file
# names by plain string concatenation, so the trailing slash is required.
path = "/content/drive/MyDrive/졸업과제/severstal-steel-defect-detection/"

In [None]:
# Column names: ImageId, ClassId, EncodedPixels

In [None]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Utils

In [None]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length string into a binary ``uint8`` mask.

    ``mask_rle`` is a whitespace-separated sequence of ``start length`` pairs
    with 1-based starts. Runs are painted onto a flat buffer of
    ``shape[0] * shape[1]`` pixels which is then reshaped to ``shape`` in
    NumPy's default row-major order. Any part of a run that falls past the
    end of the buffer is silently dropped by the slice assignment.
    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1  # to 0-based
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = 1
    return flat.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Encode a binary mask as a ``start length start length ...`` string.

    The mask is flattened in row-major order and starts are 1-based. A mask
    with no foreground pixels yields the empty string.
    """
    # Zero sentinels on both ends make every run produce a rising and a
    # falling edge, even when it touches the first or last pixel.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
    # Even slots hold run starts, odd slots hold run ends; turn ends into lengths.
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

In [None]:
# Load the annotation table for a quick manual look.
dff = pd.read_csv(path+"train.csv")

In [None]:
# Sanity check: full image path built from the first column of the third CSV row.
path+"train_images/"+dff.iloc[2, 0]

'/content/drive/MyDrive/졸업과제/severstal-steel-defect-detection/train_images/000a4bcdd.jpg'

## Custom Dataset

In [None]:
class SteelDataset(Dataset):
    """Binary defect-segmentation dataset driven by an annotation CSV.

    Each CSV row is one sample: column 0 holds the image file name (looked up
    under ``path + "train_images/"``) and column 2 holds the run-length
    encoded defect mask. The class id column is not used, so every defect is
    treated as the same foreground class.

    Args:
        csv_file: Path of the annotation CSV.
        transform: Optional albumentations-style callable invoked as
            ``transform(image=..., mask=...)`` (or ``transform(image=...)`` in
            inference mode) that returns a dict with the same keys.
        infer: When True, ``__getitem__`` returns only the image.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        return len(self.data)

    @staticmethod
    def _decode_mask(mask_rle, height, width):
        """Decode a column-major RLE string into a ``(height, width)`` uint8 mask.

        The annotations number pixels top-to-bottom first and then
        left-to-right, so the flat buffer is filled as ``(width, height)`` and
        transposed. Reshaping it directly to ``(height, width)`` would scatter
        the defect pixels across the image. A non-string value (e.g. NaN for a
        row without annotation) yields an all-zero mask.
        """
        flat = np.zeros(height * width, dtype=np.uint8)
        if isinstance(mask_rle, str):
            tokens = np.asarray(mask_rle.split(), dtype=int)
            starts = tokens[0::2] - 1  # RLE starts are 1-based
            lengths = tokens[1::2]
            for begin, length in zip(starts, lengths):
                flat[begin:begin + length] = 1
        # The transpose is only a view; hand a contiguous array to the transforms.
        return np.ascontiguousarray(flat.reshape((width, height)).T)

    def __getitem__(self, idx):
        """Return ``image`` (inference mode) or ``(image, mask)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_name = self.data.iloc[idx, 0]
        image_file = path+"train_images/"+image_name
        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_file}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        mask = self._decode_mask(mask_rle, image.shape[0], image.shape[1])

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

## Data Loader

In [None]:
# Preprocessing applied to every sample: resize to 224x224, normalize the
# image with A.Normalize's default parameters, then convert to torch tensors.
transform = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

# One dataset over the whole annotation CSV; it is split into train/val later.
dataset = SteelDataset(csv_file=path+"train.csv", transform=transform)

In [None]:
# Number of samples (one per CSV row).
len(dataset)

7095

In [None]:
# Random 80/20 split into training and validation subsets.
# NOTE(review): no generator/seed is passed here, so the split presumably
# differs between runs unless the global torch seed is fixed elsewhere.
# NOTE(review): rows are split individually; if an ImageId occurs in several
# rows (one per ClassId), the same image could end up in both subsets — TODO confirm.
train_dataset, val_dataset = random_split(dataset, [0.8,0.2])

In [None]:
# Sizes of the two subsets.
len(train_dataset), len(val_dataset)

(5676, 1419)

In [None]:
# Reshuffle the training samples every epoch so the batch composition is not
# the same fixed sequence on every pass.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)
# Keep the validation order fixed: later cells pair predictions with
# val_dataset.indices purely by position.
val_loader   = DataLoader(dataset=val_dataset, batch_size=20)

## Define Model

In [None]:
# Double convolution block, the basic building unit of the U-Net.
def double_conv(in_channels, out_channels):
    """Return ``Conv3x3 -> ReLU -> Conv3x3 -> ReLU`` as an ``nn.Sequential``.

    Both convolutions use ``padding=1``, so the spatial size is preserved.
    The first maps ``in_channels`` to ``out_channels``; the second keeps
    ``out_channels``.
    """
    layers = []
    channels = in_channels
    for _ in range(2):
        layers.append(nn.Conv2d(channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU(inplace=True))
        channels = out_channels
    return nn.Sequential(*layers)

# Simple U-Net model definition
class UNet(nn.Module):
    """Small U-Net: 3-channel input, single-channel logit map output.

    The encoder runs three ``double_conv`` stages (64, 128, 256 channels),
    each followed by a 2x max-pool, then a 512-channel bottleneck. The decoder
    upsamples bilinearly by 2x three times, concatenating the matching encoder
    output before each ``double_conv``. A final 1x1 convolution produces one
    channel of raw logits (no sigmoid is applied here).

    Input height and width should be divisible by 8; otherwise the upsampled
    tensors will not match the skip tensors at concatenation.
    """

    def __init__(self):
        super().__init__()
        # Encoder
        self.dconv_down1 = double_conv(3, 64)
        self.dconv_down2 = double_conv(64, 128)
        self.dconv_down3 = double_conv(128, 256)
        self.dconv_down4 = double_conv(256, 512)

        # Parameter-free resampling layers shared by all stages
        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder: input channels = upsampled features + skip connection
        self.dconv_up3 = double_conv(256 + 512, 256)
        self.dconv_up2 = double_conv(128 + 256, 128)
        self.dconv_up1 = double_conv(128 + 64, 64)

        self.conv_last = nn.Conv2d(64, 1, 1)

    def forward(self, x):
        # Contracting path: remember each stage's output for the skip links.
        skips = []
        for encoder in (self.dconv_down1, self.dconv_down2, self.dconv_down3):
            features = encoder(x)
            skips.append(features)
            x = self.maxpool(features)

        x = self.dconv_down4(x)

        # Expanding path: consume the skips from deepest to shallowest.
        for decoder in (self.dconv_up3, self.dconv_up2, self.dconv_up1):
            x = self.upsample(x)
            x = torch.cat([x, skips.pop()], dim=1)
            x = decoder(x)

        return self.conv_last(x)

In [None]:
# Instantiate the network on the selected device.
# NOTE: the training cell further down creates a fresh UNet again.
model = UNet().to(device)

In [None]:
# Install torchviz into the Colab runtime.
# NOTE(review): torchviz is not imported anywhere in the visible cells.
!pip install torchviz

Collecting torchviz
  Downloading torchviz-0.0.2.tar.gz (4.9 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: torchviz
  Building wheel for torchviz (setup.py) ... [?25l[?25hdone
  Created wheel for torchviz: filename=torchviz-0.0.2-py3-none-any.whl size=4130 sha256=2dba12f13b71cccc1e628e58558879e8d8872f5078e43b99e5b050da2ac8ff19
  Stored in directory: /root/.cache/pip/wheels/4c/97/88/a02973217949e0db0c9f4346d154085f4725f99c4f15a87094
Successfully built torchviz
Installing collected packages: torchviz
Successfully installed torchviz-0.0.2


## Model Train

In [None]:
# Initialize the model
model = UNet().to(device)

# Define the loss function and optimizer.
# BCEWithLogitsLoss takes raw logits, which is what UNet.forward returns.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# training loop
for epoch in range(10):  # Train for 10 epochs.
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(train_loader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # unsqueeze(1) inserts a channel axis so the target lines up with the
        # single-channel model output.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Mean of the per-batch losses over the epoch.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(train_loader)}')

100%|██████████| 355/355 [28:13<00:00,  4.77s/it]


Epoch 1, Loss: 0.27910277285206486


100%|██████████| 355/355 [03:47<00:00,  1.56it/s]


Epoch 2, Loss: 0.21343074151327912


100%|██████████| 355/355 [03:52<00:00,  1.53it/s]


Epoch 3, Loss: 0.20906619444279603


100%|██████████| 355/355 [03:53<00:00,  1.52it/s]


Epoch 4, Loss: 0.2066254127823131


100%|██████████| 355/355 [03:53<00:00,  1.52it/s]


Epoch 5, Loss: 0.2049610885096268


100%|██████████| 355/355 [03:51<00:00,  1.53it/s]


Epoch 6, Loss: 0.20386443354294334


100%|██████████| 355/355 [03:51<00:00,  1.54it/s]


Epoch 7, Loss: 0.20291363397534465


100%|██████████| 355/355 [03:51<00:00,  1.53it/s]


Epoch 8, Loss: 0.2023258961632218


100%|██████████| 355/355 [03:51<00:00,  1.54it/s]


Epoch 9, Loss: 0.20183725938410826


100%|██████████| 355/355 [03:51<00:00,  1.54it/s]

Epoch 10, Loss: 0.20123758620359528





## Validation

In [None]:
# Run the trained model over the validation loader and collect one RLE string
# (or -1 for an empty prediction) per sample, in loader order.
with torch.no_grad():
    model.eval()
    val_result = []
    # The ground-truth masks yielded by the loader are not used here; the
    # name `masks` is immediately reused for the predictions.
    for images, masks in tqdm(val_loader):
        images = images.float().to(device)

        outputs = model(images)
        masks = torch.sigmoid(outputs).cpu().numpy()
        masks = np.squeeze(masks, axis=1)  # drop the single channel axis
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # No defect pixels predicted at all -> store -1
                val_result.append(-1)
            else:
                val_result.append(mask_rle)

100%|██████████| 71/71 [05:50<00:00,  4.94s/it]


In [None]:
# Convert the results into a DataFrame (first check how many predictions there are).
len(val_result)

1419

In [None]:
# Confirm the container type of the collected predictions.
type(val_result)

list

In [None]:
# Wrap the predicted RLE strings in a single-column DataFrame.
pred_val = pd.DataFrame(val_result, columns=['EncodedPixels']).reset_index(drop=True)

In [None]:
# Save the predictions under `path`.
# NOTE(review): when run right after the cell above, the file holds only the
# EncodedPixels column plus the default index — no ImageId.
pred_val.to_csv(path+"predict_u_net.csv")

In [None]:
# Build the validation ground truth in the same pixel space as the predictions.
# The RLE strings stored in the CSV index pixels of the original full-size
# image, while predictions are encoded on the 224x224 transformed grid, so the
# raw CSV strings cannot be decoded at 224x224. Re-encode the transformed
# ground-truth masks with the same rle_encode used for the predictions.
# This re-reads every validation image once.
val_rows = list(val_dataset.indices)
real_val = pd.DataFrame({
    'ImageId': dataset.data.loc[val_rows, 'ImageId'].to_numpy(),
    'EncodedPixels': [
        # An empty mask encodes to '', stored as -1 like empty predictions.
        rle_encode(np.asarray(val_dataset[i][1], dtype=np.uint8)) or -1
        for i in range(len(val_dataset))
    ],
})

In [None]:
# Display the ground-truth table for the validation rows.
real_val

Unnamed: 0,ImageId,EncodedPixels
0,ce22cd852.jpg,40462 1 40717 3 40971 5 41226 7 41481 8 41736 ...
1,4efe827ed.jpg,28507 15 28748 30 29004 30 29260 30 29516 30 2...
2,67a174dc4.jpg,109057 43 109313 128 109569 213 109825 2304 11...
3,e07f3eb4b.jpg,300980 3 301209 15 301236 7 301447 2 301464 20...
4,97bbf0668.jpg,108804 1 109059 4 109315 6 109570 9 109825 11 ...
...,...,...
1414,d160b5de3.jpg,115457 13 115713 39 115969 64 116225 90 116481...
1415,61f22bd01.jpg,178755 6 178791 18 179008 11 179037 58 179124 ...
1416,d7b0f2130.jpg,260033 64 260161 4288 311041 3840 314882 255 3...
1417,398a5b6a9.jpg,218513 12 218768 37 219024 49 219279 50 219534...


In [None]:
# Rebuild the single-column prediction frame from val_result.
pred_val = pd.DataFrame(val_result, columns=['EncodedPixels']).reset_index(drop=True)

In [None]:
# Attach the ImageId column from real_val by row position and put it first.
# This relies on val_result being in the same order as real_val.
pred_val = pd.concat([pred_val,real_val[['ImageId']]],axis=1)[['ImageId','EncodedPixels']]

In [None]:
# Display the prediction table.
pred_val

Unnamed: 0,ImageId,EncodedPixels
0,ce22cd852.jpg,-1
1,4efe827ed.jpg,-1
2,67a174dc4.jpg,-1
3,e07f3eb4b.jpg,-1
4,97bbf0668.jpg,-1
...,...,...
1414,d160b5de3.jpg,-1
1415,61f22bd01.jpg,-1
1416,d7b0f2130.jpg,-1
1417,398a5b6a9.jpg,-1


## validation score

In [None]:
import numpy as np
import pandas as pd
from typing import List, Union
from joblib import Parallel, delayed


def rle_decode(mask_rle: Union[str, int], shape=(224, 224)) -> np.ndarray:
    '''
    Decode a run-length string into a binary uint8 mask.

    mask_rle: run-length as string formatted (start length), 1-based starts;
              -1, None or NaN all mean "no mask"
    shape: (height,width) of array to return
    Returns numpy uint8 array, 1 - mask, 0 - background. The flat buffer is
    reshaped in row-major order; runs reaching past the end are clipped.
    '''
    # A NaN compares unequal to itself; it shows up when an empty field is
    # read back from a CSV.
    is_missing = mask_rle is None or (isinstance(mask_rle, float) and mask_rle != mask_rle)
    if is_missing or (not isinstance(mask_rle, str) and mask_rle == -1):
        # Same dtype as the decoded branch so callers always get uint8.
        return np.zeros(shape, dtype=np.uint8)

    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]
    starts -= 1  # convert to 0-based indices
    ends = starts + lengths
    img = np.zeros(shape[0]*shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)


def dice_score(prediction: np.array, ground_truth: np.array, smooth=1e-7) -> float:
    '''
    Dice coefficient of two masks: 2*|A.B| / (|A| + |B|).

    The overlap is the sum of the element-wise product, so the inputs are
    expected to be 0/1 arrays of the same shape. `smooth` is added to both
    numerator and denominator to avoid dividing by zero on two empty masks.
    '''
    overlap = np.sum(prediction * ground_truth)
    total = np.sum(prediction) + np.sum(ground_truth)
    numerator = 2.0 * overlap + smooth
    denominator = total + smooth
    return numerator / denominator


def calculate_dice_scores(ground_truth_df, prediction_df, img_shape=(224, 224)) -> float:
    '''
    Mean Dice score over a dataset.

    ground_truth_df / prediction_df: column 0 is the image id, column 1 the
        RLE mask. After filtering, rows are paired by position, so both
        frames must list the samples in the same order.
    img_shape: (height, width) used to decode every RLE string.
    Returns the mean Dice over all pairs where at least one mask is non-empty,
    or NaN when there is no such pair.
    Raises ValueError when the two frames do not have the same number of rows
    after filtering (positional pairing would silently misalign them).
    '''
    # Keep only the rows in the prediction dataframe that have matching img_ids in the ground truth dataframe
    keep = prediction_df.iloc[:, 0].isin(ground_truth_df.iloc[:, 0])
    prediction_df = prediction_df[keep].reset_index(drop=True)

    # zip() below would silently stop at the shorter frame.
    if len(prediction_df) != len(ground_truth_df):
        raise ValueError(
            f"Row count mismatch: {len(prediction_df)} predictions vs "
            f"{len(ground_truth_df)} ground-truth rows"
        )

    # Extract the mask_rle columns
    pred_mask_rle = prediction_df.iloc[:, 1]
    gt_mask_rle = ground_truth_df.iloc[:, 1]

    def calculate_dice(pred_rle, gt_rle):
        pred_mask = rle_decode(pred_rle, img_shape)
        gt_mask = rle_decode(gt_rle, img_shape)

        if np.sum(gt_mask) > 0 or np.sum(pred_mask) > 0:
            return dice_score(pred_mask, gt_mask)
        return None  # Both masks empty: excluded from the mean

    dice_scores = Parallel(n_jobs=-1)(
        delayed(calculate_dice)(pred_rle, gt_rle) for pred_rle, gt_rle in zip(pred_mask_rle, gt_mask_rle)
    )

    dice_scores = [score for score in dice_scores if score is not None]  # Exclude None values

    if not dice_scores:
        # Avoid np.mean's empty-slice warning; NaN is what it would return.
        return float('nan')

    return np.mean(dice_scores)

In [None]:
# Mean Dice score of the predictions against the validation ground truth
# (masks decoded at the default 224x224 shape).
dice_sc = calculate_dice_scores(real_val,pred_val)

In [None]:
# Display the validation Dice score.
dice_sc

9.022641977511913e-11