In [None]:
# Mount Google Drive at /content/drive so later cells can read the files stored there.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
# Work from the project folder on the mounted Drive so the relative paths used
# below (e.g. ./train.csv) resolve.
os.chdir('/content/drive/MyDrive/sw_ai')

In [None]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Select the compute device: use the GPU (CUDA) when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Utils

In [None]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Expand a run-length-encoded mask string into a binary array.

    Args:
        mask_rle: Whitespace-separated "start length start length ..." string;
            starts are 1-based positions into the flattened (row-major) mask.
        shape: (height, width) of the mask to rebuild.

    Returns:
        np.uint8 array of the given shape holding 1 inside every run and 0
        everywhere else.
    """
    tokens = mask_rle.split()
    run_starts = np.asarray(tokens[0::2], dtype=int)
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_starts -= 1  # 1-based starts -> 0-based indices
    run_ends = run_starts + run_lengths

    flat_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for begin, end in zip(run_starts, run_ends):
        flat_mask[begin:end] = 1
    return flat_mask.reshape(shape)


# RLE encoding function
def rle_encode(mask):
    """Run-length encode a binary mask.

    Args:
        mask: Array-like with a ``flatten()`` method whose non-zero entries mark
            the foreground.

    Returns:
        Space-separated "start length ..." string with 1-based starts into the
        flattened mask; an empty string when the mask has no foreground.
    """
    # Pad with a zero on both sides so runs touching either end still produce
    # a rising and a falling edge.
    padded = np.concatenate([[0], mask.flatten(), [0]])
    edges = np.where(padded[1:] != padded[:-1])[0] + 1
    # Edges alternate run start / run end; turn every end into a length.
    edges[1::2] -= edges[::2]
    return " ".join(map(str, edges))

# Custom Dataset

In [None]:
class SatelliteDataset(Dataset):
    """Dataset of satellite images (and building masks) listed in a CSV file.

    The CSV is read positionally: column 1 holds the image path and column 2 the
    run-length-encoded mask (presumably the `img_path` and `mask_rle` columns of
    train.csv -- confirm against the file).

    Args:
        csv_file: Path of the CSV file to read.
        transform: Optional callable invoked as ``transform(image=...)`` in
            inference mode or ``transform(image=..., mask=...)`` otherwise; its
            result is indexed with ``'image'`` (and ``'mask'``).
        infer: When True, ``__getitem__`` returns only the image and the mask
            column is never read.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) RGB image when ``infer`` is True,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        # The mask is decoded at the image's own resolution so that the
        # transform can resize image and mask together.
        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

In [None]:
class SatelliteDatasetForValid(Dataset):
    """Dataset over an in-memory table of satellite images and building masks.

    Same behaviour as a CSV-backed dataset, but the rows come from an existing
    DataFrame (e.g. one side of a train/validation split). Rows are read
    positionally: column 1 holds the image path and column 2 the
    run-length-encoded mask.

    Args:
        dataset: DataFrame-like object supporting ``len()`` and ``.iloc``.
        transform: Optional callable invoked as ``transform(image=...)`` in
            inference mode or ``transform(image=..., mask=...)`` otherwise; its
            result is indexed with ``'image'`` (and ``'mask'``).
        infer: When True, ``__getitem__`` returns only the image and the mask
            column is never read.
    """

    def __init__(self, dataset, transform=None, infer=False):
        self.data = dataset
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` (positional, not label-based).

        Returns:
            The (optionally transformed) RGB image when ``infer`` is True,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread reports a missing or unreadable file by returning None;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        # The mask is decoded at the image's own resolution so that the
        # transform can resize image and mask together.
        mask_rle = self.data.iloc[idx, 2]
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

# Data Loader

In [None]:
# Data loader over the full training CSV.
# Every image/mask pair is resized to 224x224, the image is normalized with
# A.Normalize()'s default statistics, and ToTensorV2 performs the tensor conversion.
transform = A.Compose([A.Resize(224, 224), A.Normalize(), ToTensorV2()])
dataset = SatelliteDataset(csv_file="./train.csv", transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)

# EDA

## 1) Pixel Count Graph about each image

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Decode every training mask so that pixels can be counted per image.
# NOTE(review): assumes every training image is 1024x1024 -- confirm against the data.
# NOTE(review): this keeps one uint8 array of 1024*1024 elements per CSV row in
# memory at once; the foreground count could instead be obtained by summing the
# RLE run lengths without decoding.
img_size = (1024, 1024)
decode_data = [rle_decode(dataset.data["mask_rle"][i], img_size) for i in range(len(dataset.data["mask_rle"]))]

In [None]:
# Per-image foreground (non-zero) pixel counts.
lst_nonzero_count = [np.count_nonzero(decode_data[i]) for i in range(len(dataset.data["mask_rle"]))]
# Every mask was decoded with the same shape, so decode_data[0].size is the
# per-image pixel total; background count = total - foreground.
lst_zero_count = [decode_data[0].size - count for count in lst_nonzero_count]

In [None]:
# Total number of pixels in one decoded mask (height * width).
decode_data[0].size

In [None]:
# Scatter plot with one point per training image:
# foreground pixel count (x) against background pixel count (y).
fig, ax = plt.subplots()

sizes, colors = 10, 'red'
x, y = lst_nonzero_count, lst_zero_count

plt.xlabel('nonzero_count')
plt.ylabel('zero_count')

# Pad both axis ranges by 10,000 pixels so the extreme points do not sit on the frame.
plt.xlim([min(lst_nonzero_count)-10000, max(lst_nonzero_count)+10000])
plt.ylim([min(lst_zero_count)-10000, max(lst_zero_count)+10000])

ax.scatter(x, y, s=sizes, c=colors)

plt.show()

**결과 해석**

개수 뿐만 아니라 위치도 요인으로 작용하겠지만,
위치를 제외한 개수만으로 판단했을 때, 상대적으로 `nonzero_count(건물 pixel 개수)가 크며 zero_count(배경 pixel 개수)가 적은 데이터` 가 부족함을 알 수 있다.

## 2) Pixel Distribution

Training Set의 2가지 Class(0, 1)의 Pixel Distribution을 시각화함.

In [None]:
# Total background / foreground pixel counts summed over all training images.
zero_count = sum(lst_zero_count)
nonzero_count = sum(lst_nonzero_count)

In [None]:
# Bar chart of the total pixel count per class (0 = background, 1 = building)
# over the whole training set.
x = [0, 1]
y = [zero_count, nonzero_count]

# plot
fig, ax = plt.subplots()
ax.bar(x, y, width=1, edgecolor="white", linewidth=0.7)

# Show only the two class ids as tick labels.
plt.xticks(x)
plt.xlabel('class')
plt.ylabel('count')

plt.show()

**결과 해석**

Class-Imbalance (클래스 불균형)이 굉장히 심한 데이터셋이다.
이 건물이미지 데이터의 경우 배경 pixel 개수가 더 많은 것을 확인할 수 있다.  

# IoU for test dataset

## 1) Model

In [None]:
# Preprocessing used for evaluation: resize to 224x224, normalize with
# A.Normalize()'s defaults, then convert with ToTensorV2.
# NOTE(review): this rebinds `transform`, `dataset` and `dataloader`, which an
# earlier cell already defined with the same transform but batch_size=16 / num_workers=4.
transform = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

dataset = SatelliteDataset(csv_file='./train.csv', transform=transform)
dataloader = DataLoader(dataset, batch_size=4, shuffle=True, num_workers=8)

In [None]:
import torch.nn as nn
import torchvision.models

In [None]:
import torchvision.models as models

def convrelu(in_channels, out_channels, kernel, padding):
    """Build a Conv2d layer followed by an in-place ReLU.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels of the convolution.
        kernel: Kernel size passed to nn.Conv2d.
        padding: Padding passed to nn.Conv2d.

    Returns:
        nn.Sequential containing the convolution and then the ReLU.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel, padding=padding)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(conv, activation)


class ResNetUNet(nn.Module):
    """U-Net-style segmentation network built on a torchvision ResNet-18 encoder.

    The encoder stages are slices of the ResNet-18 child modules; each stage
    output is passed through a 1x1 conv+ReLU and concatenated with the upsampled
    decoder feature. An extra full-resolution branch (two 3x3 conv+ReLU blocks
    on the raw input) is merged in just before the final 1x1 classifier.

    Args:
        n_class: Number of output channels of the final 1x1 convolution.
    """

    def __init__(self, n_class):
        super().__init__()

        # NOTE(review): `pretrained=True` is presumably deprecated in newer
        # torchvision releases in favour of `weights=` -- confirm for the
        # installed version.
        self.base_model = torchvision.models.resnet18(pretrained=True)
        self.base_layers = list(self.base_model.children())

        # Encoder stages (slices of the backbone's children) and the 1x1
        # conv+ReLU applied to each stage output before it is used as a skip.
        self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
        self.layer0_1x1 = convrelu(64, 64, 1, 0)
        self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
        self.layer1_1x1 = convrelu(64, 64, 1, 0)
        self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
        self.layer2_1x1 = convrelu(128, 128, 1, 0)
        self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
        self.layer3_1x1 = convrelu(256, 256, 1, 0)
        self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
        self.layer4_1x1 = convrelu(512, 512, 1, 0)

        # One shared x2 bilinear upsampler, reused at every decoder step.
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

        # Decoder fusion convs; input channels = skip channels + channels of
        # the upsampled feature coming from the previous decoder step.
        self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
        self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
        self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
        self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

        # Full-resolution branch applied directly to the input, and the conv
        # that fuses it with the last decoder feature.
        self.conv_original_size0 = convrelu(3, 64, 3, 1)
        self.conv_original_size1 = convrelu(64, 64, 3, 1)
        self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

        # 1x1 classifier producing n_class output channels (no activation applied here).
        self.conv_last = nn.Conv2d(64, n_class, 1)

    def forward(self, input):
        """Run the encoder/decoder and return the raw n_class-channel output.

        `input` must have 3 channels (conv_original_size0 is built for 3 input
        channels). Presumably H and W should be divisible by 32 so that every
        x2 upsample matches its skip feature -- TODO confirm.
        """
        # Full-resolution features taken straight from the input.
        x_original = self.conv_original_size0(input)
        x_original = self.conv_original_size1(x_original)

        # Encoder: each stage consumes the previous stage's output.
        layer0 = self.layer0(input)
        layer1 = self.layer1(layer0)
        layer2 = self.layer2(layer1)
        layer3 = self.layer3(layer2)
        layer4 = self.layer4(layer3)

        # Decoder step 3: deepest feature -> upsample -> concat with layer3 skip.
        layer4 = self.layer4_1x1(layer4)
        x = self.upsample(layer4)
        layer3 = self.layer3_1x1(layer3)
        x = torch.cat([x, layer3], dim=1)
        x = self.conv_up3(x)

        # Decoder step 2: concat with layer2 skip.
        x = self.upsample(x)
        layer2 = self.layer2_1x1(layer2)
        x = torch.cat([x, layer2], dim=1)
        x = self.conv_up2(x)

        # Decoder step 1: concat with layer1 skip.
        x = self.upsample(x)
        layer1 = self.layer1_1x1(layer1)
        x = torch.cat([x, layer1], dim=1)
        x = self.conv_up1(x)

        # Decoder step 0: concat with layer0 skip.
        x = self.upsample(x)
        layer0 = self.layer0_1x1(layer0)
        x = torch.cat([x, layer0], dim=1)
        x = self.conv_up0(x)

        # Final upsample, then merge with the full-resolution branch.
        x = self.upsample(x)
        x = torch.cat([x, x_original], dim=1)
        x = self.conv_original_size2(x)

        out = self.conv_last(x)

        return out

In [None]:
import torch

# Load the trained model from disk, mapping its tensors onto the selected device.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints that come from a trusted source.
# NOTE(review): the loaded object is used directly as `model` below, so the file
# presumably holds a whole pickled model whose class must already be defined -- confirm.
model = torch.load("./model(32-runet0.0001).pth", map_location=device)

In [None]:
from torchsummary import summary

# Print a summary of the model for a single 3x224x224 input.
summary(model, input_size=(3, 224, 224))

## 2) Model Validation set

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of train.csv as a validation split; the fixed random_state makes
# the split reproducible.
data = pd.read_csv("./train.csv")
train, valid = train_test_split(data, test_size=0.2, random_state=123)

# Both splits use the same `transform` defined earlier.
trainset = SatelliteDatasetForValid(dataset = train, transform=transform)
validset = SatelliteDatasetForValid(dataset = valid, transform=transform)

# Only the training loader shuffles; validation order is kept so that the
# predictions line up with the rows of `valid`.
train_dataloader = DataLoader(trainset, batch_size=16, shuffle=True, num_workers=4)
valid_dataloader = DataLoader(validset, batch_size=16, shuffle=False, num_workers=4)

## 3) Model eval

In [None]:
# Run the model over the validation loader and collect one RLE string (or -1)
# per validation image in `result`, in loader order.
with torch.no_grad():
    model.eval()
    result = []
    # The ground-truth `mask` from the loader is not used in this loop.
    for images, mask in tqdm(valid_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        masks = torch.sigmoid(outputs).cpu().numpy()
        # Drop the channel axis -- assumes the model emits one channel, i.e. (N, 1, H, W); TODO confirm.
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Threshold = 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # no building pixel predicted at all: store -1
                result.append(-1)
            else:
                result.append(mask_rle)

In [None]:
# import time

# empty_list = []
# for word in tqdm(['apple', 'ball', 'cat', 'doll', 'egg']):
#     time.sleep(0.1)
#     empty_list.append(word)

In [None]:
# empty_list

## 4) 성능 평가

### true_mask vs pred_mask 이미지 비교

In [None]:
import numpy as np
import pandas as pd
from typing import List, Union
from joblib import Parallel, delayed


def rle_decode(mask_rle: Union[str, int], shape=(224, 224)) -> np.ndarray:
    '''
    Decode a run-length-encoded mask.

    mask_rle: run-length as string formatted (start length), with 1-based
        starts into the flattened mask, or -1 for "no foreground predicted"
    shape: (height,width) of array to return
    Returns a np.uint8 array of the given shape, 1 - mask, 0 - background
    '''
    # -1 is the sentinel stored when nothing was predicted. Return the same
    # dtype as the normal path so downstream code sees consistent arrays.
    if mask_rle == -1:
        return np.zeros(shape, dtype=np.uint8)

    s = mask_rle.split()
    starts, lengths = [np.asarray(x, dtype=int) for x in (s[0::2], s[1::2])]
    starts -= 1  # 1-based starts -> 0-based indices
    ends = starts + lengths
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = 1
    return img.reshape(shape)

In [None]:
# Image paths of the validation split, in the row order of `valid`.
lst_input_img_path = list(valid['img_path'])

In [None]:
def display(display_list):
    """Show the input image, true mask and predicted mask side by side.

    Args:
        display_list: Up to three entries, in this order: path of the input
            image, ground-truth mask (anything plt.imshow accepts), and the
            predicted mask as an RLE value understood by rle_decode.
    """
    titles = ['Input Image', 'True Mask', 'Predicted Mask']
    n_panels = len(display_list)

    plt.figure(figsize=(15, 15))
    for position, entry in enumerate(display_list):
        plt.subplot(1, n_panels, position + 1)
        plt.title(titles[position])
        if position == 0:
            # Entry is a file path: load it and convert OpenCV's BGR to RGB.
            panel = cv2.cvtColor(cv2.imread(entry), cv2.COLOR_BGR2RGB)
            panel = panel.astype(np.uint8).copy()
        elif position == 1:
            # Entry is already a mask array.
            panel = entry
        elif position == 2:
            # Entry is an RLE prediction, decoded at rle_decode's default shape.
            panel = rle_decode(entry)
        plt.imshow(panel)
        plt.axis('off')
    plt.show()

# Compare the first validation sample: its image path, its transformed
# ground-truth mask, and the model's predicted RLE.
display_list = [lst_input_img_path[0], (validset.__getitem__(0))[1], result[0]]
display(display_list)

---

### Dice Score

In [None]:
def dice_score(prediction: np.array, ground_truth: np.array, smooth=1e-7) -> float:
    '''
    Dice coefficient of two binary masks.

    prediction, ground_truth: arrays of the same shape holding 0/1 values
    smooth: small constant added to numerator and denominator so that two
        empty masks do not divide by zero
    Returns (2 * |A ∩ B| + smooth) / (|A| + |B| + smooth)
    '''
    overlap = np.sum(prediction * ground_truth)
    numerator = 2.0 * overlap + smooth
    denominator = np.sum(prediction) + np.sum(ground_truth) + smooth
    return numerator / denominator


def calculate_dice_scores(ground_truth_df, prediction_df, img_shape=(224, 224)) -> float:
    '''
    Calculate the mean Dice score over a dataset.

    Both frames are read positionally: column 0 is the image id and column 1
    the RLE mask. Predictions whose id does not occur in the ground truth are
    dropped; the remaining rows are then paired with the ground-truth rows BY
    POSITION, so both frames must list the images in the same order.

    ground_truth_df: frame with (img_id, mask_rle) as its first two columns
    prediction_df: frame with (img_id, mask_rle) as its first two columns
    img_shape: (height,width) used to decode every RLE mask
    Returns the mean Dice score over all pairs in which at least one of the two
        masks is non-empty, or nan when no pair qualifies
    Raises ValueError if the two frames have different row counts after filtering
    '''
    # Keep only the rows in the prediction dataframe that have matching img_ids in the ground truth dataframe
    prediction_df = prediction_df[prediction_df.iloc[:, 0].isin(ground_truth_df.iloc[:, 0])]
    prediction_df = prediction_df.reset_index(drop=True)

    # Extract the mask_rle columns
    pred_mask_rle = prediction_df.iloc[:, 1]
    gt_mask_rle = ground_truth_df.iloc[:, 1]

    # Rows are paired positionally below; zip() would silently drop the tail of
    # the longer column and score misaligned pairs.
    if len(pred_mask_rle) != len(gt_mask_rle):
        raise ValueError(
            f"Row count mismatch: {len(pred_mask_rle)} predictions vs "
            f"{len(gt_mask_rle)} ground-truth masks"
        )

    def calculate_dice(pred_rle, gt_rle):
        pred_mask = rle_decode(pred_rle, img_shape)
        gt_mask = rle_decode(gt_rle, img_shape)

        if np.sum(gt_mask) > 0 or np.sum(pred_mask) > 0:
            return dice_score(pred_mask, gt_mask)
        else:
            return None  # No valid masks found, return None

    dice_scores = Parallel(n_jobs=-1)(
        delayed(calculate_dice)(pred_rle, gt_rle) for pred_rle, gt_rle in zip(pred_mask_rle, gt_mask_rle)
    )

    dice_scores = [score for score in dice_scores if score is not None]  # Exclude None values

    # np.mean([]) would emit a RuntimeWarning; return nan explicitly instead.
    if not dice_scores:
        return float('nan')

    return np.mean(dice_scores)

In [None]:
# Validation rows without the image path column (ids and RLE masks remain).
# ground_truth_df = valid.drop('img_path', axis=1)
df = valid.drop('img_path', axis=1)

In [None]:
# Pair every validation img_id with the model's predicted RLE (or -1);
# `result` is in validation-loader order, which is not shuffled.
# valid_pred = {'img_id': ground_truth_df['img_id'], 'mask_rle': result}
valid_pred = {'img_id': df['img_id'], 'mask_rle': result}
prediction_df = pd.DataFrame(data = valid_pred)

In [None]:
# Re-encode the transformed (resized to 224x224) ground-truth masks as RLE so
# they are at the same resolution as the predictions.
lst_ground_truth_rle = [rle_encode((validset.__getitem__(i))[1]) for i in range(len(valid))]
# NOTE(review): `valid_pred` is reused here for the ground-truth columns,
# rebinding the name that held the prediction dict (prediction_df is unaffected).
valid_pred = {'img_id': df['img_id'], 'mask_rle': lst_ground_truth_rle}
ground_truth_df = pd.DataFrame(data = valid_pred)

In [None]:
# Sanity check: decode one ground-truth RLE and look at the mask shape
# (rle_decode defaults to 224x224).
# NOTE(review): 4458 is looked up as an index label, not a position -- presumably
# a row label carried over from train.csv by the split; this raises KeyError if
# that row is not part of the validation split.
rle_decode(ground_truth_df['mask_rle'][4458]).shape

In [None]:
# Mean Dice score of the predictions on the validation split.
calculate_dice_scores(ground_truth_df, prediction_df, img_shape=(224, 224))

### Class 별 IOU

In [None]:
# Positions in `result` where the model predicted no building pixel (sentinel -1).
predNoBuildingIdx = list(filter(lambda x: result[x] == -1, range(len(result))))

In [None]:
from sklearn.metrics import confusion_matrix

def generateConfusionMatrix(ground_truth_mask, pred_mask):
    """Build the 2x2 pixel-level confusion matrix of one image.

    Args:
        ground_truth_mask: RLE value of the true mask, as accepted by rle_decode.
        pred_mask: RLE value of the predicted mask (may be the -1 sentinel).

    Returns:
        2x2 array from sklearn's confusion_matrix: rows are the true class,
        columns the predicted class, in the order (0 = background, 1 = building).
        Masks are decoded at rle_decode's default shape.
    """
    # ravel() flattens in one step; the previous sum(list_of_lists, []) rebuilt
    # the list once per row. The cast keeps both label vectors integer-typed.
    y_true = rle_decode(ground_truth_mask).astype(np.uint8).ravel()
    y_pred = rle_decode(pred_mask).astype(np.uint8).ravel()
    # Pin the label set: without it an image containing a single class in both
    # masks yields a 1x1 matrix, which per-class IoU code cannot index.
    cMatrix = confusion_matrix(y_true, y_pred, labels=[0, 1])
    return cMatrix

def generateConfusionMatrixLst(lst_ground_truth_rle, lst_pred_rle):
    """Build one confusion matrix per (ground truth, prediction) pair.

    Args:
        lst_ground_truth_rle: Sequence of ground-truth RLE values.
        lst_pred_rle: Sequence of predicted RLE values, in the same order.

    Returns:
        List of confusion matrices, one per image, in input order.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(lst_ground_truth_rle) != len(lst_pred_rle):
        raise ValueError(
            f"Length mismatch: {len(lst_ground_truth_rle)} ground-truth masks vs "
            f"{len(lst_pred_rle)} predictions"
        )
    # Use the lst_pred_rle argument; the previous version ignored it and read
    # the notebook-level `result` variable instead. The work was already
    # sequential (n_jobs=1), so a plain comprehension is equivalent.
    return [
        generateConfusionMatrix(gt_rle, pred_rle)
        for gt_rle, pred_rle in zip(lst_ground_truth_rle, lst_pred_rle)
    ]

In [None]:
# One confusion matrix per validation image (ground truth vs. prediction).
Lst_cMatrix = generateConfusionMatrixLst(lst_ground_truth_rle, result)

In [None]:
# Inspect the confusion matrix of the first validation image.
Lst_cMatrix[0]

In [None]:
def IoU(cMatrix):
    """Per-class intersection-over-union from a 2x2 confusion matrix.

    Args:
        cMatrix: 2x2 array, rows = true class, columns = predicted class.

    Returns:
        Array [IoU of class 0, IoU of class 1]. A class that is absent from
        both the truth and the prediction gives 0/0, i.e. nan.
    """
    predicted_totals = cMatrix.sum(axis=0)
    # union = pixels predicted as the class + pixels of the class that were
    # predicted as the other one.
    union = np.array([
        predicted_totals[0] + cMatrix[0][1],
        predicted_totals[1] + cMatrix[1][0],
    ])
    return cMatrix.diagonal() / union

# Average the per-image IoU values over all images.
def totalIoU(lst_cMatrix):
    """Mean per-class IoU over a list of confusion matrices.

    An image's IoU for a class is nan when the class is absent from both its
    ground truth and its prediction (0/0). Such entries are skipped, so each
    class is averaged only over the images where it is defined; previously one
    nan turned the whole class average into nan.

    Args:
        lst_cMatrix: Iterable of 2x2 confusion matrices.

    Returns:
        float64 array [mean IoU of class 0, mean IoU of class 1]; an entry is
        nan when no image defines that class (including an empty input).
    """
    per_image = np.array([IoU(cMat) for cMat in lst_cMatrix], dtype='float64')
    if per_image.size == 0:
        return np.full(2, np.nan)

    defined = ~np.isnan(per_image)
    sums = np.where(defined, per_image, 0.0).sum(axis=0)
    counts = defined.sum(axis=0)
    # Divide only where at least one image contributed; leave nan elsewhere.
    return np.divide(sums, counts, out=np.full(sums.shape, np.nan), where=counts > 0)

def eachIoU(lst_cMatrix):
    """Return the per-class IoU of every confusion matrix, in input order."""
    return [IoU(cMat) for cMat in lst_cMatrix]

In [None]:
# Per-class IoU of the first validation image.
IoU(Lst_cMatrix[0])

In [None]:
# Per-class IoU averaged over all validation images (displayed only).
totalIoU(Lst_cMatrix)

In [None]:
# Keep the averaged per-class IoU for the report below.
totaliou = totalIoU(Lst_cMatrix)

In [None]:
def printClassScores(totaliou):
    """Print a table of per-class IoU values followed by their average.

    Args:
        totaliou: Sequence of IoU values ordered as (background, building).
            An empty sequence prints only the header and a 0.000 average.
    """
    label = ['background', 'building']
    # nIoU is not computed here; the column always shows this placeholder.
    # It is bound before the loop so the summary line also works when
    # `totaliou` is empty (it used to be assigned only inside the loop).
    niouStr = 'empty'
    print('classes          IoU      nIoU')
    print('--------------------------------')
    for i, iou in enumerate(totaliou):
        labelName = label[i]
        iouStr = f'{iou:>5.3f}'
        print('{:<14}: '.format(labelName) + iouStr + '    ' + niouStr)
    print('--------------------------------')
    # Average over the number of known classes rather than a literal 2.
    print(f'Score Average : {(np.sum(totaliou) / len(label)):>5.3f}' + '    ' + niouStr)

In [None]:
# Print the per-class IoU table for the validation split.
printClassScores(totaliou)