## Imports

In [None]:
import os
import cv2
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from tqdm import tqdm
import albumentations as A
from albumentations.pytorch import ToTensorV2

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import random

In [None]:
def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and switches cuDNN
    to deterministic algorithm selection to reduce run-to-run variation.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    # Hash randomisation is fixed at interpreter start-up, so this only
    # influences processes launched after this call.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    # Without seeding torch, weight initialisation and DataLoader shuffling
    # would still differ between runs.
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # silently ignored when CUDA is unavailable
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
seed_everything(37) # Fix the random seed

In [None]:
# Mount Google Drive inside the Colab runtime, requesting a forced remount.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

# Work from the project folder so the relative paths used later
# ('./train.csv', './test.csv', ...) resolve inside it.
os.chdir('/content/gdrive/MyDrive/sw_ai')

Mounted at /content/gdrive


## Utils

In [None]:
# RLE decoding function
def rle_decode(mask_rle, shape):
    """Decode a run-length-encoded mask into a 2-D binary array.

    The encoding is a whitespace-separated sequence of ``start length`` pairs,
    where ``start`` is a 1-based index into the row-major flattened mask.

    Args:
        mask_rle: RLE string. ``None``, NaN, an empty string and the ``-1``
            sentinel (int or str) used in this notebook for "no pixels" all
            decode to an all-zero mask.
        shape: ``(height, width)`` of the mask to return.

    Returns:
        ``np.uint8`` array of shape ``shape`` holding 1 inside the runs and 0
        elsewhere.

    Raises:
        ValueError: If the encoding does not consist of complete
            ``start length`` pairs.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)

    # An empty CSV cell is read back as NaN; treat it like "no mask".
    is_nan = isinstance(mask_rle, (float, np.floating)) and np.isnan(mask_rle)
    if mask_rle is None or is_nan:
        return img.reshape(shape)

    if isinstance(mask_rle, (str, bytes)):
        tokens = mask_rle.split()
    else:
        # e.g. the integer -1 sentinel
        tokens = str(mask_rle).split()

    if not tokens or tokens == ['-1']:
        return img.reshape(shape)
    if len(tokens) % 2:
        # A dangling token would otherwise broadcast silently into a wrong mask.
        raise ValueError(
            f'RLE must contain start/length pairs, got {len(tokens)} tokens'
        )

    starts = np.asarray(tokens[0::2], dtype=int) - 1  # 1-based -> 0-based
    lengths = np.asarray(tokens[1::2], dtype=int)
    for lo, hi in zip(starts, starts + lengths):
        img[lo:hi] = 1
    return img.reshape(shape)

# RLE encoding function
def rle_encode(mask):
    """Run-length encode a mask as a ``start length start length ...`` string.

    The mask is flattened, and every maximal run of equal non-zero values is
    reported by its 1-based start position followed by its length. A mask
    with no non-zero pixels yields an empty string.
    """
    # Pad with a zero on both sides so runs touching either end still
    # produce an opening and a closing transition.
    padded = np.concatenate(([0], mask.flatten(), [0]))
    changed = padded[1:] != padded[:-1]
    boundaries = np.where(changed)[0] + 1
    # Even slots hold run starts, odd slots hold run ends; turn ends into lengths.
    boundaries[1::2] -= boundaries[::2]
    return ' '.join(map(str, boundaries))

## Custom Dataset

In [None]:
class SatelliteDataset(Dataset):
    """Dataset that reads image paths (and optionally RLE masks) from a CSV file.

    Column 1 (0-based) of the CSV is used as the image path and column 2 as
    the run-length-encoded mask string.

    Args:
        csv_file: Path of the CSV file to read.
        transform: Optional callable invoked as ``transform(image=...)`` or
            ``transform(image=..., mask=...)`` and indexed with ``'image'`` /
            ``'mask'`` on its result.
        infer: When true, ``__getitem__`` returns only the image and the mask
            column is never read.
    """

    def __init__(self, csv_file, transform=None, infer=False):
        self.data = pd.read_csv(csv_file)
        self.transform = transform
        self.infer = infer

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            The (optionally transformed) RGB image when ``infer`` is true,
            otherwise an ``(image, mask)`` pair.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        img_path = self.data.iloc[idx, 1]
        image = cv2.imread(img_path)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {img_path!r}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if self.infer:
            if self.transform:
                image = self.transform(image=image)['image']
            return image

        mask_rle = self.data.iloc[idx, 2]
        # The mask is decoded at the resolution of the image as loaded from disk.
        mask = rle_decode(mask_rle, (image.shape[0], image.shape[1]))

        if self.transform:
            augmented = self.transform(image=image, mask=mask)
            image = augmented['image']
            mask = augmented['mask']

        return image, mask

## Data Loader

In [None]:
# Preprocessing pipeline applied to every sample: resize to 224x224,
# normalise with the library defaults, then ToTensorV2.
transform = A.Compose(
    [
        A.Resize(224, 224),
        A.Normalize(),
        ToTensorV2()
    ]
)

# Training data: shuffled mini-batches of 16, loaded by 8 worker processes.
dataset = SatelliteDataset(csv_file='./train.csv', transform=transform)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=8)

In [None]:
import torch.nn as nn
import torchvision.models

## Define Model

In [None]:
import torchvision.models as models

def convrelu(in_channels, out_channels, kernel, padding):
  """Build a two-layer block: a ``Conv2d`` followed by an in-place ``ReLU``.

  Args:
    in_channels: Number of channels of the input feature map.
    out_channels: Number of channels produced by the convolution.
    kernel: Kernel size handed to ``nn.Conv2d``.
    padding: Padding handed to ``nn.Conv2d``.

  Returns:
    ``nn.Sequential`` holding the convolution at index 0 and the ReLU at index 1.
  """
  conv = nn.Conv2d(in_channels, out_channels, kernel, padding=padding)
  activation = nn.ReLU(inplace=True)
  return nn.Sequential(conv, activation)


class ResNetUNet(nn.Module):
  """U-Net style segmentation network built on a torchvision ResNet-18 encoder.

  The encoder stages are taken from ``resnet18``; the decoder repeatedly
  upsamples by 2, concatenates the matching encoder feature map (after a 1x1
  conv + ReLU) and fuses them with a 3x3 conv + ReLU. A separate full-resolution
  branch computed directly from the input is concatenated before the last layer.

  Args:
    n_class: Number of output channels of the final 1x1 convolution.
  """

  def __init__(self, n_class):
    super().__init__()

    # NOTE(review): newer torchvision releases deprecate `pretrained=` in favour
    # of `weights=` - confirm against the installed version.
    self.base_model = torchvision.models.resnet18(pretrained=True)
    # Plain Python list of the backbone's child modules, sliced below into stages.
    self.base_layers = list(self.base_model.children())

    self.layer0 = nn.Sequential(*self.base_layers[:3]) # size=(N, 64, x.H/2, x.W/2)
    self.layer0_1x1 = convrelu(64, 64, 1, 0)
    self.layer1 = nn.Sequential(*self.base_layers[3:5]) # size=(N, 64, x.H/4, x.W/4)
    self.layer1_1x1 = convrelu(64, 64, 1, 0)
    self.layer2 = self.base_layers[5]  # size=(N, 128, x.H/8, x.W/8)
    self.layer2_1x1 = convrelu(128, 128, 1, 0)
    self.layer3 = self.base_layers[6]  # size=(N, 256, x.H/16, x.W/16)
    self.layer3_1x1 = convrelu(256, 256, 1, 0)
    self.layer4 = self.base_layers[7]  # size=(N, 512, x.H/32, x.W/32)
    self.layer4_1x1 = convrelu(512, 512, 1, 0)

    # One shared (parameter-free) x2 bilinear upsampler reused at every decoder step.
    self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    # Decoder fusion convs; input channels = skip channels + upsampled channels.
    self.conv_up3 = convrelu(256 + 512, 512, 3, 1)
    self.conv_up2 = convrelu(128 + 512, 256, 3, 1)
    self.conv_up1 = convrelu(64 + 256, 256, 3, 1)
    self.conv_up0 = convrelu(64 + 256, 128, 3, 1)

    # Full-resolution branch applied directly to the 3-channel input.
    self.conv_original_size0 = convrelu(3, 64, 3, 1)
    self.conv_original_size1 = convrelu(64, 64, 3, 1)
    self.conv_original_size2 = convrelu(64 + 128, 64, 3, 1)

    # Final 1x1 projection to n_class channels; no activation follows it.
    self.conv_last = nn.Conv2d(64, n_class, 1)

  def forward(self, input):
    """Run the network on a batch and return the raw output of ``conv_last``.

    Args:
      input: 3-channel image batch. Presumably H and W must be multiples of 32
        so the upsampled maps line up with the skip connections - TODO confirm.

    Returns:
      Tensor with ``n_class`` channels; no sigmoid/softmax is applied here.
    """
    # NOTE(review): the parameter name `input` shadows the Python builtin.
    # Full-resolution features, used only at the very end of the decoder.
    x_original = self.conv_original_size0(input)
    x_original = self.conv_original_size1(x_original)

    # Encoder: each stage consumes the previous stage's output.
    layer0 = self.layer0(input)
    layer1 = self.layer1(layer0)
    layer2 = self.layer2(layer1)
    layer3 = self.layer3(layer2)
    layer4 = self.layer4(layer3)

    # Decoder step 1: deepest features upsampled and fused with layer3.
    layer4 = self.layer4_1x1(layer4)
    x = self.upsample(layer4)
    layer3 = self.layer3_1x1(layer3)
    x = torch.cat([x, layer3], dim=1)
    x = self.conv_up3(x)

    # Decoder step 2: fuse with layer2.
    x = self.upsample(x)
    layer2 = self.layer2_1x1(layer2)
    x = torch.cat([x, layer2], dim=1)
    x = self.conv_up2(x)

    # Decoder step 3: fuse with layer1.
    x = self.upsample(x)
    layer1 = self.layer1_1x1(layer1)
    x = torch.cat([x, layer1], dim=1)
    x = self.conv_up1(x)

    # Decoder step 4: fuse with layer0.
    x = self.upsample(x)
    layer0 = self.layer0_1x1(layer0)
    x = torch.cat([x, layer0], dim=1)
    x = self.conv_up0(x)

    # Final upsample, then fuse with the full-resolution branch.
    x = self.upsample(x)
    x = torch.cat([x, x_original], dim=1)
    x = self.conv_original_size2(x)

    out = self.conv_last(x)

    return out

## Model Training

In [None]:
# NOTE(review): within the visible notebook `model` is first assigned in the
# training cell further down, so on a fresh kernel this cell raises NameError.
model

In [None]:
# Layer-by-layer summary of the network for a 3x512x512 input.
# NOTE(review): `model` is only assigned in the training cell below, so this
# cell fails on a fresh kernel; the 512x512 size also differs from the
# 224x224 resize used by the data pipeline.
from torchsummary import summary
summary(model, input_size=(3, 512, 512))

In [None]:
# Initialise the model (single output channel)
model = ResNetUNet(1).to(device)

# Define the loss function and the optimizer
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# training loop
for epoch in range(17):  # Train for 17 epochs.
    model.train()
    epoch_loss = 0
    for images, masks in tqdm(dataloader):
        images = images.float().to(device)
        masks = masks.float().to(device)

        optimizer.zero_grad()
        outputs = model(images)
        # Insert an axis at position 1 so the target matches the
        # single-channel model output.
        loss = criterion(outputs, masks.unsqueeze(1))
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Report the mean batch loss of the epoch.
    print(f'Epoch {epoch+1}, Loss: {epoch_loss/len(dataloader)}')

100%|██████████| 447/447 [04:37<00:00,  1.61it/s]


Epoch 1, Loss: 0.1573596568882332


100%|██████████| 447/447 [00:47<00:00,  9.47it/s]


Epoch 2, Loss: 0.1136941791780843


100%|██████████| 447/447 [00:46<00:00,  9.52it/s]


Epoch 3, Loss: 0.10003746903482699


100%|██████████| 447/447 [00:47<00:00,  9.50it/s]


Epoch 4, Loss: 0.0911728963839261


100%|██████████| 447/447 [00:47<00:00,  9.47it/s]


Epoch 5, Loss: 0.08457090042248135


100%|██████████| 447/447 [00:47<00:00,  9.39it/s]


Epoch 6, Loss: 0.08048426194879033


100%|██████████| 447/447 [00:47<00:00,  9.51it/s]


Epoch 7, Loss: 0.07483216403448074


100%|██████████| 447/447 [00:47<00:00,  9.44it/s]


Epoch 8, Loss: 0.07052853535152388


100%|██████████| 447/447 [00:47<00:00,  9.42it/s]


Epoch 9, Loss: 0.06807548441076172


100%|██████████| 447/447 [00:47<00:00,  9.47it/s]


Epoch 10, Loss: 0.0649616323261453


100%|██████████| 447/447 [00:47<00:00,  9.39it/s]


Epoch 11, Loss: 0.06263892225904485


100%|██████████| 447/447 [00:47<00:00,  9.47it/s]


Epoch 12, Loss: 0.060212456448299506


100%|██████████| 447/447 [00:47<00:00,  9.45it/s]


Epoch 13, Loss: 0.05847673666073839


100%|██████████| 447/447 [00:47<00:00,  9.44it/s]


Epoch 14, Loss: 0.05679118023222725


100%|██████████| 447/447 [00:47<00:00,  9.49it/s]


Epoch 15, Loss: 0.05633058049768143


100%|██████████| 447/447 [00:47<00:00,  9.37it/s]


Epoch 16, Loss: 0.05372762696304977


100%|██████████| 447/447 [00:47<00:00,  9.41it/s]

Epoch 17, Loss: 0.05256773393092806





In [None]:
import torch

# Save the trained model held in `model`; the whole module object is
# serialized here, not just its state_dict.
torch.save(model, './model(31).pth')


In [None]:
import torch
# The checkpoint holds the whole pickled module (it was written with
# torch.save(model, ...)), so it has to be loaded with weights_only=False;
# PyTorch 2.6+ defaults to weights_only=True and would refuse to unpickle it.
# NOTE: unpickling can execute arbitrary code - only load checkpoints you
# created yourself or otherwise trust.
model = torch.load("./model(31).pth", map_location=device, weights_only=False)

## Inference

In [None]:
# Test data: images only (infer=True), same preprocessing as training, and
# no shuffling so predictions keep the row order of test.csv.
test_dataset = SatelliteDataset(csv_file='./test.csv', transform=transform, infer=True)
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=8)

In [None]:
# Inference: gradient tracking is disabled and the model is switched to eval mode.
with torch.no_grad():
    model.eval()
    result = []
    for images in tqdm(test_dataloader):
        images = images.float().to(device)

        outputs = model(images)
        # Logits -> probabilities, then drop the singleton axis 1.
        masks = torch.sigmoid(outputs).cpu().numpy()
        masks = np.squeeze(masks, axis=1)
        masks = (masks > 0.35).astype(np.uint8) # Binarise with threshold 0.35

        for i in range(len(images)):
            mask_rle = rle_encode(masks[i])
            if mask_rle == '': # No building pixels predicted at all: store -1
                result.append(-1)
            else:
                result.append(mask_rle)

100%|██████████| 3790/3790 [24:14<00:00,  2.61it/s]


## Submission

In [None]:
# Fill the sample submission's `mask_rle` column with the predictions;
# `result` must contain exactly one entry per row.
submit = pd.read_csv('./sample_submission.csv')
submit['mask_rle'] = result

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv('./submit(31).csv', index=False)