## 필요 모듈 설치

In [None]:
!pip install pretrainedmodels==0.7.4
!pip install efficientnet_pytorch==0.6.3
!pip install -U git+https://github.com/albu/albumentations --no-cache-dir
!pip install -U git+https://github.com/qubvel/segmentation_models.pytorch

In [None]:
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt
import random
import sys

import albumentations as albu

from torch.utils.data import DataLoader
from torch.utils.data import Dataset as BaseDataset

import torch
import numpy as np
import segmentation_models_pytorch as smp

## 결과 재현을 위한 설정

In [None]:
# Seed every random number generator used by the notebook so that repeated
# runs draw the same random numbers.
seed = 50
random.seed(seed)
# NOTE: hash randomization is fixed when the interpreter starts, so this only
# influences Python processes launched after this point.
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)  # covers every visible GPU, not just the current one
torch.backends.cudnn.deterministic = True
# Benchmark mode lets cuDNN auto-tune and pick convolution algorithms at run
# time, which may differ between runs; it has to be off for reproducibility.
torch.backends.cudnn.benchmark = False

## 경로 설정

In [None]:
# Make the segmentation source code shipped with the dataset importable
_segmentation_src = '../input/lv2-dataset/SIA_pytorch/segmentation_models'
sys.path.append(_segmentation_src)

In [None]:
# Training images and their label masks
x_train_dir, y_train_dir = (
    '../input/lv2-dataset/LV2_training_set/images_4',
    '../input/lv2-dataset/LV2_training_set/labels_4',
)

# Validation images and their label masks
x_valid_dir, y_valid_dir = (
    '../input/lv2-dataset/LV2_validation_set/images',
    '../input/lv2-dataset/LV2_validation_set/labels',
)

## 데이터 시각화 

In [None]:
# Plotting helper used by the preview cells
def visualize(**images):
    """Show the given images side by side in a single row.

    Every keyword argument becomes one panel. The keyword name, with
    underscores turned into spaces and title-cased, is used as the panel
    title; the value is handed to ``plt.imshow``. Axis ticks are hidden.
    """
    panel_count = len(images)
    plt.figure(figsize=(16, 5))
    for position, (label, picture) in enumerate(images.items(), start=1):
        plt.subplot(1, panel_count, position)
        plt.xticks([])
        plt.yticks([])
        plt.title(label.replace('_', ' ').title())
        plt.imshow(picture)
    plt.show()

## 데이터로더 정의

In [None]:
class Dataset(BaseDataset):
    """Building/road segmentation dataset.

    Reads an image and the grayscale mask that has the same file name, turns
    the mask into one binary channel per requested class, then applies the
    optional augmentation and preprocessing pipelines.

    Args:
        images_dir (str): path to images folder
        masks_dir (str): path to segmentation masks folder; every mask must
            carry the same file name as its image
        classes (list): names of the classes to extract from the mask, in
            output-channel order (case-insensitive); ``None`` selects every
            entry of ``CLASSES``
        augmentation (albumentations.Compose): data transformation pipeline
            (e.g. flip, scale, etc.)
        preprocessing (albumentations.Compose): data preprocessing
            (e.g. normalization, shape manipulation, etc.)

    Raises:
        ValueError: if ``classes`` contains a name that is not in ``CLASSES``.

    """

    CLASSES = ['building', 'road']
    # Gray level that marks each class in the mask files, aligned with CLASSES.
    CLASS_VALUES = [200, 255]

    def __init__(
            self,
            images_dir,
            masks_dir,
            classes=None,
            augmentation=None,
            preprocessing=None,
    ):
        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices stable across runs and across Dataset instances that point
        # at the same folder.
        self.ids = sorted(os.listdir(images_dir))
        self.images_fps = [os.path.join(images_dir, image_id) for image_id in self.ids]
        self.masks_fps = [os.path.join(masks_dir, image_id) for image_id in self.ids]

        # convert str names to class values on masks
        if classes is None:
            classes = self.CLASSES
        self.class_values = [
            self.CLASS_VALUES[self.CLASSES.index(cls.lower())] for cls in classes
        ]

        # Record the gray levels found in the first mask that contains as many
        # distinct non-minimum levels as there are requested classes. The
        # smallest level is dropped (presumably background - TODO confirm).
        self.mask_ids = np.array([], dtype=np.uint8)
        for mask_fp in self.masks_fps:
            probe = cv2.imread(mask_fp, 0)
            if probe is None:
                # unreadable or missing mask: nothing to learn from it
                continue
            self.mask_ids = np.unique(probe)[1:]
            if len(self.mask_ids) == len(self.class_values):
                break

        self.augmentation = augmentation
        self.preprocessing = preprocessing

    def __getitem__(self, i):
        """Return the ``(image, mask)`` pair for sample ``i``.

        Before any transform the image is RGB and the mask has one float
        channel per requested class (1.0 where the pixel belongs to it).

        Raises:
            FileNotFoundError: if the image or mask file cannot be read.
        """
        # read data; cv2.imread signals failure by returning None
        image = cv2.imread(self.images_fps[i])
        if image is None:
            raise FileNotFoundError(f'cannot read image: {self.images_fps[i]}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.masks_fps[i], 0)
        if mask is None:
            raise FileNotFoundError(f'cannot read mask: {self.masks_fps[i]}')

        # one binary channel per requested class value, stacked on the last axis
        masks = [(mask == v) for v in self.class_values]
        mask = np.stack(masks, axis=-1).astype('float')

        # apply augmentations
        if self.augmentation:
            sample = self.augmentation(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        # apply preprocessing
        if self.preprocessing:
            sample = self.preprocessing(image=image, mask=mask)
            image, mask = sample['image'], sample['mask']

        return image, mask

    def __len__(self):
        """Return the number of image files found in ``images_dir``."""
        return len(self.ids)

In [None]:
# Preview the first five training samples together with their class masks

dataset_building = Dataset(x_train_dir, y_train_dir, classes=['building', 'road'])

for i in range(5):
    image, mask = dataset_building[i]
    # channel 0 holds the building mask, channel 1 the road mask
    building_mask, road_mask = mask[:, :, 0], mask[:, :, 1]
    visualize(
        image=image,
        building_mask=building_mask.squeeze(),
        road_mask=road_mask.squeeze(),
    )

## 데이터 증대

In [None]:
def get_training_augmentation():
    """Build the training-time augmentation pipeline.

    The pipeline produces 320x320 samples, applies one geometric transform
    from a small pool most of the time, and then jitters brightness,
    contrast, gamma and colour.

    Returns:
        albumentations.Compose: transform to call as
        ``transform(image=..., mask=...)``.
    """
    train_transform = [
        # Pad first: a 320x320 window cannot be cut from a smaller image, and
        # padding after the crop would never have anything left to do.
        albu.PadIfNeeded(min_height=320, min_width=320, always_apply=True, border_mode=0),
        albu.RandomCrop(height=320, width=320, always_apply=True),

        # At most one geometric transform per sample (the group runs with p=0.9).
        albu.OneOf(
            [
                albu.HorizontalFlip(p=1),
                albu.RandomRotate90(p=1),
                albu.VerticalFlip(p=1),
                albu.Transpose(p=1),
                # rotation only: scale and shift limits are zero
                albu.ShiftScaleRotate(scale_limit=0, rotate_limit=45,
                                      shift_limit=0, p=0.5, border_mode=0),
            ],
            p=0.9,
        ),

        # Photometric jitter
        albu.RandomBrightnessContrast(p=0.8),
        albu.RandomGamma(p=0.8),
        albu.FancyPCA(p=0.2),
    ]
    return albu.Compose(train_transform)


def get_validation_augmentation():
    """Build the validation transform.

    Pads images up to at least 576x512 (height x width). Both numbers are
    multiples of 32, so any image no larger than that ends up with a shape
    divisible by 32.
    """
    pad_to_fixed_size = albu.PadIfNeeded(min_height=576, min_width=512)
    return albu.Compose([pad_to_fixed_size])


def to_tensor(x, **kwargs):
    """Move the last axis of a 3-D array to the front and cast to float32.

    Extra keyword arguments are accepted and ignored.
    """
    channels_first = x.transpose((2, 0, 1))
    return channels_first.astype('float32')


def get_preprocessing(preprocessing_fn):
    """Build the preprocessing transform applied after augmentation.

    Args:
        preprocessing_fn (callable): data normalization function applied to
            the image only (can be specific for each pretrained network)
    Return:
        transform: albumentations.Compose that normalizes the image, then
            converts both image and mask with ``to_tensor``

    """
    normalize_image = albu.Lambda(image=preprocessing_fn)
    convert_layout = albu.Lambda(image=to_tensor, mask=to_tensor)
    return albu.Compose([normalize_image, convert_layout])

## 모델 생성

In [None]:
# Model configuration
CLASSES = ['building', 'road']
ENCODER = 'efficientnet-b7'
ENCODER_WEIGHTS = 'imagenet'
# 'sigmoid' gives an independent score per class channel; could be None for
# logits or 'softmax2d' for multiclass segmentation
ACTIVATION = 'sigmoid'
DEVICE = 'cuda'

# UNet++ decoder on top of the pretrained encoder, one output channel per class
model = smp.UnetPlusPlus(
    encoder_name=ENCODER,
    encoder_weights=ENCODER_WEIGHTS,
    activation=ACTIVATION,
    classes=len(CLASSES),
)

# Input normalization that matches the chosen encoder and its weights
preprocessing_fn = smp.encoders.get_preprocessing_fn(ENCODER, ENCODER_WEIGHTS)

In [None]:
# Training set: random augmentation followed by encoder-specific preprocessing
train_dataset = Dataset(
    images_dir=x_train_dir,
    masks_dir=y_train_dir,
    classes=CLASSES,
    augmentation=get_training_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

# Validation set: padding only, followed by the same preprocessing
valid_dataset = Dataset(
    images_dir=x_valid_dir,
    masks_dir=y_valid_dir,
    classes=CLASSES,
    augmentation=get_validation_augmentation(),
    preprocessing=get_preprocessing(preprocessing_fn),
)

# Shuffled batches of 8 for training; one image at a time, in order, for validation
train_loader = DataLoader(dataset=train_dataset, batch_size=8, shuffle=True, num_workers=2)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=1, shuffle=False, num_workers=1)

In [None]:
# Dice loss for optimization, IoU (predictions thresholded at 0.5) for reporting
loss = smp.utils.losses.DiceLoss()
metrics = [smp.utils.metrics.IoU(threshold=0.5)]

# A single parameter group covering the whole model; the training loop later
# lowers the learning rate through optimizer.param_groups[0]
optimizer = torch.optim.Adam([{'params': model.parameters(), 'lr': 0.0001}])

In [None]:
# Epoch runners: each one iterates over a data loader once and returns its logs.
# The training runner additionally receives the optimizer.
train_epoch = smp.utils.train.TrainEpoch(
    model,
    optimizer=optimizer,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)

valid_epoch = smp.utils.train.ValidEpoch(
    model,
    loss=loss,
    metrics=metrics,
    device=DEVICE,
    verbose=True,
)

## 모델 훈련

In [None]:
# Train for 100 epochs, keep the checkpoint with the best validation IoU and
# report the validation throughput of every epoch.
max_score = 0

save_dir = os.path.join('/kaggle/working', 'ckpt')
os.makedirs(save_dir, exist_ok=True)

for i in range(0, 100):
    # CUDA events time the validation pass on the GPU
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)

    print('\nEpoch: {}'.format(i))
    train_logs = train_epoch.run(train_loader)

    start.record()
    valid_logs = valid_epoch.run(valid_loader)
    end.record()
    # wait for the recorded events to complete before reading the elapsed time
    torch.cuda.synchronize()

    # save the whole model object whenever the validation IoU improves
    if max_score < valid_logs['iou_score']:
        max_score = valid_logs['iou_score']
        torch.save(model, os.path.join(save_dir, 'best_model.pth'))
        print('Model saved!')

    # one-off learning-rate drop after epoch index 50
    if i == 50:
        optimizer.param_groups[0]['lr'] = 1e-5
        print('Decrease decoder learning rate to 1e-5!')

    # elapsed_time() is in milliseconds; frames per second is the number of
    # validation images divided by the elapsed seconds
    elapsed_seconds = start.elapsed_time(end) / 1000
    print(f'FPS : {len(valid_loader.dataset) / elapsed_seconds}')

## 이미지 시각화

In [None]:
# Reload the checkpoint written by the training loop (a whole pickled model object)
best_ckpt_path = os.path.join(save_dir, 'best_model.pth')
best_model = torch.load(best_ckpt_path)

In [None]:
# Validation set without augmentation or preprocessing, so images keep their
# original pixel values for display
valid_dataset_vis = Dataset(
    images_dir=x_valid_dir,
    masks_dir=y_valid_dir,
    classes=['building', 'road'],
)

In [None]:
def combine_masks(masks):
    """Merge per-class binary masks into a single label map.

    ``masks`` is indexed by class channel first. Pixels equal to 1 in channel
    ``k`` receive the label ``k + 1``; where several channels are set, the
    last one wins. Pixels set in no channel stay 0. The result is a uint8
    array shaped like one channel.
    """
    label_map = np.zeros(masks[0].shape, dtype=np.uint8)
    for label, channel in enumerate(masks, start=1):
        label_map[channel == 1] = label
    return label_map

In [None]:
# Show image, ground truth and prediction for the first ten validation samples
for i in range(10):
    # untouched image for display
    raw_image = valid_dataset_vis[i][0]
    image_vis = raw_image.astype('uint8')

    # preprocessed image and mask for the model
    image, gt_mask = valid_dataset[i]
    gt_mask = gt_mask.squeeze()

    # add a batch dimension, predict, then round the scores to 0/1
    x_tensor = torch.from_numpy(image).to(DEVICE).unsqueeze(0)
    pr_mask = best_model.predict(x_tensor)
    pr_mask = pr_mask.squeeze().cpu().numpy().round()

    visualize(
        image=image_vis,
        gt_mask=combine_masks(gt_mask),
        pr_mask=combine_masks(pr_mask),
    )