In [1]:
%load_ext lab_black

처음에는 ground truth keypoint를 기반으로 얻은 가상의 bounding box로 detection model을 학습시켰지만,
가상의 bounding box가 정확도가 떨어지기 때문에 모델이 오히려 잘못된 feature를 학습하고 성능이 떨어지는 경우를 봤습니다.
하지만 한편으로는 가상의 bounding box로 학습을 한 경우가 더 잘 탐지하는 경우도 있기 때문에(특히 운동기구 등으로 몸이 일부가 가려진 경우) 둘 모두를 사용했습니다.

In [15]:
import argparse
import json
import math
import os
import random
import shutil
import sys
import random
from copy import deepcopy
from datetime import datetime
from multiprocessing import cpu_count
from pathlib import Path
from pprint import pformat
from typing import Iterable, List

import albumentations as A
import cv2
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
import yaml
from albumentations.pytorch import ToTensorV2
from easydict import EasyDict
from PIL import Image
from sklearn.model_selection import KFold, StratifiedKFold
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import DataLoader, Dataset, Subset
from tqdm import tqdm

from effdet_torch_singlefile import EfficientDet

In [3]:
# 최정명님이 공유해주신 잘못된 데이터들
error_list = [
    317,
    869,
    873,
    877,
    911,
    1559,
    1560,
    1562,
    1566,
    1575,
    1577,
    1578,
    1582,
    1606,
    1607,
    1622,
    1623,
    1624,
    1625,
    1629,
    3968,
    4115,
    4116,
    4117,
    4118,
    4119,
    4120,
    4121,
    4122,
    4123,
    4124,
    4125,
    4126,
    4127,
    4128,
    4129,
    4130,
    4131,
    4132,
    4133,
    4134,
    4135,
    4136,
    4137,
    4138,
    4139,
    4140,
    4141,
    4142,
    4143,
    4144,
    4145,
    4146,
    4147,
    4148,
    4149,
    4150,
    4151,
    4152,
    4153,
    4154,
    4155,
    4156,
    4157,
    4158,
    4159,
    4160,
    4161,
    4162,
    4163,
    4164,
    4165,
    4166,
    4167,
    4168,
    4169,
    4170,
    4171,
    4172,
    4173,
    4174,
    4175,
    4176,
    4177,
    4178,
    4179,
    4180,
    4181,
    4182,
    4183,
    4184,
    4185,
    4186,
    4187,
    4188,
    4189,
    4190,
    4191,
    4192,
    4193,
    4194,
]
# 20210323 추가
error_list.extend([1516, 1597, 2221, 2808, 2821, 3081, 3084, 3085, 3090, 3093, 3283, 3284])

In [4]:
def seed_everything(seed, deterministic=False):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = deterministic
        torch.backends.cudnn.benchmark = not deterministic

In [5]:
class AverageMeter(object):
    """
    AverageMeter, referenced to https://dacon.io/competitions/official/235626/codeshare/1684
    """

    def __init__(self):
        self.sum = 0
        self.cnt = 0
        self.avg = 0

    def update(self, val, n=1):
        if n > 0:
            self.sum += val * n
            self.cnt += n
            self.avg = self.sum / self.cnt

    def get(self):
        return self.avg

    def __call__(self):
        return self.avg

## Dataset

In [6]:
def keypoint2box(keypoint, padding=0):
    return np.array(
        [
            keypoint[:, 0].min() - padding,
            keypoint[:, 1].min() - padding,
            keypoint[:, 0].max() + padding,
            keypoint[:, 1].max() + padding,
        ]
    )

In [7]:
class HorizontalFlipEx(A.HorizontalFlip):
    swap_columns = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16), (18, 19), (22, 23)]

    def apply_to_keypoints(self, keypoints, **params):
        keypoints = super().apply_to_keypoints(keypoints, **params)

        # left/right 키포인트들은 서로 swap해주기
        for a, b in self.swap_columns:
            temp1 = deepcopy(keypoints[a])
            temp2 = deepcopy(keypoints[b])
            keypoints[a] = temp2
            keypoints[b] = temp1

        return keypoints


class VerticalFlipEx(A.VerticalFlip):
    swap_columns = [(1, 2), (3, 4), (5, 6), (7, 8), (9, 10), (11, 12), (13, 14), (15, 16), (18, 19), (22, 23)]

    def apply_to_keypoints(self, keypoints, **params):
        keypoints = super().apply_to_keypoints(keypoints, **params)

        # left/right 키포인트들은 서로 swap해주기
        for a, b in self.swap_columns:
            temp1 = deepcopy(keypoints[a])
            temp2 = deepcopy(keypoints[b])
            keypoints[a] = temp2
            keypoints[b] = temp1

        return keypoints

In [8]:
class DetDataset(Dataset):
    def __init__(self, config, files, keypoints, augmentation):
        super().__init__()
        self.C = config
        self.files = files
        self.keypoints = keypoints

        T = []
        T.append(A.Crop(*self.C.dataset.crop))
        T.append(A.Resize(self.C.dataset.input_height, self.C.dataset.input_width))
        if augmentation:
            T_ = []
            T_.append(A.Cutout(num_holes=16, max_h_size=100, max_w_size=100, fill_value=0, p=1))
            T_.append(A.Cutout(num_holes=16, max_h_size=100, max_w_size=100, fill_value=255, p=1))
            T_.append(A.Cutout(num_holes=16, max_h_size=100, max_w_size=100, fill_value=128, p=1))
            T_.append(A.Cutout(num_holes=16, max_h_size=100, max_w_size=100, fill_value=192, p=1))
            T_.append(A.Cutout(num_holes=16, max_h_size=100, max_w_size=100, fill_value=64, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=1920, max_w_size=50, fill_value=0, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=1920, max_w_size=50, fill_value=255, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=1920, max_w_size=50, fill_value=128, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=1920, max_w_size=50, fill_value=192, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=1920, max_w_size=50, fill_value=64, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=30, max_w_size=1080, fill_value=0, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=30, max_w_size=1080, fill_value=255, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=30, max_w_size=1080, fill_value=128, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=30, max_w_size=1080, fill_value=192, p=1))
            T_.append(A.Cutout(num_holes=5, max_h_size=30, max_w_size=1080, fill_value=64, p=1))
            # T_.append(A.Cutout(max_h_size=20, max_w_size=20))
            # T_.append(A.Cutout(max_h_size=20, max_w_size=20, fill_value=255))
            # T_.append(A.Cutout(max_h_size=self.C.dataset.input_height // 2, max_w_size=10, fill_value=255))
            # T_.append(A.Cutout(max_h_size=self.C.dataset.input_height // 2, max_w_size=10, fill_value=0))
            # T_.append(A.Cutout(max_h_size=10, max_w_size=self.C.dataset.input_width // 2, fill_value=255))
            # T_.append(A.Cutout(max_h_size=10, max_w_size=self.C.dataset.input_width // 2, fill_value=0))
            T.append(A.OneOf(T_))

            T.append(A.ShiftScaleRotate(border_mode=cv2.BORDER_CONSTANT))
            T.append(HorizontalFlipEx())
            T.append(VerticalFlipEx())
            # T.append(A.RandomRotate90()) # batch-augmentation으로 대체

            T_ = []
            T_.append(A.RandomBrightnessContrast())
            T_.append(A.RandomGamma())
            T_.append(A.RandomBrightness())
            T_.append(A.RandomContrast())
            T.append(A.OneOf(T_))

            T_ = []
            T_.append(A.MotionBlur(p=1))
            T_.append(A.GaussNoise(p=1))
            T.append(A.OneOf(T_))
        T.append(A.Normalize())
        T.append(ToTensorV2())

        self.transform = A.Compose(
            transforms=T,
            bbox_params=A.BboxParams(format="pascal_voc", label_fields=["labels"]),
            # keypoint_params=A.KeypointParams(format="xy", remove_invisible=False),
        )

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        file = str(self.files[idx])
        image = imageio.imread(file)

        keypoint = self.keypoints[idx]
        box = keypoint2box(keypoint, self.C.dataset.padding)
        box = np.expand_dims(box, 0)
        labels = np.array([0], dtype=np.int64)
        a = self.transform(image=image, labels=labels, bboxes=box)

        image = a["image"]

        annot = np.zeros((1, 5), dtype=np.float32)
        annot[0, :4] = a["bboxes"][0]
        annot = torch.tensor(annot, dtype=torch.float32)

        return file, image, annot

In [9]:
def get_det_dataset(C, fold):
    datadir = Path(C.dataset.dir)
    total_imgs = np.array(sorted(list((datadir / "train_imgs").glob("*.jpg"))))
    df = pd.read_csv(datadir / "train_df.csv")
    total_keypoints = df.to_numpy()[:, 1:].astype(np.float32)
    total_keypoints = np.stack([total_keypoints[:, 0::2], total_keypoints[:, 1::2]], axis=2)

    # 오류가 있는 데이터는 학습에서 제외
    total_imgs_, total_keypoints_ = [], []
    for i in range(len(total_imgs)):
        if i not in error_list:
            total_imgs_.append(total_imgs[i])
            total_keypoints_.append(total_keypoints[i])
    total_imgs = np.array(total_imgs_)
    total_keypoints = np.array(total_keypoints_)

    # KFold
    if C.dataset.group_kfold:
        skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=C.seed)
        # 파일 이름 앞 17자리를 group으로 이미지를 분류 (파일이 너무 잘 섞여도 안됨)
        groups = []
        last_group = 0
        last_stem = total_imgs[0].name[:17]
        for f in total_imgs:
            stem = f.name[:17]
            if stem == last_stem:
                groups.append(last_group)
            else:
                last_group += 1
                last_stem = stem
                groups.append(last_group)
        indices = list(skf.split(total_imgs, groups))
    else:
        kf = KFold(n_splits=5, shuffle=True, random_state=C.seed)
        indices = list(kf.split(total_imgs))
    train_idx, valid_idx = indices[fold - 1]

    # 데이터셋 생성
    ds_train = DetDataset(
        C,
        total_imgs[train_idx],
        total_keypoints[train_idx],
        augmentation=True,
    )
    ds_valid = DetDataset(
        C,
        total_imgs[valid_idx],
        total_keypoints[valid_idx],
        augmentation=False,
    )
    dl_train = DataLoader(
        ds_train,
        batch_size=C.dataset.batch_size,
        num_workers=C.dataset.num_cpus,
        shuffle=True,
        pin_memory=True,
    )
    dl_valid = DataLoader(
        ds_valid,
        batch_size=C.dataset.batch_size,
        num_workers=C.dataset.num_cpus,
        shuffle=False,
        pin_memory=True,
    )

    return dl_train, dl_valid

## Train

In [10]:
class DetTrainOutput:
    def __init__(self):
        self.loss = AverageMeter()

    def freeze(self):
        self.loss = self.loss()
        return self

In [11]:
class DetTrainer:
    _tqdm_ = dict(ncols=100, leave=False, file=sys.stdout)

    def __init__(self, C, fold=1, checkpoint=None):
        self.C = C
        self.fold = fold

        self.det_model = EfficientDet(self.C.det_model.name, pretrained=True)
        self.det_model.cuda()

        # Optimizer
        self.optimizer = optim.AdamW(self.det_model.parameters(), lr=self.C.train.lr)

        self.epoch = self.C.train.start_epoch
        self.best_loss = math.inf
        self.earlystop_cnt = 0

        # Dataset
        self.dl_train, self.dl_valid = get_det_dataset(C, self.fold)

        # Load Checkpoint
        if checkpoint is not None:
            self.load(checkpoint)

        # Scheduler
        self.scheduler = lr_scheduler.ReduceLROnPlateau(self.optimizer, **self.C.train.scheduler.params)

    def save(self, path):
        torch.save(
            {
                "model": self.det_model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                "epoch": self.epoch,
                "best_loss": self.best_loss,
                "earlystop_cnt": self.earlystop_cnt,
            },
            path,
        )

    def load(self, path):
        print("Load pretrained", path)
        ckpt = torch.load(path)
        self.det_model.load_state_dict(ckpt["model"])
        self.optimizer.load_state_dict(ckpt["optimizer"])
        self.epoch = ckpt["epoch"] + 1
        self.best_loss = ckpt["best_loss"]
        self.earlystop_cnt = ckpt["earlystop_cnt"]

    def train_loop(self):
        self.det_model.train()

        O = DetTrainOutput()
        with tqdm(total=len(self.dl_train.dataset), **self._tqdm_, desc=f"Train {self.epoch:03d}") as t:
            for files, imgs, annots in self.dl_train:
                imgs_, annots_ = imgs.cuda(non_blocking=True), annots.cuda(non_blocking=True)

                # batch augmentation
                if self.C.train.batch_augmentation:
                    h, w = imgs.shape[2:]

                    # downsample
                    if random.random() <= 0.5:
                        imgs_ = F.interpolate(imgs_, (h // 2, w // 2))
                        annots_[..., :4] *= 0.5

                    # rotation
                    if random.random() <= 0.5:
                        k = random.randint(1, 3)
                        a, b, c, d = annots_[..., 0], annots_[..., 1], annots_[..., 2], annots_[..., 3]
                        e = annots_[..., 4]
                        if k == 1:
                            annots_ = torch.stack([b, w - c, d, w - a, e], dim=2)
                        elif k == 2:
                            annots_ = torch.stack([w - c, h - d, w - a, h - b, e], dim=2)
                        elif k == 3:
                            annots_ = torch.stack([h - d, a, h - b, c, e], dim=2)
                        imgs_ = torch.rot90(imgs_, k=k, dims=(2, 3))

                loss = self.det_model(imgs_, annots_)

                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                O.loss.update(loss.item(), len(files))
                t.set_postfix_str(f"loss: {loss.item():.6f}", refresh=False)
                t.update(len(files))

        return O.freeze()

    @torch.no_grad()
    def valid_loop(self):
        self.det_model.eval()

        O = DetTrainOutput()
        with tqdm(total=len(self.dl_valid.dataset), **self._tqdm_, desc=f"Valid {self.epoch:03d}") as t:
            for files, imgs, annots in self.dl_valid:
                imgs_, annots_ = imgs.cuda(non_blocking=True), annots.cuda(non_blocking=True)
                loss = self.det_model(imgs_, annots_)

                O.loss.update(loss.item(), len(files))
                t.set_postfix_str(f"loss: {loss.item():.6f}", refresh=False)
                t.update(len(files))

        return O.freeze()

    @torch.no_grad()
    def callback(self, to: DetTrainOutput, vo: DetTrainOutput):
        print(
            f"Epoch: {self.epoch:03d},",
            f"loss: {to.loss:.6f};{vo.loss:.6f},",
        )

        self.scheduler.step(vo.loss)

        if self.best_loss > vo.loss:
            self.best_loss = vo.loss
            self.earlystop_cnt = 0
            self.save(self.C.result_dir / f"effdet_d7_{self.fold}.pth")
        else:
            self.earlystop_cnt += 1

    def fit(self):
        for self.epoch in range(self.epoch, self.C.train.final_epoch + 1):
            to = self.train_loop()
            vo = self.valid_loop()
            self.callback(to, vo)

            if self.earlystop_cnt > self.C.train.earlystop_patience:
                print(f"Stop training at epoch", self.epoch)
                break

        self.load(self.C.result_dir / f"effdet_d7_{self.fold}.pth")

In [12]:
__effdet_train_config__ = """
seed: 20210309
result_dir: results/submit

det_model: 
  name: efficientdet-d7

dataset:
  dir: data/ori
  batch_size: 2
  num_cpus: 1
  padding: 20
  
  crop:
    - 192
    - 28
    - 1728
    - 1052

  input_width: 768 # 1536
  input_height: 512 # 1024
  
train:
  earlystop_patience: 10
  start_epoch: 1
  final_epoch: 1
  
  batch_augmentation: true
  
  folds:
    - 1
  checkpoints:
    - null

  lr: 0.0001
  scheduler:
    type: ReduceLROnPlateau
    params:
      factor: 0.5
      patience: 3
      verbose: true
"""

In [13]:
def main():
    C = EasyDict(yaml.load(__effdet_train_config__, yaml.FullLoader))
    fold, checkpoint = C.train.folds[0], C.train.checkpoints[0]

    Path(C.result_dir).mkdir(parents=True, exist_ok=True)

    if C.dataset.num_cpus < 0:
        C.dataset.num_cpus = cpu_count()

    C.result_dir = Path(C.result_dir)
    C.dataset.dir = Path(C.dataset.dir)
    seed_everything(C.seed)

    trainer = DetTrainer(C, fold, checkpoint)
    trainer.fit()

In [16]:
main()

Load pretrained /home/s0/.cache/torch/hub/checkpoints/efficientdet-d7.pth
Epoch: 001, loss: 1.374408;0.734192,                                                                
Load pretrained results/submit/effdet_d7_1.pth


FileNotFoundError: [Errno 2] No such file or directory: 'results/submit/effdet_d7_1.pth'

```log
[2021-04-04 23:09:27  INFO] Epoch: 001, loss: 1.345929;0.607354,
[2021-04-04 23:18:55  INFO] Epoch: 002, loss: 1.032379;0.974972,
[2021-04-04 23:28:11  INFO] Epoch: 003, loss: 0.899945;0.447793,
[2021-04-04 23:37:33  INFO] Epoch: 004, loss: 0.831420;0.583468,
[2021-04-04 23:46:44  INFO] Epoch: 005, loss: 0.799242;1.710531,
[2021-04-04 23:56:01  INFO] Epoch: 006, loss: 0.755635;0.422747,
[2021-04-05 00:05:15  INFO] Epoch: 007, loss: 0.761829;0.480131,
[2021-04-05 00:14:30  INFO] Epoch: 008, loss: 0.680230;2.825914,
[2021-04-05 00:23:47  INFO] Epoch: 009, loss: 0.699145;2.252932,
[2021-04-05 00:33:00  INFO] Epoch: 010, loss: 0.668748;0.433503,
[2021-04-05 00:42:16  INFO] Epoch: 011, loss: 0.559386;0.215368,
[2021-04-05 00:51:32  INFO] Epoch: 012, loss: 0.500916;0.258114,
[2021-04-05 01:00:47  INFO] Epoch: 013, loss: 0.483150;0.247860,
[2021-04-05 01:09:58  INFO] Epoch: 014, loss: 0.458754;0.172292,
[2021-04-05 01:19:22  INFO] Epoch: 015, loss: 0.432042;0.156415,
[2021-04-05 01:28:40  INFO] Epoch: 016, loss: 0.423682;0.172074,
[2021-04-05 01:38:07  INFO] Epoch: 017, loss: 0.419228;0.227202,
[2021-04-05 01:47:17  INFO] Epoch: 018, loss: 0.426307;0.202776,
[2021-04-05 01:56:38  INFO] Epoch: 019, loss: 0.422774;0.183640,
[2021-04-05 02:05:52  INFO] Epoch: 020, loss: 0.356231;0.131375,
[2021-04-05 02:15:05  INFO] Epoch: 021, loss: 0.325380;0.121130,
[2021-04-05 02:24:25  INFO] Epoch: 022, loss: 0.334707;0.121304,
[2021-04-05 02:33:40  INFO] Epoch: 023, loss: 0.315013;0.123282,
[2021-04-05 02:43:00  INFO] Epoch: 024, loss: 0.308335;0.137350,
[2021-04-05 02:52:19  INFO] Epoch: 025, loss: 0.316076;0.106638,
[2021-04-05 03:01:39  INFO] Epoch: 026, loss: 0.303245;0.103056,
[2021-04-05 03:10:51  INFO] Epoch: 027, loss: 0.301873;0.114936,
[2021-04-05 03:20:15  INFO] Epoch: 028, loss: 0.307543;0.114505,
[2021-04-05 03:29:34  INFO] Epoch: 029, loss: 0.298067;0.109576,
[2021-04-05 03:38:48  INFO] Epoch: 030, loss: 0.294818;0.132326,
[2021-04-05 03:48:07  INFO] Epoch: 031, loss: 0.275761;0.088775,
[2021-04-05 03:57:28  INFO] Epoch: 032, loss: 0.264910;0.097282,
[2021-04-05 04:06:49  INFO] Epoch: 033, loss: 0.265827;0.094159,
[2021-04-05 04:16:11  INFO] Epoch: 034, loss: 0.253544;0.103739,
[2021-04-05 04:25:28  INFO] Epoch: 035, loss: 0.264441;0.090745,
[2021-04-05 04:34:50  INFO] Epoch: 036, loss: 0.250116;0.080270,
[2021-04-05 04:44:12  INFO] Epoch: 037, loss: 0.248722;0.090331,
[2021-04-05 04:53:26  INFO] Epoch: 038, loss: 0.243801;0.083525,
[2021-04-05 05:02:41  INFO] Epoch: 039, loss: 0.229252;0.083217,
[2021-04-05 05:12:03  INFO] Epoch: 040, loss: 0.227173;0.093092,
[2021-04-05 05:21:16  INFO] Epoch: 041, loss: 0.233974;0.078002,
[2021-04-05 05:30:35  INFO] Epoch: 042, loss: 0.224974;0.077236,
[2021-04-05 05:39:51  INFO] Epoch: 043, loss: 0.239731;0.077556,
[2021-04-05 05:49:09  INFO] Epoch: 044, loss: 0.231345;0.077701,
[2021-04-05 05:58:28  INFO] Epoch: 045, loss: 0.222928;0.079066,
[2021-04-05 06:07:48  INFO] Epoch: 046, loss: 0.229059;0.085703,
[2021-04-05 06:17:01  INFO] Epoch: 047, loss: 0.238931;0.079773,
[2021-04-05 06:26:27  INFO] Epoch: 048, loss: 0.233236;0.077964,
[2021-04-05 06:35:47  INFO] Epoch: 049, loss: 0.231080;0.079676,
[2021-04-05 06:45:00  INFO] Epoch: 050, loss: 0.225975;0.077771,
[2021-04-05 06:54:25  INFO] Epoch: 051, loss: 0.226893;0.070260,
[2021-04-05 07:03:43  INFO] Epoch: 052, loss: 0.241337;0.075432,
[2021-04-05 07:13:05  INFO] Epoch: 053, loss: 0.226088;0.071070,
[2021-04-05 07:22:24  INFO] Epoch: 054, loss: 0.222690;0.075063,
[2021-04-05 07:31:41  INFO] Epoch: 055, loss: 0.226015;0.076678,
[2021-04-05 07:40:57  INFO] Epoch: 056, loss: 0.228466;0.081537,
[2021-04-05 07:50:20  INFO] Epoch: 057, loss: 0.220756;0.076941,
[2021-04-05 07:59:38  INFO] Epoch: 058, loss: 0.225289;0.075737,
[2021-04-05 08:08:54  INFO] Epoch: 059, loss: 0.233874;0.073094,
[2021-04-05 08:18:13  INFO] Epoch: 060, loss: 0.211533;0.076971,
[2021-04-05 08:27:30  INFO] Epoch: 061, loss: 0.220242;0.077371,
[2021-04-05 08:36:49  INFO] Epoch: 062, loss: 0.220992;0.071223,
[2021-04-05 08:36:49  INFO] Stop training at epoch 62
```