# TODO

- github 링크
- 의존 라이브러리들 직접 사용으로 바꾸기
- 로그파일 내용 출력해주기
- EfficientDetFinetune은 직접사용으로 변경

---

베이스라인 코드를 공유해주신 우주대마왕님께 감사드립니다.  
디텍션 문제는 처음 접해봤음에도, 우주대마왕님의 코드를 보고 mask-rcnn부터 하나씩 시도해보면서 점수를 높일 수 있었습니다.

### Summary

주로 사용된 기법은 아래와 같습니다.

- Efficientdet-d7
- HRNet-W48
- 5 fold cross validation
- test-time augmentation

### Requirements

- pytorch==1.7.1
- easydict
- pyyaml (`import yaml`)
- imageio
- scikit-learn (`import sklearn`)
- albumentations

EfficientDet과 HRNet은 라이브러리를 설치해서 쓴게 아니라, 단일 `*.py`파일에 모든 코드를 합쳐서 import해서 썼습니다.  
해당 코드를 있는 그대로 노트북에 옮기기에는 너무 길기 때문에 github에 업로드해두고, 소스코드 파일을 다운로드 해서 쓰겠습니다.

EfficientDet의 pretrained weight는 https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch.git  
HRNet의 pretrained weight는 https://github.com/leoxiaobin/deep-high-resolution-net.pytorch.git  
를 참고했습니다.

### Directory Structure

```
+ data
  + ori
    + train_imgs
      - ...
    + test_imgs
      - ...
    - sample_submission.csv
    - train_df.csv
    
+ networks
  + models
    - efficientdet-d7.pth
    - pose_hrnet_w48_384x288.pth
  - __init__.py
  - common.py
  - efficientdet.py
  - loader.py
  - pose_hrnet.py
  
+ results # 학습 결과가 저장될 폴더
  + effdet-train
  + hrnet-train
```

### etc.

- `*.py`파일을 노트북에 합친 것이기 때문에 어색하거나 실행에 문제가 있는 점이 있을 수 있습니다.
- 문제가 있을시에 원본 [github 소스코드](https://github.com/Kitsunetic/motion-keypoint-dacon.git)를 참고해주세요.
- github 링크: https://github.com/Kitsunetic/motion-keypoint-dacon.git

# 라이브러리 다운로드

In [None]:
import requests
from pathlib import Path

In [1]:
# Maps each local destination path to the URL it is downloaded from;
# iterated by the download loop in the next cell.
__download_files__ = {
    # Pretrained weights for the EfficientDet model
    "networks/models/efficientdet-d7.pth": "https://github.com/Kitsunetic/motion-keypoint-dacon/releases/download/weights/efficientdet-d7.pth",
    
    # Pretrained weights for the HRNet model
    "networks/models/pose_hrnet_w48_384x288.pth": "https://github.com/Kitsunetic/motion-keypoint-dacon/releases/download/weights/pose_hrnet_w48_384x288.pth",
    
    # Source code files
    # NOTE(review): the URLs below are empty strings — presumably placeholders
    # that still need to be filled in; confirm before running the download cell.
    "networks/__init__.py": "",
    "networks/common.py": "",
    "networks/efficientdet.py": "",
    "networks/loader.py": "",
    "networks/pose_hrnet.py": "",
}

In [None]:
# Download every entry of __download_files__ to its local destination path.
for path, url in __download_files__.items():
    path = Path(path)
    print(path)
    if not url:
        # requests.get("") raises MissingSchema, and the destination file used
        # to be truncated before the request was even made. Skip entries that
        # have no URL configured instead of crashing half-way through.
        print(f"  skipped: no download URL configured for {path}")
        continue
    path.parent.mkdir(parents=True, exist_ok=True)
    # Request first and fail loudly on HTTP errors so that an error page is
    # never written to disk in place of a weight/source file. Streaming keeps
    # the large weight files out of memory.
    with requests.get(url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1 << 20):
                f.write(chunk)

# Utility Functions

원래는 `./utils.py`에 있는 내용들입니다.

In [None]:
import math
import os
import random
import re
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Iterable, List

import albumentations as A
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from albumentations.core.transforms_interface import DualTransform
from torch import Tensor
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import Dataset

In [None]:
def seed_everything(seed, deterministic=False):
    """Seed the Python, NumPy and PyTorch random generators.

    Args:
        seed: Seed value; also exported (as a string) through ``PYTHONHASHSEED``.
        deterministic: When CUDA is available, stored in
            ``torch.backends.cudnn.deterministic``; ``cudnn.benchmark`` is set
            to its negation.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # The CUDA/cuDNN settings are only touched when a GPU is present.
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = deterministic
    cudnn.benchmark = not deterministic

In [None]:
class AverageMeter(object):
    """
    Running weighted average of a scalar.

    AverageMeter, referenced to https://dacon.io/competitions/official/235626/codeshare/1684

    Attributes:
        sum: Accumulated ``val * n`` over all updates.
        cnt: Accumulated ``n`` over all updates.
        avg: ``sum / cnt`` after the latest update (0 before any update).
    """

    def __init__(self):
        self.sum = self.cnt = self.avg = 0

    def update(self, val, n=1):
        """Add ``val`` with weight ``n``; updates with a non-positive ``n`` are ignored."""
        if not n > 0:
            return
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt

    def get(self):
        """Return the current average."""
        return self.avg

    def __call__(self):
        """Return the current average (same value as ``get()``)."""
        return self.avg

In [None]:
class SAM(torch.optim.Optimizer):
    """Sharpness Aware Minimization wrapper around a base optimizer.

    Usage is a two-step update: after a backward pass call ``first_step`` to
    move the weights by ``rho * grad / ||grad||``, run a second
    forward/backward pass, then call ``second_step`` to restore the weights
    and let the base optimizer apply the update.

    Args:
        params: Parameters (or parameter groups) to optimize.
        base_optimizer: Optimizer class, instantiated on this optimizer's
            parameter groups with ``**kwargs``.
        rho: Non-negative scale of the perturbation.
        **kwargs: Forwarded to ``base_optimizer`` and stored as group defaults.
    """

    def __init__(self, params, base_optimizer, rho=0.05, **kwargs):
        assert rho >= 0.0, f"Invalid rho, should be non-negative: {rho}"

        super().__init__(params, dict(rho=rho, **kwargs))

        # Both optimizers share the very same param_groups list afterwards.
        self.base_optimizer = base_optimizer(self.param_groups, **kwargs)
        self.param_groups = self.base_optimizer.param_groups

    @torch.no_grad()
    def first_step(self, zero_grad=False):
        """Perturb every parameter that has a gradient and remember the offset."""
        total_norm = self._grad_norm()
        for param_group in self.param_groups:
            step_scale = param_group["rho"] / (total_norm + 1e-12)
            with_grad = (q for q in param_group["params"] if q.grad is not None)
            for param in with_grad:
                perturbation = param.grad * step_scale.to(param)
                param.add_(perturbation)  # climb to the local maximum "w + e(w)"
                self.state[param]["e_w"] = perturbation

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def second_step(self, zero_grad=False):
        """Undo the perturbation, then step the base optimizer with the current gradients."""
        for param_group in self.param_groups:
            for param in param_group["params"]:
                if param.grad is not None:
                    # get back to "w" from "w + e(w)"
                    param.sub_(self.state[param]["e_w"])

        self.base_optimizer.step()  # do the actual "sharpness-aware" update

        if zero_grad:
            self.zero_grad()

    @torch.no_grad()
    def step(self, closure=None):
        """Run both steps; ``closure`` must perform a full forward-backward pass."""
        assert closure is not None, "Sharpness Aware Minimization requires closure, but it was not provided"
        # Gradients are disabled by the decorator, so re-enable them for the closure.
        grad_enabled_closure = torch.enable_grad()(closure)

        self.first_step(zero_grad=True)
        grad_enabled_closure()
        self.second_step()

    def _grad_norm(self):
        """Return the L2 norm over the per-parameter gradient L2 norms."""
        # put everything on the same device, in case of model parallelism
        shared_device = self.param_groups[0]["params"][0].device
        per_param_norms = []
        for param_group in self.param_groups:
            for param in param_group["params"]:
                if param.grad is not None:
                    per_param_norms.append(param.grad.norm(p=2).to(shared_device))
        return torch.norm(torch.stack(per_param_norms), p=2)

In [None]:
class CustomLogger:
    """Minimal logger that prints each message and appends it to a log file.

    Every record is formatted as ``[YYYY-MM-DD HH:MM:SS LEVEL] message``.
    The instance owns an open file handle; release it with ``close()`` or by
    using the logger as a context manager.

    Args:
        filename: Log file path. If it is an existing directory, a file named
            ``log_<timestamp>.log`` is created inside it.
        filemode: Mode passed to ``open`` (default: append).
        use_color: Wrap the printed line in ANSI color codes per level.
    """

    # ANSI escape prefix per level tag; tags missing here are printed uncolored.
    _COLORS = {
        " INFO": "\033[34m",
        " WARN": "\033[35m",
        "ERROR": "\033[31m",
        "FATAL": "\033[43m\033[1m",
    }
    _RESET = "\033[0m"

    def __init__(self, filename, filemode="a", use_color=True):
        filename = Path(filename)
        if filename.is_dir():
            # ":" and " " are replaced so the timestamp is usable as a file name.
            timestr = self._get_timestr().replace(" ", "_").replace(":", "-")
            filename = filename / f"log_{timestr}.log"
        self.file = open(filename, filemode)
        self.use_color = use_color

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _get_timestr(self):
        """Return the current local time as ``YYYY-MM-DD HH:MM:SS``."""
        return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    def _write(self, msg, level):
        """Print one record (optionally colored) and append it to the file."""
        out = f"[{self._get_timestr()} {level}] {msg}"

        color = self._COLORS.get(level) if self.use_color else None
        print(f"{color}{out}{self._RESET}" if color else out)
        self.file.write(out + "\r\n")

    def _log(self, level, parts):
        """Join ``parts`` with spaces (after ``str()``) and write them at ``level``."""
        self._write(" ".join(map(str, parts)), level)

    def debug(self, *msg):
        self._log("DEBUG", msg)

    def info(self, *msg):
        self._log(" INFO", msg)

    def warn(self, *msg):
        self._log(" WARN", msg)

    def error(self, *msg):
        self._log("ERROR", msg)

    def fatal(self, *msg):
        self._log("FATAL", msg)

    def flush(self):
        """Flush buffered records to the log file."""
        self.file.flush()

    def close(self):
        """Close the log file; calling it more than once is harmless."""
        if not self.file.closed:
            self.file.close()

In [None]:
def draw_keypoints(image: np.ndarray, keypoints: np.ndarray):
    """Return a copy of ``image`` with keypoints, labels and skeleton edges drawn.

    Args:
        image: Image array; it is copied, so the input is left untouched.
        keypoints: Array indexed as ``keypoints[i] -> (x, y)``. The edge table
            below references indices 0..23.

    Returns:
        The annotated copy of ``image``.
    """
    # Pairs of keypoint indices that are connected by a line.
    edges = [
        (0, 1),
        (0, 2),
        (2, 4),
        (1, 3),
        (6, 8),
        (8, 10),
        (5, 7),
        (7, 9),
        (11, 13),
        (13, 15),
        (12, 14),
        (14, 16),
        (5, 6),
        (15, 22),
        (16, 23),
        (11, 21),
        (21, 12),
        (20, 21),
        (5, 20),
        (6, 20),
        (17, 6),
        (17, 5),
    ]
    # Joint name per keypoint index; the drawn label keeps the historical
    # "<name>_x" spelling.
    joint_names = [
        "nose",
        "left_eye",
        "right_eye",
        "left_ear",
        "right_ear",
        "left_shoulder",
        "right_shoulder",
        "left_elbow",
        "right_elbow",
        "left_wrist",
        "right_wrist",
        "left_hip",
        "right_hip",
        "left_knee",
        "right_knee",
        "left_ankle",
        "right_ankle",
        "neck",
        "left_palm",
        "right_palm",
        "spine2(back)",
        "spine1(waist)",
        "left_instep",
        "right_instep",
    ]
    image = image.copy()

    # A private generator yields the same palette as seeding the global RNG
    # with 42 did, without resetting NumPy's global random state on each call.
    rng = np.random.RandomState(42)
    colors = {k: tuple(map(int, rng.randint(0, 255, 3))) for k in range(24)}

    # cv2 drawing functions take integer pixel coordinates; convert once so
    # float keypoint arrays are accepted as well.
    points = [(int(round(float(kp[0]))), int(round(float(kp[1])))) for kp in keypoints]

    for i, point in enumerate(points):
        cv2.circle(image, point, 3, colors.get(i), thickness=3, lineType=cv2.FILLED)

        cv2.putText(
            image,
            f"{i}: {joint_names[i]}_x",
            point,
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 0, 0),
            1,
        )

    for start, end in edges:
        cv2.line(
            image,
            points[start],
            points[end],
            colors.get(start),
            3,
            lineType=cv2.LINE_AA,
        )

    return image


def draw_keypoints_show(image: np.ndarray, keypoints: np.ndarray):
    """Draw ``keypoints`` on ``image`` with ``draw_keypoints`` and display the result.

    The annotated image is shown in a 16x8 matplotlib figure with the axes
    hidden; nothing is returned.
    """
    annotated = draw_keypoints(image, keypoints)

    plt.figure(figsize=(16, 8))
    plt.imshow(annotated)
    plt.axis("off")
    plt.tight_layout()
    plt.show()

In [None]:
@torch.no_grad()
def heatmaps2keypoints(p: torch.Tensor):
    """Convert heatmaps to ``(x, y)`` coordinates of each map's maximum.

    Args:
        p: 3-D or 4-D tensor whose last two dimensions are the heatmap
            rows and columns.

    Returns:
        Float tensor shaped like ``p`` without its last two dimensions plus a
        trailing dimension of size 2 holding ``(x, y)``.

    Raises:
        NotImplementedError: If ``p`` is neither 3-D nor 4-D.
    """
    if p.dim() not in (3, 4):
        raise NotImplementedError(f"Expected input tensor dimention 3 or 4, but {p.shape}")

    width = p.size(-1)
    # Argmax over the flattened map, then split the flat index into row/column.
    flat_index = p.flatten(-2).argmax(dim=-1)
    rows = flat_index // width
    cols = flat_index % width
    return torch.stack([cols, rows], dim=-1).type(torch.float)

In [None]:
@torch.no_grad()
def keypoints2heatmaps(
    k: torch.Tensor,
    h=768 // 4,
    w=576 // 4,
    smooth=False,
    smooth_size=3,
    smooth_values=(0.1, 0.4, 0.8),
):
    """Render one heatmap per keypoint with value 1.0 at the keypoint pixel.

    Args:
        k: Tensor of ``(x, y)`` rows, one per keypoint; values are cast to int64.
        h: Heatmap height.
        w: Heatmap width.
        smooth: If true, fill nested square windows around each keypoint.
        smooth_size: Half-width of the largest window; windows shrink by one
            pixel per step down to half-width 1.
        smooth_values: Fill value per window, paired largest-window-first.

    Returns:
        Float32 tensor of shape ``(len(k), h, w)`` (created on the CPU).
    """
    k = k.type(torch.int64)
    c = torch.zeros(k.size(0), h, w, dtype=torch.float32)
    # Plain Python ints keep the slice arithmetic below free of 0-dim tensors.
    for i, (x, y) in enumerate(k.tolist()):
        if smooth:
            for d, s in zip(range(smooth_size, 0, -1), smooth_values):
                # The end bound is exclusive, so "+ 1" is needed for a window
                # that is symmetric around the keypoint (previously the window
                # stopped one pixel short on the bottom/right side).
                c[i, max(y - d, 0) : min(y + d + 1, h), max(x - d, 0) : min(x + d + 1, w)] = s
        c[i, y, x] = 1.0
    return c

In [None]:
def keypoint2box(keypoint, padding=0):
    """Return the padded bounding box ``[x_min, y_min, x_max, y_max]`` of the keypoints.

    Args:
        keypoint: Array indexed as ``keypoint[:, 0]`` (x) and ``keypoint[:, 1]`` (y).
        padding: Margin subtracted from the minima and added to the maxima.
    """
    xs, ys = keypoint[:, 0], keypoint[:, 1]
    left, top = xs.min() - padding, ys.min() - padding
    right, bottom = xs.max() + padding, ys.max() + padding
    return np.array([left, top, right, bottom])

In [None]:
def denormalize(
    x: torch.Tensor,
    mean=torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32),
    std=torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32),
):
    """Undo a per-channel normalization: ``x * std + mean``.

    For 4-D input the statistics are reshaped to ``(1, 3, 1, 1)``, for 3-D
    input to ``(3, 1, 1)``, and moved to ``x``'s device. For any other rank
    they are used exactly as passed.
    """
    broadcast_shape = {4: (1, 3, 1, 1), 3: (3, 1, 1)}.get(x.dim())
    if broadcast_shape is not None:
        mean = mean.view(*broadcast_shape).to(x.device)
        std = std.view(*broadcast_shape).to(x.device)

    return x * std + mean

In [None]:
class Tensor2Image:
    """Convert normalized channel-first image tensors to uint8 channel-last arrays.

    Args:
        mean: Per-channel mean used for de-normalization (3 values).
        std: Per-channel standard deviation used for de-normalization (3 values).
    """

    def __init__(
        self,
        mean: torch.Tensor = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float),
        std: torch.Tensor = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float),
    ):
        if not isinstance(mean, torch.Tensor):
            mean = torch.tensor(mean, dtype=torch.float)
        if not isinstance(std, torch.Tensor):
            std = torch.tensor(std, dtype=torch.float)

        self.mean = mean
        self.std = std

    def __call__(self, x: torch.Tensor):
        """De-normalize ``x`` and return it as a uint8 NumPy array.

        Args:
            x: ``(C, H, W)`` or ``(N, C, H, W)`` tensor with C in (1, 3, 4).

        Returns:
            ``(H, W, C')`` or ``(N, H, W, C')`` uint8 array computed as
            ``255 * (x * std + mean)``, saturated to the range [0, 255].
        """
        assert x.dim() in (3, 4)
        # .numpy() requires a CPU tensor that does not track gradients.
        x = x.detach().cpu()
        mean = self.mean.to(x.device)
        std = self.std.to(x.device)

        if x.dim() == 3:
            assert x.size(0) in (1, 3, 4)
            x = 255 * (x.permute(1, 2, 0) * std.view(1, 1, 3) + mean.view(1, 1, 3))
        else:
            assert x.size(1) in (1, 3, 4)
            x = 255 * (x.permute(0, 2, 3, 1) * std.view(1, 1, 1, 3) + mean.view(1, 1, 1, 3))

        # Saturate before the cast: out-of-range floats would otherwise wrap
        # around (or be undefined) when converted to uint8.
        return x.clamp(0, 255).type(torch.uint8).numpy()

In [None]:
def imshows(*ims, figsize=None):
    """Show the given images side by side in one matplotlib row.

    Args:
        *ims: Images passed to ``plt.imshow`` one per subplot.
        figsize: Figure size; defaults to 6 units of width per image and a
            height of 4.
    """
    count = len(ims)
    if not figsize:
        figsize = (count * 6, 4)

    plt.figure(figsize=figsize)
    for position, im in enumerate(ims, start=1):
        plt.subplot(1, count, position)
        plt.imshow(im)
    plt.tight_layout()
    plt.show()

# 1. Efficientdet-d7 학습

원래는 `./config/effdet-d7.yaml`, `./main-effdet-train.py`에 있는 내용입니다.

KFold는 하지 않고, 1fold 만 학습합니다.

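학습 결과로 `./results/effdet-train/` 폴더 아래에 두 개의 파일을 생성합니다.

- `./results/effdet-train/ckpt-<uid>_<fold>.pth`: validation loss가 가장 낮았던 epoch의 체크포인트
- `./results/effdet-train/` 아래의 학습 로그 파일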

In [None]:
# YAML configuration for the EfficientDet-d7 training run. main_det_train()
# parses this text with yaml.load and wraps the result in an EasyDict.
__effdet_config_str__ = """
seed: 20210309
result_dir: results/effdet-train
comment: null

det_model: 
  name: efficientdet-d7
  # pretrained: null
  # pretrained: results/effdet-train/ckpt-efficientdet-d7_1.pth
  pretrained: results/effdet-train/ckpt-efficientdet-d7_SAM_1.pth

dataset:
  dir: data/ori
  batch_size: 1
  num_cpus: 1
  padding: 20
  
  crop:
    - 192
    - 28
    - 1728
    - 1052

  input_width: 1536 #768 #1536
  input_height: 1024 #512 #1024
  
train:
  SAM: false
  earlystop_patience: 10
  start_epoch: 1
  final_epoch: 200
  
  folds:
    - 1
    # - 2
    # - 3
    # - 4
    # - 5
  checkpoints:
    - null
    # - null
    # - null
    # - null
    # - null

  # lr: 0.000001
  lr: 0.0001
  scheduler:
    # type: CosineAnnealingWarmUpRestarts
    # params:
    #   T_0: 10
    #   T_mult: 1
    #   eta_max: 0.001
    #   T_up: 5
    #   gamma: 0.5
    # type: CosineAnnealingWarmRestarts
    # params:
    #   T_0: 10
    #   T_mult: 1
    #   eta_min: 0.00001
    #   verbose: false
    type: ReduceLROnPlateau
    params:
      factor: 0.5
      patience: 3
      verbose: true

"""

In [None]:
import argparse
import json
import math
import os
import shutil
import sys
from multiprocessing import cpu_count
from pathlib import Path

import cv2
import imageio
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
import torch.optim as optim
import yaml
from easydict import EasyDict
from PIL import Image
from torch import nn, optim
from torch.optim import lr_scheduler
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

import networks
import options
import utils
from datasets import get_det_dataset

In [None]:
class DetTrainOutput:
    """Loss accumulator for one training or validation pass."""

    def __init__(self):
        # Running average of the loss, updated once per batch by the loops.
        self.loss = utils.AverageMeter()

    def freeze(self):
        """Replace the meter in ``self.loss`` by its current average and return ``self``."""
        averaged = self.loss()
        self.loss = averaged
        return self

In [None]:
class DetTrainer:
    """Runs training and validation of the EfficientDet detector for one fold.

    Args:
        C: Configuration object (an ``EasyDict`` in ``main_det_train``). It must
            provide ``det_model``, ``train``, ``result_dir``, ``uid`` and a
            logger in ``log``.
        fold: Fold number passed to ``get_det_dataset`` and used in the
            checkpoint file name.
        checkpoint: Optional checkpoint path loaded after the pretrained one.
    """

    _tqdm_ = dict(ncols=100, leave=False, file=sys.stdout)
    _MISSING = object()  # sentinel: distinguishes "option absent" from an option set to None

    def __init__(self, C, fold=1, checkpoint=None):
        self.C = C
        self.fold = fold

        finetune_cfg = self._finetune_cfg(self.C.det_model, self.C)
        self.det_model = networks.EfficientDetFinetune(
            self.C.det_model.name, pretrained=True, finetune=getattr(finetune_cfg, "do", False)
        )
        self.det_model.cuda()

        # Optimizer
        lr = self._train_opt("lr")
        if self.C.train.SAM:
            self.optimizer = utils.SAM(self.det_model.parameters(), optim.AdamW, lr=lr)
        else:
            self.optimizer = optim.AdamW(self.det_model.parameters(), lr=lr)

        self.epoch = self._train_opt("start_epoch")
        self.best_loss = math.inf
        self.earlystop_cnt = 0

        # Dataset
        self.dl_train, self.dl_valid = get_det_dataset(C, self.fold)

        # Load Checkpoint
        if self.C.det_model.pretrained is not None:
            self.load(self.C.det_model.pretrained)

        if checkpoint is not None:
            self.load(checkpoint)

        # Scheduler
        self.scheduler = options.get_scheduler(self.C, self.optimizer, self.epoch - 2)

    def _train_opt(self, key):
        """Return a training option, preferring ``C.train.<key>`` over ``C.<key>``.

        The options live under ``train`` in the config shipped with this file;
        the top-level fallback keeps flat configs working.
        """
        for section in (getattr(self.C, "train", None), self.C):
            value = getattr(section, key, self._MISSING)
            if value is not self._MISSING:
                return value
        raise AttributeError(f"config defines neither 'train.{key}' nor '{key}'")

    @staticmethod
    def _finetune_cfg(*sections):
        """Return the first ``finetune`` section found in ``sections``, or None.

        A missing section means fine-tuning is disabled instead of an error.
        """
        for section in sections:
            cfg = getattr(section, "finetune", None)
            if cfg is not None:
                return cfg
        return None

    def _best_ckpt_path(self):
        """Path of the best-so-far checkpoint for this run and fold."""
        return Path(self.C.result_dir) / f"ckpt-{self.C.uid}_{self.fold}.pth"

    def save(self, path):
        """Write model/optimizer state and training counters to ``path``."""
        torch.save(
            {
                "model": self.det_model.state_dict(),
                "optimizer": self.optimizer.state_dict(),
                "epoch": self.epoch,
                "best_loss": self.best_loss,
                "earlystop_cnt": self.earlystop_cnt,
            },
            path,
        )

    def load(self, path):
        """Restore the state written by ``save``; training resumes at the next epoch."""
        print("Load pretrained", path)
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        ckpt = torch.load(path)
        self.det_model.load_state_dict(ckpt["model"])
        self.optimizer.load_state_dict(ckpt["optimizer"])
        self.epoch = ckpt["epoch"] + 1
        self.best_loss = ckpt["best_loss"]
        self.earlystop_cnt = ckpt["earlystop_cnt"]

    def close(self):
        """Close the logger stored in ``C.log``, if there is one.

        The trainer never had a ``self.logger`` attribute, so the logger is
        taken from the config, which is also where ``callback`` logs to.
        """
        log = getattr(self.C, "log", None)
        if log is None:
            return
        closer = getattr(log, "close", None)
        if callable(closer):
            closer()
            return
        # Loggers without close() but with an open ``file`` handle.
        handle = getattr(log, "file", None)
        if handle is not None:
            handle.close()

    def train_loop(self):
        """Train for one epoch and return the averaged loss in a ``DetTrainOutput``."""
        self.det_model.train()

        O = DetTrainOutput()
        with tqdm(total=len(self.dl_train.dataset), **self._tqdm_, desc=f"Train {self.epoch:03d}") as t:
            for files, imgs, annots in self.dl_train:
                imgs_, annots_ = imgs.cuda(non_blocking=True), annots.cuda(non_blocking=True)
                loss = self.det_model(imgs_, annots_)

                self.optimizer.zero_grad()
                loss.backward()
                if isinstance(self.optimizer, utils.SAM):
                    # Clear the gradients after the perturbation; otherwise the
                    # second backward pass accumulates onto the first gradient
                    # and the base optimizer steps with their sum.
                    self.optimizer.first_step(zero_grad=True)
                    self.det_model(imgs_, annots_).backward()
                    self.optimizer.second_step()
                else:
                    self.optimizer.step()

                O.loss.update(loss.item(), len(files))
                t.set_postfix_str(f"loss: {loss.item():.6f}", refresh=False)
                t.update(len(files))

        return O.freeze()

    @torch.no_grad()
    def valid_loop(self):
        """Evaluate on the validation loader and return the averaged loss."""
        self.det_model.eval()

        O = DetTrainOutput()
        with tqdm(total=len(self.dl_valid.dataset), **self._tqdm_, desc=f"Valid {self.epoch:03d}") as t:
            for files, imgs, annots in self.dl_valid:
                imgs_, annots_ = imgs.cuda(non_blocking=True), annots.cuda(non_blocking=True)
                loss = self.det_model(imgs_, annots_)

                O.loss.update(loss.item(), len(files))
                t.set_postfix_str(f"loss: {loss.item():.6f}", refresh=False)
                t.update(len(files))

        return O.freeze()

    @torch.no_grad()
    def callback(self, to: DetTrainOutput, vo: DetTrainOutput):
        """Log the epoch, step the scheduler and update checkpoint/early-stop state."""
        self.C.log.info(
            f"Epoch: {self.epoch:03d},",
            f"loss: {to.loss:.6f};{vo.loss:.6f},",
        )
        self.C.log.flush()

        if isinstance(self.scheduler, lr_scheduler.CosineAnnealingWarmRestarts):
            self.scheduler.step()
        elif isinstance(self.scheduler, lr_scheduler.ReduceLROnPlateau):
            self.scheduler.step(vo.loss)

        if self.best_loss > vo.loss:
            # New best validation loss: checkpoint it and reset the patience counter.
            self.best_loss = vo.loss
            self.earlystop_cnt = 0
            self.save(self._best_ckpt_path())
        else:
            self.earlystop_cnt += 1

    def fit(self):
        """Train until ``final_epoch`` or early stopping, then reload the best checkpoint."""
        final_epoch = self._train_opt("final_epoch")
        patience = self._train_opt("earlystop_patience")
        finetune_cfg = self._finetune_cfg(self.C, getattr(self.C, "det_model", None))
        finetune_on = getattr(finetune_cfg, "do", False)

        for self.epoch in range(self.epoch, final_epoch + 1):
            if finetune_on:
                if self.epoch <= finetune_cfg.step1_epochs:
                    self.det_model.unfreeze_tail()
                elif self.epoch <= finetune_cfg.step2_epochs:
                    self.det_model.unfreeze_head()
                else:
                    self.det_model.unfreeze()

            to = self.train_loop()
            vo = self.valid_loop()
            self.callback(to, vo)

            if self.earlystop_cnt > patience:
                self.C.log.info("Stop training at epoch", self.epoch)
                break

        # No checkpoint exists if no epoch ran or the loss never improved.
        best_path = self._best_ckpt_path()
        if best_path.exists():
            self.load(best_path)
        else:
            self.C.log.warn("No checkpoint found at", best_path)

In [None]:
def main_det_train():
    """Train the detector once per fold listed in ``__effdet_config_str__``.

    For every ``(fold, checkpoint)`` pair from ``train.folds`` /
    ``train.checkpoints`` a fresh config is parsed, the result directory is
    created, a run id and logger are attached to the config, and a
    ``DetTrainer`` is fitted.
    """
    base = EasyDict(yaml.load(__effdet_config_str__, yaml.FullLoader))

    for fold, checkpoint in zip(base.train.folds, base.train.checkpoints):
        # Parse a fresh config per fold (it is mutated below). The config is
        # the in-file string; there is no ``args`` object to read a path from.
        C = EasyDict(yaml.load(__effdet_config_str__, yaml.FullLoader))
        result_dir = Path(C.result_dir)
        result_dir.mkdir(parents=True, exist_ok=True)

        if C.dataset.num_cpus < 0:
            C.dataset.num_cpus = cpu_count()
        C.uid = f"{C.det_model.name}"
        C.uid += "-sam" if C.train.SAM else ""
        C.uid += f"-{C.dataset.input_width}x{C.dataset.input_height}"
        C.uid += f"-pad{C.dataset.padding}"
        C.uid += f"-{C.comment}" if C.comment is not None else ""
        # The fold comes from the loop; the config has no ``train.fold`` entry.
        C.uid += f"_{fold}"

        # The trainer logs through C.log, so it must exist before training starts.
        C.log = utils.CustomLogger(result_dir / f"{C.uid}.log")
        try:
            trainer = DetTrainer(C, fold, checkpoint)
            trainer.fit()
        finally:
            # Release the log file even when training fails.
            C.log.flush()
            C.log.file.close()

In [None]:
# Run the detector training for every fold listed in the config.
main_det_train()

# 2. Efficientdet-d7으로 test_imgs에서 roi만 잘라내기