이미지 크기가 서로 다른 문제는 일단 어떻게 해결할지 아직 생각이 나지 않는다.  
box를 자를 때 비율이 맞도록 자르는 방법은 어떨까?  
그런데 원래 이미지 자체의 비율이 제각각이라(누워 있거나 서 있거나) 어려울 것 같다.

우선 augmentation은 쓰지 않고, 기본 성능을 측정해보자

test는 누워서 하는 동작이 절반인데 train에는 그런 동작이 별로 없고, 등장하는 사람도 다르다.  
철봉 운동 이미지 중 일부는 손이 잘려 있다.

---

## 라이브러리 로딩

In [1]:
%load_ext lab_black

In [2]:
import math
import os
import random
import shutil
import json
import logging
import sys
from collections import defaultdict
from datetime import datetime
from io import TextIOWrapper
from pathlib import Path
from typing import Callable, List, Sequence, Tuple

import albumentations as A
import cv2
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import KFold
from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset, Subset
from torch.utils.tensorboard import SummaryWriter
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from tqdm import tqdm

import utils
import networks

---

## 하이퍼 파라미터

In [3]:
# Directory that receives this run's log file, per-batch loss dump and checkpoint.
RESULT_DIR = Path("results/HRNet학습")

In [4]:
# Learning rate handed to the optimizer.
LR = 1e-4  # kept deliberately small because this is transfer learning
# Batch size shared by every DataLoader built from dl_kwargs.
BATCH_SIZE = 10
# First epoch number used by the training loop.
START_EPOCH = 1
# When True the optimizer is wrapped in utils.SAM instead of plain AdamW.
SAM = False
# Configured fold numbers; only logged in the visible cells.
FOLDS = [1, 2, 3, 4, 5]
# Width passed to PoseHighResolutionNet and used to pick the pretrained weight file.
HRNET_WIDTH = 48
# When True nn.L1Loss is used as the training criterion instead of KeypointLoss.
USE_L1 = False

In [5]:
# Timestamp-based run identifier (YYYYMMDD-HHMMSS) used in output file names,
# plus the fixed random seed for this experiment.
n = datetime.now()
UID = n.strftime("%Y%m%d-%H%M%S")
SEED = 20210309

In [6]:
# Seed the RNGs, make sure the output directory exists and open the run log.
utils.seed_everything(SEED, deterministic=False)
RESULT_DIR.mkdir(parents=True, exist_ok=True)
log = utils.CustomLogger(RESULT_DIR / f"log_{UID}.log", "a")
log.info("학습 시작")
# Record the run configuration. Each message is a single pre-formatted string,
# the same way the training loop logs, instead of passing the value as an
# extra positional argument: the captured cell output only ever showed the
# first message, which suggests the multi-argument form was not being emitted.
log.info(f"UID: {UID}")
log.info(f"SEED: {SEED}")
log.info(f"LR: {LR}")
log.info(f"BATCH_SIZE: {BATCH_SIZE}")
log.info(f"START_EPOCH: {START_EPOCH}")
log.info(f"SAM: {SAM}")
log.info(f"FOLDS: {FOLDS}")
log.info(f"HRNET_WIDTH: {HRNET_WIDTH}")
log.info(f"USE_L1: {USE_L1}")

[34m[2021-03-10 21:01:30  INFO] 학습 시작[0m


---

## 데이터 로딩

In [7]:
# Sorted arrays of the cropped train / test image paths.
train_imgs = np.array(sorted(Path("data/box2/train_imgs/").glob("*.jpg")))
test_imgs = np.array(sorted(Path("data/box2/test_imgs/").glob("*.jpg")))

In [8]:
# Load the annotation table, drop its first column and view the remaining
# interleaved columns (even = first coordinate, odd = second coordinate) as an
# array of shape (rows, points, 2) in float32.
keypoints = pd.read_csv("data/ori/train_df.csv").to_numpy()[:, 1:].astype(np.float32)
keypoints = keypoints.reshape(keypoints.shape[0], keypoints.shape[1] // 2, 2)

In [9]:
# Crop-box metadata, indexed below by the "train" and "test" keys.
offsets = json.loads(Path("data/box2/offset.json").read_text())

In [10]:
class ImageDataset(Dataset):
    """Cropped images resized to a fixed size, with optional keypoint targets.

    Each item is ``(name, x, offset, ratio)`` and, when keypoints were given,
    additionally ``y``:

    * ``name``   - file name of the image.
    * ``x``      - float32 image tensor, channels first, scaled to ``[0, 1]``.
    * ``offset`` - int64 tensor with the first two entries of the item's
      ``"boxes"`` record; it is subtracted from the keypoint coordinates.
    * ``ratio``  - float32 tensor ``[out_w / W, out_h / H]`` describing the resize.
    * ``y``      - int64 tensor with one flattened heatmap index per keypoint
      (``col + row * heatmap_width``), usable as a cross-entropy class target.

    Args:
        files: Indexable collection of image paths (items must expose ``.name``).
        offsets: Indexable collection of dicts holding a ``"boxes"`` entry.
        keypoints: Optional per-image keypoint coordinates in original-image
            pixels, indexable as ``keypoints[idx][:, 0]`` / ``[:, 1]``.
        size: ``(width, height)`` every image is resized to.
        stride: Downsampling factor between the resized image and the heatmap.
    """

    def __init__(self, files, offsets, keypoints=None, size=(576, 768), stride=4):
        super().__init__()
        self.files = files
        self.offsets = offsets
        self.keypoints = keypoints
        self.size = tuple(size)
        self.stride = stride

    def __len__(self):
        return len(self.files)

    def _encode_keypoints(self, keypoint, offset, ratio):
        """Map keypoints in original pixels to flattened heatmap indices.

        Coordinates are shifted by ``offset``, scaled by ``ratio``, divided by
        the stride and truncated to integers. They are then clamped to the
        heatmap bounds: a keypoint lying outside the crop would otherwise wrap
        into a neighbouring row or yield an index outside the valid class range.
        """
        heat_w = self.size[0] // self.stride
        heat_h = self.size[1] // self.stride
        col = ((keypoint[:, 0] - offset[0]) * ratio[0] / self.stride).type(torch.int64)
        row = ((keypoint[:, 1] - offset[1]) * ratio[1] / self.stride).type(torch.int64)
        col = col.clamp(0, heat_w - 1)
        row = row.clamp(0, heat_h - 1)
        return col + row * heat_w

    def __getitem__(self, idx):
        f = self.files[idx]
        img = imageio.imread(f)
        H, W, _ = img.shape
        out_w, out_h = self.size
        # TODO: for landscape images, make the width the long (768) side instead.
        ratio = torch.tensor([out_w / W, out_h / H], dtype=torch.float32)
        img = cv2.resize(img, (out_w, out_h))
        x = torch.as_tensor(img, dtype=torch.float32).permute(2, 0, 1) / 255.0
        offset = torch.tensor(self.offsets[idx]["boxes"][:2], dtype=torch.int64)

        if self.keypoints is None:
            return f.name, x, offset, ratio

        keypoint = torch.tensor(self.keypoints[idx], dtype=torch.float32)
        # TODO: add augmentation later.
        # Targets are positions in the flattened heatmap (one class index per
        # keypoint) rather than one-hot planes.
        y = self._encode_keypoints(keypoint, offset, ratio)
        return f.name, x, offset, ratio, y

In [11]:
# Full labelled training set and the unlabelled test set.
ds_train_total = ImageDataset(files=train_imgs, offsets=offsets["train"], keypoints=keypoints)
ds_test = ImageDataset(files=test_imgs, offsets=offsets["test"])

In [12]:
# Keyword arguments shared by every DataLoader created below.
dl_kwargs = {"batch_size": BATCH_SIZE, "num_workers": 4}

---

## 학습 준비

학습 손실로는 cross-entropy를 사용하고, 학습 중에는 RMSE를 함께 표시한다.

In [13]:
# Build the HRNet pose network at the configured width.
model = networks.PoseHighResolutionNet(width=HRNET_WIDTH)

In [14]:
# Load the pretrained weights matching HRNET_WIDTH. The model is still on the
# CPU here, so map the checkpoint to CPU explicitly; this also avoids a failure
# when the file was saved from a device that is not available on this machine.
model.load_state_dict(
    torch.load(f"networks/models/pose_hrnet_w{HRNET_WIDTH}_384x288.pth", map_location="cpu")
)

<All keys matched successfully>

In [15]:
# Swap the pretrained output head for a 24-channel 1x1 convolution, reusing the
# pretrained weights for the leading channels; the rest keep their default
# initialisation. The input-channel count and the number of pretrained output
# channels are read from the existing head instead of being hard-coded, so the
# cell keeps working when HRNET_WIDTH changes.
# NOTE(review): assumes the existing head is an ungrouped conv whose weight is
# laid out as (out_channels, in_channels, kH, kW) - confirm against networks.py.
pretrained_head = model.final_layer
n_pretrained, in_channels = pretrained_head.weight.shape[:2]
final_layer = nn.Conv2d(in_channels, 24, 1)
with torch.no_grad():
    final_layer.weight[:n_pretrained] = pretrained_head.weight
    final_layer.bias[:n_pretrained] = pretrained_head.bias
model.final_layer = final_layer

In [16]:
# Move the network to the GPU before the optimizer is created from its parameters.
model = model.cuda()

In [17]:
# AdamW over all model parameters, optionally wrapped in the SAM optimizer.
optimizer = (
    utils.SAM(model.parameters(), optim.AdamW, lr=LR)
    if SAM
    else optim.AdamW(model.parameters(), lr=LR)
)

In [18]:
class KeypointLoss(nn.Module):
    """Cross-entropy over heatmap positions, one classification per keypoint."""

    def forward(self, x, y):
        """Return the mean cross-entropy between heatmap logits and targets.

        ``x`` has its trailing dimensions (from dim 2 on) flattened into one
        class axis and its first two dimensions merged into one sample axis;
        ``y`` has its first two dimensions merged the same way and supplies the
        class index for each sample.
        """
        per_map_logits = torch.flatten(x, start_dim=2)
        logits = torch.flatten(per_map_logits, start_dim=0, end_dim=1)
        targets = torch.flatten(y, start_dim=0, end_dim=1)
        return F.cross_entropy(logits, targets)

In [19]:
class KeypointRMSE(nn.Module):
    """Root-mean-square distance between predicted and target heatmap cells."""

    @torch.no_grad()
    def forward(self, x, y):
        """Return 4 * sqrt(mean squared cell distance) as a scalar tensor.

        The prediction for each map is the argmax over its flattened cells.
        Both prediction and target indices are split into column (``% W``) and
        row (``// W``) using the size of dim 3 of ``x`` as the width.
        """
        width = x.size(3)
        pred_idx = x.flatten(2).argmax(2)
        dx = pred_idx % width - y % width
        dy = pred_idx // width - y // width
        squared_dist = (dx**2 + dy**2).type(torch.float32)
        return 4 * squared_dist.mean().sqrt()

In [20]:
# Training criterion (L1 or heatmap cross-entropy, chosen by USE_L1) and the
# RMSE metric reported next to it.
# NOTE(review): the datasets emit integer index targets; confirm nn.L1Loss is
# given compatible targets before enabling USE_L1.
criterion = (nn.L1Loss() if USE_L1 else KeypointLoss()).cuda()
criterion_rmse = KeypointRMSE().cuda()

In [21]:
# Halve the learning rate when the metric passed to scheduler.step() (the
# validation loss in the training loop) stops improving; patience is 4 epochs.
scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=4, verbose=True)

---

## 학습

In [22]:
def train_loop(dl: DataLoader):
    """Train the global ``model`` for one pass over ``dl``.

    Uses the notebook-level ``model``, ``optimizer``, ``criterion``,
    ``criterion_rmse`` and ``epoch`` (the last one only labels the progress bar).

    Args:
        dl: Loader yielding ``(names, x, offset, ratio, y)`` batches.

    Returns:
        A tuple ``(loss, rmse, results)``: the values obtained by calling the
        two ``utils.AverageMeter`` instances (presumably running means) and a
        dict with the per-batch image names, loss values and RMSE values.
    """
    torch.cuda.empty_cache()
    model.train()

    meanloss = utils.AverageMeter()
    meanrmse = utils.AverageMeter()
    results = {"image": [], "loss": [], "rmse": []}
    with tqdm(total=len(dl.dataset), ncols=100, leave=False, file=sys.stdout, desc=f"Train[{epoch:03d}]") as t:
        for f, x, offset, ratio, y in dl:
            x_ = x.cuda()
            y_ = y.cuda()
            p_ = model(x_)
            loss = criterion(p_, y_)
            rmse = criterion_rmse(p_, y_)

            optimizer.zero_grad()
            loss.backward()
            if isinstance(optimizer, utils.SAM):
                optimizer.first_step()
                # Start the second pass from clean gradients so the update uses
                # only the gradient at the perturbed weights (harmless if
                # first_step already cleared them).
                optimizer.zero_grad()
                # Do not rebind ``loss`` here: ``.backward()`` returns None and
                # the first-pass loss is what gets reported below.
                criterion(model(x_), y_).backward()
                optimizer.second_step()
            else:
                optimizer.step()

            # Read the scalars once; each .item() call synchronises with the GPU.
            loss_value = loss.item()
            rmse_value = rmse.item()
            meanloss.update(loss_value)
            meanrmse.update(rmse_value)
            results["image"].append(f)
            results["loss"].append(loss_value)
            results["rmse"].append(rmse_value)
            t.set_postfix_str(f"loss: {loss_value:.6f}, rmse: {rmse_value:.6f}", refresh=False)
            t.update(len(x))

    return meanloss(), meanrmse(), results

In [23]:
@torch.no_grad()
def valid_loop(dl: DataLoader):
    """Evaluate the global ``model`` on ``dl`` without computing gradients.

    Uses the notebook-level ``model``, ``criterion``, ``criterion_rmse`` and
    ``epoch`` (the last one only labels the progress bar).

    Args:
        dl: Loader yielding ``(names, x, offset, ratio, y)`` batches.

    Returns:
        A tuple ``(loss, rmse, results)``: the values obtained by calling the
        two ``utils.AverageMeter`` instances and a dict with the per-batch
        image names, loss values and RMSE values.
    """
    torch.cuda.empty_cache()
    model.eval()

    loss_meter = utils.AverageMeter()
    rmse_meter = utils.AverageMeter()
    history = {"image": [], "loss": [], "rmse": []}
    progress = tqdm(
        total=len(dl.dataset),
        ncols=100,
        leave=False,
        file=sys.stdout,
        desc=f"Valid[{epoch:03d}]",
    )
    with progress as bar:
        for names, images, _offset, _ratio, targets in dl:
            images_gpu = images.cuda()
            targets_gpu = targets.cuda()
            preds = model(images_gpu)
            batch_loss = criterion(preds, targets_gpu).item()
            batch_rmse = criterion_rmse(preds, targets_gpu).item()

            loss_meter.update(batch_loss)
            rmse_meter.update(batch_rmse)
            history["image"].append(names)
            history["loss"].append(batch_loss)
            history["rmse"].append(batch_rmse)
            bar.set_postfix_str(f"loss: {batch_loss:.6f}, rmse: {batch_rmse:.6f}", refresh=False)
            bar.update(len(images))

    return loss_meter(), rmse_meter(), history

In [24]:
# 5-fold shuffled split, seeded so the folds are the same on every run.
kf = KFold(n_splits=5, shuffle=True, random_state=SEED)

In [28]:
# Fold trained by this run (1-based). No visible cell ever assigned FOLD, so a
# fresh top-to-bottom run stopped here with a NameError; default to the first
# configured fold and edit this line to train another one.
FOLD = FOLDS[0]
train_idx, valid_idx = list(kf.split(ds_train_total))[FOLD - 1]

ds_train = Subset(ds_train_total, train_idx)
ds_valid = Subset(ds_train_total, valid_idx)
dl_train = DataLoader(ds_train, **dl_kwargs, shuffle=True)
dl_valid = DataLoader(ds_valid, **dl_kwargs, shuffle=False)

best_loss = math.inf
early_stop_cnt = 0

for epoch in range(START_EPOCH, 999):
    # train_loop / valid_loop read ``epoch`` as a global for their progress bars.
    tloss, trmse, tres = train_loop(dl_train)
    vloss, vrmse, vres = valid_loop(dl_valid)

    # Logging: "train ; valid" for both loss and RMSE.
    log.info(f'Epoch: {epoch:03d}, loss: {tloss:.6f} ; {vloss:.6f}, rmse {trmse:.6f} ; {vrmse:.6f}')
    scheduler.step(vloss)

    # Early stopping on validation loss.
    if vloss < best_loss:
        # New best: reset the counter and overwrite this run's loss dump and checkpoint.
        best_loss = vloss
        early_stop_cnt = 0

        with open(RESULT_DIR/f'loss-{UID}.json', 'w') as f:
            json.dump({'train': tres, 'valid': vres}, f)

        torch.save({
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch,
        }, RESULT_DIR/f'ckpt-{UID}.pth')
    elif early_stop_cnt >= 10:
        # Reached on the 11th consecutive epoch without improvement.
        log.info(f'Stop training at epoch {epoch}.')
        break
    else:
        early_stop_cnt +=1

[34m[2021-03-10 21:04:39  INFO] Epoch: 001, loss: 8.104329 ; 6.365505, rmse 170.510487 ; 127.524917[0m
[34m[2021-03-10 21:07:43  INFO] Epoch: 002, loss: 5.429306 ; 4.890924, rmse 105.545063 ; 85.588395[0m
[34m[2021-03-10 21:10:47  INFO] Epoch: 003, loss: 4.388287 ; 4.283707, rmse 74.655548 ; 58.925705[0m
[34m[2021-03-10 21:13:51  INFO] Epoch: 004, loss: 3.894571 ; 4.053457, rmse 57.168282 ; 51.238611[0m
[34m[2021-03-10 21:16:55  INFO] Epoch: 005, loss: 3.588316 ; 3.917953, rmse 49.147511 ; 45.867757[0m
[34m[2021-03-10 21:19:58  INFO] Epoch: 006, loss: 3.348715 ; 3.830489, rmse 43.668880 ; 43.681314[0m
[34m[2021-03-10 21:23:02  INFO] Epoch: 007, loss: 3.140721 ; 3.799157, rmse 38.995623 ; 43.613693[0m
[34m[2021-03-10 21:26:07  INFO] Epoch: 008, loss: 2.947796 ; 3.776302, rmse 36.506101 ; 43.265954[0m
[34m[2021-03-10 21:29:10  INFO] Epoch: 009, loss: 2.774278 ; 3.805148, rmse 33.028875 ; 38.951607[0m
[34m[2021-03-10 21:32:14  INFO] Epoch: 010, loss: 2.600559 ; 3.842333

KeyboardInterrupt: 

horizontal flip을 할 때는 left/right 구분이 있는 keypoint들만 좌우 인덱스를 서로 바꿔줘야 한다.

TODO: criterion으로 L1 loss를 써보기