# 1. Prerequisites

## 1-1. Python Libraries

- Albumentations
- opencv-python
- imageio
- numpy
- pandas
- timm
- torch==1.7.0 with cuda toolkit 11.2.2, cudnn8
- pyaml
- adabelief_pytorch
- scikit-learn
- tqdm

## 1-2. Download Pre-trained Weights

```bash
wget -i "pretrained_weights.txt" -P results
```

In [2]:
import gc
import json
import math
import random
import re
import shutil
import sys
from collections import defaultdict
from dataclasses import dataclass
from multiprocessing import Pool
from os import PathLike
from pathlib import Path
from typing import Any, Tuple, List

import albumentations as A
import cv2
import imageio
import numpy as np
import pandas as pd
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import yaml
from adabelief_pytorch import AdaBelief
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import StratifiedKFold
from timm.models.layers import Conv2dSame
from timm.models.nfnet import ScaledStdConv2dSame
from torch import Tensor
from torch.optim import AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

In [3]:
from const.flip import id_flip
from const.label_names import id_to_label, label_names
from utils import AverageMeter, CustomLogger, make_result_dir, seed_everything, tqdm_kwargs

# 1. Dataset

The original dataset is located in `./data/ori`.
And I will make new dataset by cropping the original dataset and it will be located in `./data/crop512_9`.

The new dataset contains both `*.png` and `*.pth` files.
But the `*.pth` files are not used in this code, so you can just ignore it.

In [16]:
dsize = (512, 512)
crop_padding = 120
ratio_limit = 1.2
seq_len = 5

# data numbers where its keypoints contains error
wrong_data = [312, 317, 318, 327, 340, 343, 475, 543, 619, 622, 750, 746]

In [17]:
imagenet_mean = np.array([0.485, 0.456, 0.406]).reshape(1, 1, 3)
imagenet_std = np.array([0.229, 0.224, 0.225]).reshape(1, 1, 3)

In [18]:
out_dir = Path("./data/crop512_9")
if out_dir.exists():
    shutil.rmtree(out_dir)

train_out_dir = out_dir / "train"
test_out_dir = out_dir / "test"
train_out_dir.mkdir(parents=True, exist_ok=True)
test_out_dir.mkdir(parents=True, exist_ok=True)

In [19]:
def elastic_resize(im, bbox, dsize, ratio_limit):
    """resize while keep aspect ratio"""
    # bbox (x1, y1, x2, y2)
    # dsize (w, h)
    # ratio_limit: float

    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]

    if h == w:
        return cv2.resize(im[bbox[1] : bbox[3], bbox[0] : bbox[2]], dsize)

    long = h > w
    a, b = (h, w) if long else (w, h)
    ratio = a / b

    if ratio <= ratio_limit:
        return cv2.resize(im[bbox[1] : bbox[3], bbox[0] : bbox[2]], dsize)

    e, f, g = (bbox[0], bbox[2], im.shape[1]) if long else (bbox[1], bbox[3], im.shape[0])

    db = int(a / ratio_limit)
    c = db - b
    e -= math.ceil(c / 2)
    f += math.floor(c / 2)

    if e < 0:
        f += -e
        e = 0
    elif f > g:
        e -= f - g
        f = g

    e = max(0, e)
    f = min(f, g)

    if long:
        bbox[0], bbox[2] = e, f
    else:
        bbox[1], bbox[3] = e, f
    fb = f - e

    return cv2.resize(im[bbox[1] : bbox[3], bbox[0] : bbox[2]], dsize)

In [20]:
def find_bbox(im, u):
    """
    refer to 게으름뱅이 code share:
        https://dacon.io/competitions/official/235805/codeshare/3373?page=1&dtype=recent
    """
    mask = (im == [255, 0, 0]).all(axis=-1) | (im == [0, 255, 0]).all(axis=-1)

    pos = np.stack(mask.nonzero())
    bbox = np.round(
        np.array(
            (
                np.clip(pos[1, :].min() - u, 0, 1920),
                np.clip(pos[0, :].min() - u, 0, 1920),
                np.clip(pos[1, :].max() + u, 0, 1920),
                np.clip(pos[0, :].max() + u, 0, 1920),
            ),
            dtype=np.float64,
        )
    ).astype(np.int64)

    return bbox

In [21]:
def process_image(impath: Path, keypoints: np.ndarray):
    im = imageio.imread(impath)

    # crop
    u = crop_padding

    if int(impath.parent.name) in wrong_data:
        bbox = find_bbox(im, u)
    else:
        v = keypoints
        bbox = np.round(
            np.array(
                (
                    np.clip(v[:, 0].min() - u, 0, 1920),
                    np.clip(v[:, 1].min() - u, 0, 1080),
                    np.clip(v[:, 0].max() + u, 0, 1920),
                    np.clip(v[:, 1].max() + u, 0, 1080),
                ),
                dtype=np.float32,
            )
        ).astype(np.int64)

    im = elastic_resize(im, bbox, dsize, ratio_limit)

    # standardization
    im2 = (im.astype(np.float32) / 255.0 - imagenet_mean) / imagenet_std
    im2 = torch.from_numpy(im2).permute(2, 0, 1).type(torch.float32)

    return im, im2

## 1-1. Generate Training Dataset

The training image file names are following this format `{dir index}_{image index}_{label index}.png`, e.g. `001_02_003.png`.

In [22]:
def process_dir_train(dirpath: Path):
    with open(dirpath / f"{dirpath.name}.json") as f:
        j = json.load(f)

    diridx = int(dirpath.name)

    label = id_to_label[j["action"][0]]
    label = torch.tensor(label, dtype=torch.long)

    for i, annot in enumerate(j["annotations"]):
        impath = dirpath / f"{i}.png"
        im_org, im = process_image(impath, np.array(annot["data"]))

        # save image
        fname = f"{diridx:03d}_{i:02d}_{label.item():03d}"
        imageio.imwrite(train_out_dir / (fname + ".png"), im_org)
        torch.save(im, train_out_dir / (fname + ".pth"))

In [23]:
dirs = sorted(list(Path("./data/ori/train").glob("*")))

In [24]:
len(dirs)  # 649

649

There are 649 training directories containing multiple images.
I applied parallelism because processing all of these data takes too long.

In [25]:
with Pool() as pool:
    with tqdm(total=len(dirs), ncols=100, file=sys.stdout) as t:
        for _ in pool.imap_unordered(process_dir_train, dirs):
            t.update()

100%|█████████████████████████████████████████████████████████████| 649/649 [12:01<00:00,  1.11s/it]


## 1-2. Generate Test Dataset

The test image file names are following this format `{dir index}_{image index}_{label index}.png`, e.g. `001_02.png`.
It's basically similar to training dataset but there is no label.

In [26]:
def process_dir_test(dirpath: Path):
    with open(dirpath / f"{dirpath.name}.json") as f:
        j = json.load(f)

    diridx = int(dirpath.name)

    for i, annot in enumerate(j["annotations"]):
        impath = dirpath / f"{i}.png"
        im_org, im = process_image(impath, np.array(annot["data"]))

        # save image
        fname = f"{diridx:03d}_{i:02d}"
        imageio.imwrite(test_out_dir / (fname + ".png"), im_org)
        torch.save(im, test_out_dir / (fname + ".pth"))

In [27]:
dirs = sorted(list(Path("./data/ori/test").glob("*")))

In [28]:
len(dirs)  # 217

217

In [29]:
with Pool() as pool:
    with tqdm(total=len(dirs), ncols=100, file=sys.stdout) as t:
        for _ in pool.imap_unordered(process_dir_test, dirs):
            t.update()

100%|█████████████████████████████████████████████████████████████| 217/217 [04:15<00:00,  1.18s/it]


# 2. Inference

In [9]:
from main import Config, Net, GestureDataset, DATA_DIR, DATA_NAME, N_CLASSES, FileLoader

In [12]:
def make_test_dataset(config: Config):
    files_test = sorted(list((DATA_DIR / DATA_NAME / "test").glob("*.png")))
    fileloader = FileLoader(in_memory=config.in_memory, files=files_test)

    data = defaultdict(list)
    for file in files_test:
        # test filename: {diridx:3d}_{fileidx:2d}
        diridx = int(file.stem[:3])
        data[diridx].append(file)

    items_test = []
    for diridx in data:
        if len(data[diridx]) > config.len_sequence:
            for i in range(len(data[diridx]) - config.len_sequence):
                items_test.append((data[diridx][i : i + config.len_sequence], diridx))
        elif len(data[diridx]) == config.len_sequence:
            items_test.append((data[diridx], diridx))
        else:
            fake = [data[diridx][-1] for _ in range(config.len_sequence - len(data[diridx]))]
            items_test.append((data[diridx] + fake, diridx))

    dl_kwargs = dict(batch_size=config.batch_size, num_workers=config.num_workers, pin_memory=True)
    ds_test = GestureDataset(items_test, fileloader=fileloader, augmentation=False)
    dl_test = DataLoader(ds_test, **dl_kwargs, shuffle=False)

    return dl_test

In [13]:
config = Config(
    debug=False,
    finetune=True,
    model_name="tf_efficientnetv2_l_in21ft1k",
    batch_size=18,
    sam=True,
    pretrained=True,
    optimizer_name="AdaBelief",
    fold=1,
    seed=1,
    num_workers=6,
    in_memory=True,
    lr=1e-3,
)
config

Config(exp_num='001', ver_num=None, result_dir_root=PosixPath('results/exp'), result_dir=None, seed=1, debug=False, model_name='tf_efficientnetv2_l_in21ft1k', checkpoint_path=None, len_sequence=5, pretrained=True, criterion='focal', num_folds=5, fold=1, epochs=100, finetune=True, finetune_step1_epochs=2, finetune_step2_epochs=4, optimizer_name='AdaBelief', lr=0.001, weight_decay=0.01, scheduler=<class 'torch.optim.lr_scheduler.ReduceLROnPlateau'>, sam=True, look_ahead=True, look_ahead_k=5, look_ahead_alpha=0.5, batch_size=18, num_workers=6, cleared_image=False, in_memory=True)

In [14]:
dl = make_test_dataset(config)

in-memory loading...: 100%|████████████████████████████████████| 2038/2038 [00:18<00:00, 108.37it/s]


In [16]:
model = Net(
    name=config.model_name,
    n_classes=N_CLASSES,
    pretrained=False,
    len_sequence=config.len_sequence,
).cuda()

In [20]:
model.eval()
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f41bc1aef50>

In [23]:
def submit(ckpt_path, ver_idx):
    # print('Load checkpoint:', ckpt_path)
    model.load_state_dict(torch.load(ckpt_path))
    model.eval()

    ret = defaultdict(list)
    with tqdm(total=len(dl.dataset), ncols=100, file=sys.stdout, desc=f"submission{ver_idx:02d}") as t:
        for images, diridxes in dl:
            logits = model(images.cuda()).cpu()

            for logit, diridx in zip(logits, diridxes):
                ret[diridx.item()].append(logit)

                t.update()

    out_ms = defaultdict(list)
    for diridx, logits in ret.items():
        out_ms["Image_Path"].append(f"./test\\{diridx}")

        logits = torch.stack(logits)
        logit_ms = logits.mean(dim=0).softmax(dim=0)

        for k in range(196):
            if k in id_to_label:
                out_ms[f"Label_{k}"].append(logit_ms[id_to_label[k]].item())

    df_ms = pd.DataFrame(out_ms)

    out_df_path = Path(f"./results/submission/{ver_idx:02d}.csv")
    out_df_path.parent.mkdir(parents=True, exist_ok=True)
    # print("Write result to", out_df_path % "_")
    df_ms.to_csv(out_df_path, index=False)

In [29]:
%%time
for i, ckpt_path in enumerate(sorted(list(Path('results').glob('exp*.pth')))):
    submit(ckpt_path,i)

submission00: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 25.42it/s]
submission01: 100%|███████████████████████████████████████████████| 959/959 [00:38<00:00, 25.15it/s]
submission02: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 25.78it/s]
submission03: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 25.35it/s]
submission04: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 25.39it/s]
submission05: 100%|███████████████████████████████████████████████| 959/959 [00:38<00:00, 25.17it/s]
submission06: 100%|███████████████████████████████████████████████| 959/959 [00:38<00:00, 25.08it/s]
submission07: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 25.62it/s]
submission08: 100%|███████████████████████████████████████████████| 959/959 [00:38<00:00, 25.14it/s]
submission09: 100%|███████████████████████████████████████████████| 959/959 [00:37<00:00, 2

# 3. Ensemble Results

In [33]:
dfs = [pd.read_csv(file) for file in list(Path("results/submission").glob("*.csv"))]
rets = [df.to_numpy()[:, 1:].astype(np.float64) for df in dfs]

In [34]:
rets = np.stack(rets)
rets.shape

(17, 217, 157)

In [35]:
# simple mean ensemble
ret = rets.mean(axis=0)

In [37]:
new_df = {"Image_Path": dfs[0]["Image_Path"]}
for i, col in enumerate(dfs[0].columns[1:]):
    new_df[col] = ret[:, i]

In [38]:
new_df = pd.DataFrame(new_df)

In [39]:
new_df

Unnamed: 0,Image_Path,Label_0,Label_1,Label_2,Label_3,Label_4,Label_5,Label_6,Label_7,Label_8,...,Label_177,Label_186,Label_188,Label_189,Label_190,Label_191,Label_192,Label_193,Label_194,Label_195
0,./test\649,0.747927,0.001944,0.000419,0.000253,0.000087,0.002656,0.082456,0.000060,0.000034,...,0.000017,1.136778e-05,0.000019,6.955291e-06,1.396406e-05,0.000038,0.000045,7.852546e-06,0.000036,0.000012
1,./test\650,0.000736,0.000008,0.000008,0.000012,0.000003,0.000005,0.000092,0.000001,0.000003,...,0.000002,7.963366e-07,0.000002,9.344761e-07,8.749638e-07,0.000003,0.000002,2.305973e-06,0.000002,0.000002
2,./test\651,0.000007,0.000708,0.000026,0.000016,0.000007,0.000005,0.000006,0.000087,0.000005,...,0.000003,2.349364e-06,0.000002,1.148435e-06,1.000146e-06,0.000002,0.000001,1.308252e-06,0.000004,0.000003
3,./test\652,0.000006,0.000677,0.000021,0.000014,0.000003,0.000002,0.000002,0.000051,0.000003,...,0.000002,1.271714e-06,0.000001,7.670794e-07,6.080505e-07,0.000002,0.000001,9.790643e-07,0.000002,0.000002
4,./test\653,0.000009,0.000020,0.000751,0.000092,0.000011,0.000003,0.000004,0.000003,0.000076,...,0.000001,9.861358e-07,0.000001,1.401509e-06,1.128680e-06,0.000002,0.000001,1.290545e-06,0.000002,0.000001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,./test\861,0.000010,0.000019,0.000006,0.000015,0.000010,0.000034,0.000013,0.000009,0.000008,...,0.000095,1.938008e-04,0.001260,1.460377e-04,1.855239e-04,0.003821,0.985866,5.399363e-04,0.000201,0.000592
213,./test\862,0.000015,0.000024,0.000024,0.000041,0.000015,0.000025,0.000022,0.000022,0.000035,...,0.002363,1.985968e-03,0.000239,6.577956e-04,1.348196e-03,0.000101,0.000368,9.789394e-01,0.000721,0.000451
214,./test\863,0.000004,0.000004,0.000007,0.000010,0.000013,0.000009,0.000009,0.000005,0.000009,...,0.000665,6.330425e-05,0.000012,9.724208e-05,8.434278e-05,0.000011,0.000176,3.722209e-04,0.992364,0.000033
215,./test\864,0.000007,0.000017,0.000017,0.000030,0.000039,0.000028,0.000021,0.000022,0.000031,...,0.005940,1.778930e-04,0.000061,1.845065e-04,2.580232e-04,0.000036,0.000862,3.127511e-03,0.978138,0.000100


In [41]:
new_df.to_csv("results/ensemble.csv", index=False)