In [1]:
%load_ext lab_black

In [2]:
import math
import os
import random
import shutil
import json
import logging
import sys
from collections import defaultdict
from datetime import datetime
from io import TextIOWrapper
from pathlib import Path
from typing import Callable, List, Sequence, Tuple

import albumentations as A
import cv2
import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import KFold
from torch import nn, optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader, Dataset
from torch.utils.tensorboard import SummaryWriter
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from tqdm import tqdm
from PIL import Image

import utils
import networks

In [3]:
# Per-channel normalisation statistics (the standard ImageNet mean/std values),
# shaped (3, 1, 1) so they broadcast against a CxHxW image tensor.
MEAN = torch.tensor([0.485, 0.456, 0.406], dtype=torch.float32).view(3, 1, 1)
STD = torch.tensor([0.229, 0.224, 0.225], dtype=torch.float32).view(3, 1, 1)

# Margin in pixels added on every side of a detected person box.
PAD = 25

In [4]:
class ImageDataset(Dataset):
    """Dataset that loads an image file, crops it and normalises it.

    Args:
        files: indexable collection of image paths.
        keypoints: stored on the instance; not read by ``__getitem__``.
        padding: stored on the instance; not read by ``__getitem__``.

    Each item is a float32 tensor in CxHxW layout, scaled to [0, 1] and then
    standardised with the module-level ``MEAN`` / ``STD``.
    """

    def __init__(self, files, keypoints=None, padding=40):
        super().__init__()
        self.files = files
        self.keypoints = keypoints
        self.padding = padding

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        frame = imageio.imread(self.files[idx])
        # Fixed window: rows 38..1061 and columns 374..1525, i.e. 1152x1024 (WxH)
        # when the source frame is large enough to contain it.
        cropped = frame[38:1062, 374:1526]
        # HxWxC -> CxHxW, then scale to [0, 1].
        chw = torch.tensor(cropped, dtype=torch.float32).permute(2, 0, 1)
        scaled = chw.div(255.0)
        return (scaled - MEAN) / STD

In [5]:
# Sorted lists of the source JPEGs; later cells index these positionally,
# so the ordering has to be deterministic.
train_imgs = sorted(Path("data/ori/train_imgs/").glob("*.jpg"))
test_imgs = sorted(Path("data/ori/test_imgs/").glob("*.jpg"))

In [6]:
# One dataset per split, built from the sorted file lists (default keypoints/padding).
ds_train, ds_test = (ImageDataset(paths) for paths in (train_imgs, test_imgs))

In [7]:
# shuffle=False matters: the extraction loops pair each sample with
# train_imgs[i] / test_imgs[i] through a running counter, so batches must
# come out in file order.
dl_train, dl_test = (
    DataLoader(split_ds, batch_size=20, num_workers=8, shuffle=False)
    for split_ds in (ds_train, ds_test)
)

In [8]:
# EfficientDet-D7 built with pretrained=True and moved to the GPU.
model = networks.EfficientDet("efficientdet-d7", pretrained=True).cuda()
# Inference only: evaluation mode, and autograd disabled globally from here on.
model.eval()
torch.set_grad_enabled(False)
pass  # final statement is not an expression, so the cell prints no repr

In [9]:
# Output locations: cropped train images, cropped test images, and train crops
# with the keypoints drawn on top (for visual inspection).
dir_train, dir_test, dir_train_keypoint = map(
    Path,
    (
        "data/box_effdet/train_imgs",
        "data/box_effdet/test_imgs",
        "data/box_effdet/train_keypoints",
    ),
)
# Create every directory (and missing parents); re-running is harmless.
for out_dir in (dir_train, dir_test, dir_train_keypoint):
    out_dir.mkdir(parents=True, exist_ok=True)

In [10]:
# Keep a copy of the annotation CSV next to the cropped images.
shutil.copy(src="data/ori/train_df.csv", dst="data/box_effdet/train_df.csv")

'data/box_effdet/train_df.csv'

In [11]:
# Annotation table; the train extraction loop reads columns 1: of row i as
# interleaved x, y keypoint coordinates.
df = pd.read_csv(filepath_or_buffer="data/box_effdet/train_df.csv")

In [14]:
# Crop every training image to its detected person box, make sure all annotated
# keypoints fall inside the crop, and record the crop origin (in original-frame
# coordinates) so keypoints can be converted later.
torch.cuda.empty_cache()

# Top-left corner of the ImageDataset crop inside the original frame; must
# match the img[38:1062, 374:1526] slice applied when the image is loaded.
CROP_LEFT, CROP_TOP = 374, 38
# Margin (px) kept around a keypoint when the box has to grow to contain it.
KEYPOINT_MARGIN = 10

i = 0  # running index into train_imgs / df; valid because dl_train is not shuffled
result_train = []
with tqdm(total=len(dl_train.dataset), ncols=100, file=sys.stdout) as t:
    for xs in dl_train:
        detections = model(xs.cuda())
        for x, detection in zip(xs, detections):
            name = train_imgs[i].name
            t.set_postfix_str(name)

            # Undo the normalisation to recover the HxWx3 uint8 image.
            # round() + clamp() prevent the float -> uint8 cast from truncating
            # e.g. 99.99999 down to 99 or wrapping slightly out-of-range values.
            img = (
                ((x * STD + MEAN) * 255.0)
                .round()
                .clamp(0, 255)
                .permute(1, 2, 0)
                .type(torch.uint8)
                .numpy()
            )
            img_h, img_w = img.shape[:2]

            # Person box (x0, y0, x1, y1) with a small margin around it, held
            # as plain Python ints so later arithmetic cannot be silently
            # truncated by an integer array.
            box = utils.get_single_person_rois(detection)
            x0 = int(box[0]) - PAD
            y0 = int(box[1]) - PAD
            x1 = int(box[2]) + PAD
            y1 = int(box[3]) + PAD

            # Keypoints of this image in cropped-frame coordinates, shape (K, 2).
            # NOTE(review): relies on df rows being in the same order as the
            # sorted file list - confirm.
            keypoint = df.iloc[i, 1:].values.astype(np.float32)
            keypoint = np.stack([keypoint[0::2], keypoint[1::2]], 1)
            keypoint[:, 0] -= CROP_LEFT
            keypoint[:, 1] -= CROP_TOP

            # Grow the box wherever a keypoint lies outside it. Each side is
            # derived from the keypoint extent directly, so no stale
            # width/height is involved.
            kx_min, ky_min = keypoint.min(axis=0)
            kx_max, ky_max = keypoint.max(axis=0)
            if kx_min < x0:
                x0 = math.floor(kx_min) - KEYPOINT_MARGIN
            if ky_min < y0:
                y0 = math.floor(ky_min) - KEYPOINT_MARGIN
            if kx_max >= x1:
                x1 = math.ceil(kx_max) + KEYPOINT_MARGIN
            if ky_max >= y1:
                y1 = math.ceil(ky_max) + KEYPOINT_MARGIN

            # Clamp to the image: a negative slice start would otherwise wrap
            # around and yield a wrong (often empty) crop.
            x0, y0 = max(x0, 0), max(y0, 0)
            x1, y1 = min(x1, img_w), min(y1, img_h)

            # Keypoints relative to the final crop, and the crop origin in
            # original-frame coordinates (metadata for keypoint conversion).
            # Both derive from the same integer origin, so they always agree.
            keypoint[:, 0] -= x0
            keypoint[:, 1] -= y0
            offset = [x0 + CROP_LEFT, y0 + CROP_TOP]
            result_train.append({"image": name, "boxes": offset})

            out = img[y0:y1, x0:x1]
            imageio.imwrite(dir_train / name, out)

            # Also write the crop with keypoints drawn on it, to check visually
            # that the keypoints landed in the right place.
            keypoint_img = utils.draw_keypoints(out, keypoint)
            imageio.imwrite(dir_train_keypoint / name, keypoint_img)

            i += 1
            t.update()

100%|████████████████████████████| 4195/4195 [08:10<00:00,  8.56it/s, 642-2-4-31-Z148_E-0000031.jpg]


In [15]:
# Crop every test image to its detected person box and record the crop origin
# (in original-frame coordinates) so predicted keypoints can be mapped back.
torch.cuda.empty_cache()

# Top-left corner of the ImageDataset crop inside the original frame; must
# match the img[38:1062, 374:1526] slice applied when the image is loaded.
CROP_LEFT, CROP_TOP = 374, 38

i = 0  # running index into test_imgs; valid because dl_test is not shuffled
result_test = []
with tqdm(total=len(dl_test.dataset), ncols=100, file=sys.stdout) as t:
    for xs in dl_test:
        detections = model(xs.cuda())
        for x, p in zip(xs, detections):
            name = test_imgs[i].name
            t.set_postfix_str(name)

            # Undo the normalisation to recover the HxWx3 uint8 image.
            # round() + clamp() prevent the float -> uint8 cast from truncating
            # e.g. 99.99999 down to 99 or wrapping slightly out-of-range values.
            img = (
                ((x * STD + MEAN) * 255.0)
                .round()
                .clamp(0, 255)
                .permute(1, 2, 0)
                .type(torch.uint8)
                .numpy()
            )
            img_h, img_w = img.shape[:2]

            # Person box (x0, y0, x1, y1) taken from THIS image's detection `p`
            # (not from a variable left over from an earlier iteration or
            # cell), widened by PAD and clamped to the image so a negative
            # slice start cannot wrap around.
            box = utils.get_single_person_rois(p)
            x0 = max(int(box[0]) - PAD, 0)
            y0 = max(int(box[1]) - PAD, 0)
            x1 = min(int(box[2]) + PAD, img_w)
            y1 = min(int(box[3]) + PAD, img_h)

            out = img[y0:y1, x0:x1]
            imageio.imwrite(dir_test / name, out)

            # Crop origin in original-frame coordinates (metadata for keypoint
            # conversion).
            offset = [x0 + CROP_LEFT, y0 + CROP_TOP]
            result_test.append({"image": name, "boxes": offset})

            i += 1
            t.update()

100%|█████████████████████████████| 1600/1600 [02:57<00:00,  8.99it/s, 786-3-5-41-Z94_E-0000031.jpg]


In [24]:
# Round-trip every stored offset through numpy so it ends up as a plain Python
# list of builtin numbers (numpy scalars are not JSON serialisable).
for entry in result_train:
    plain_boxes = np.array(entry["boxes"]).tolist()
    entry["boxes"] = plain_boxes

In [25]:
# Persist the crop origins of both splits in a single JSON document.
offsets_payload = {"train": result_train, "test": result_test}
with open("data/box_effdet/offset.json", "w") as fp:
    fp.write(json.dumps(offsets_payload))