In [3]:
import argparse
import datetime
import os
import traceback

import numpy as np
import torch
import yaml
from tensorboardX import SummaryWriter
from torch import nn
from torch.utils.data import DataLoader

from torchvision import transforms
from tqdm.autonotebook import tqdm

from backbone import EfficientDetBackbone
from efficientdet.dataset import CocoDataset, Resizer, Normalizer, Augmenter, collater
from efficientdet.loss import FocalLoss
from utils.sync_batchnorm import patch_replication_callback
from utils.utils import replace_w_sync_bn, CustomDataParallel, get_last_weights, init_weights, boolean_string

import glob
from cv2 import cv2
import matplotlib.pyplot as plt

from efficientdet.utils import Anchors

import itertools

In [4]:
class Params:
    def __init__(self, project_file):
        self.params = yaml.safe_load(open(project_file).read())

    def __getattr__(self, item):
        return self.params.get(item, None)

In [5]:
class ModelWithLoss(nn.Module):
    def __init__(self, model, debug=False):
        super().__init__()
        self.criterion = FocalLoss()
        self.model = model
        self.debug = debug

    def forward(self, imgs, annotations, obj_list=None):
        _, regression, classification, anchors = self.model(imgs)
        if self.debug:
            cls_loss, reg_loss = self.criterion(classification, regression, anchors, annotations,
                                                imgs=imgs, obj_list=obj_list)
        else:
            cls_loss, reg_loss = self.criterion(classification, regression, anchors, annotations)
        return cls_loss, reg_loss
    
    
    
def calc_iou(a, b):
    # a(anchor) [boxes, (y1, x1, y2, x2)]
    # b(gt, coco-style) [boxes, (x1, y1, x2, y2)]

    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    iw = torch.min(torch.unsqueeze(a[:, 3], dim=1), b[:, 2]) - torch.max(torch.unsqueeze(a[:, 1], 1), b[:, 0])
    ih = torch.min(torch.unsqueeze(a[:, 2], dim=1), b[:, 3]) - torch.max(torch.unsqueeze(a[:, 0], 1), b[:, 1])
    iw = torch.clamp(iw, min=0)
    ih = torch.clamp(ih, min=0)
    ua = torch.unsqueeze((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), dim=1) + area - iw * ih
    ua = torch.clamp(ua, min=1e-8)
    intersection = iw * ih
    IoU = intersection / ua

    return IoU

# 1. logo dataset 다운로드 -> datasets 폴더에 저장

In [6]:
# download and unzip dataset
! mkdir datasets
! wget https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.1/dataset_logo.zip
! unzip -d datasets/ dataset_logo.zip

mkdir: cannot create directory ‘datasets’: File exists
--2021-10-04 15:56:26--  https://github.com/zylo117/Yet-Another-EfficientDet-Pytorch/releases/download/1.1/dataset_logo.zip
Resolving github.com (github.com)... 15.164.81.167
Connecting to github.com (github.com)|15.164.81.167|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github-releases.githubusercontent.com/253385242/920dbf00-4122-11eb-8c0a-13d45e9b486b?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20211004%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20211004T065626Z&X-Amz-Expires=300&X-Amz-Signature=7892774d0c7ac797a1e8c17e2d86ff2cfba4ac24b899d28367bef5ad6fa75516&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=253385242&response-content-disposition=attachment%3B%20filename%3Ddataset_logo.zip&response-content-type=application%2Foctet-stream [following]
--2021-10-04 15:56:26--  https://github-releases.githubusercontent.com/253385242/920dbf00-4122-11eb-8c0a-13d

# 2. Custom dataloader 정의 및 model load

In [22]:
#params = Params('projects/birdview_vehicles.yml')
params = Params('projects/logo.yml')
#params = Params('projects/coco.yml')

if params.num_gpus == 0:
    os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
else:
    torch.manual_seed(42)

#opt.saved_path = opt.saved_path + f'/{params.project_name}/'
#opt.log_path = opt.log_path + f'/{params.project_name}/tensorboard/'
#os.makedirs(opt.log_path, exist_ok=True)
#os.makedirs(opt.saved_path, exist_ok=True)

training_params = {'batch_size': 16,
                   'shuffle': True,
                   'drop_last': True,
                   'collate_fn': collater,
                   'num_workers': 1}

val_params = {'batch_size': 1,
              'shuffle': False,
              'drop_last': True,
              'collate_fn': collater,
              'num_workers': 1}

In [23]:
compound_coef = 0

input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
training_set = CocoDataset(root_dir=os.path.join('datasets/', params.project_name), set=params.train_set,
                           transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                         Augmenter(),
                                                         Resizer(input_sizes[compound_coef])]))
training_generator = DataLoader(training_set, **training_params)

val_set = CocoDataset(root_dir=os.path.join('datasets/', params.project_name), set=params.val_set,
                      transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                    Resizer(input_sizes[compound_coef])]))
val_generator = DataLoader(val_set, **val_params)

model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=compound_coef,
                             ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales))

#model.load_state_dict(torch.load('../../weights/efficientdet-d0.pth', map_location='cpu'))

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


# Anchor, Focal Loss 코드 파헤치기
(https://heeyoung-alldata.tistory.com/24 블로그 정리 참고)


In [24]:
anchor_scale = [4., 4., 4., 4., 4., 4., 4., 5., 4.]
pyramid_levels = [5, 5, 5, 5, 5, 5, 5, 5, 6]
kwargs = {'ratios' : eval(params.anchors_ratios), 'scales' : eval(params.anchors_scales)}

anchors = Anchors(anchor_scale=anchor_scale[0],
                   pyramid_levels=(torch.arange(pyramid_levels[compound_coef]) + 3).tolist(),
                   **kwargs)

temp = training_set[330]
img = temp['img'].permute(2, 0, 1).unsqueeze(0)

anchor = anchors(img)

In [25]:
pyramid_level = [3, 4, 5, 6, 7]
strides = [2**x for x in pyramid_level]

anchors_scales = [2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]
anchors_ratios = [(1.0, 1.0), (1.3, 0.8), (1.9, 0.5)]

In [26]:
boxes_all = []
for stride in strides:
    boxes_level = []
    for scale, ratio in itertools.product(anchors_scales, anchors_ratios):
        
        anchor_scale = 4
        base_anchor_size = anchor_scale * stride * scale
        anchor_size_x_2 = base_anchor_size * ratio[0] / 2.0
        anchor_size_y_2 = base_anchor_size * ratio[1] / 2.0

        image_size = 512
        
        x = np.arange(stride / 2, image_size, stride)
        y = np.arange(stride / 2, image_size, stride)
        xv, yv = np.meshgrid(x, y)
        xv = xv.reshape(-1)
        yv = yv.reshape(-1)

        # y1,x1,y2,x2
        boxes = np.vstack((yv - anchor_size_y_2, xv - anchor_size_x_2,
                           yv + anchor_size_y_2, xv + anchor_size_x_2))
        boxes = np.swapaxes(boxes, 0, 1)
        boxes_level.append(np.expand_dims(boxes, axis=1))
    # concat anchors on the same level to the reshape NxAx4
    boxes_level = np.concatenate(boxes_level, axis=1)
    boxes_all.append(boxes_level.reshape([-1, 4]))

anchor_boxes = np.vstack(boxes_all)

anchor_boxes = torch.from_numpy(anchor_boxes.astype(np.float32))
anchor_boxes = anchor_boxes.unsqueeze(0)

In [27]:
compound_coef = 0

input_sizes = [512, 640, 768, 896, 1024, 1280, 1280, 1536, 1536]
training_set = CocoDataset(root_dir=os.path.join('datasets/', params.project_name), set=params.train_set,
                           transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                         Augmenter(),
                                                         Resizer(input_sizes[compound_coef])]))
training_generator = DataLoader(training_set, **training_params)

val_set = CocoDataset(root_dir=os.path.join('datasets/', params.project_name), set=params.val_set,
                      transform=transforms.Compose([Normalizer(mean=params.mean, std=params.std),
                                                    Resizer(input_sizes[compound_coef])]))
val_generator = DataLoader(val_set, **val_params)

model = EfficientDetBackbone(num_classes=len(params.obj_list), compound_coef=compound_coef,
                             ratios=eval(params.anchors_ratios), scales=eval(params.anchors_scales))

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [28]:
temp = training_set[330]

img = temp['img'].permute(2, 0, 1).unsqueeze(0)
annotations = temp['annot'].unsqueeze(0)
annotations.size()

torch.Size([1, 3, 5])

In [29]:
features, regressions, classifications, anchors = model(img)

In [30]:
regressions.size(), classifications.size(), anchors.size()

(torch.Size([1, 49104, 4]),
 torch.Size([1, 49104, 10]),
 torch.Size([1, 49104, 4]))

In [31]:
alpha = 0.25
gamma = 2.0
batch_size = classifications.shape[0]
classification_losses = []
regression_losses = []

anchor = anchors[0, :, :]  # assuming all image sizes are the same, which it is
dtype = anchors.dtype

anchor_widths = anchor[:, 3] - anchor[:, 1]        # w
anchor_heights = anchor[:, 2] - anchor[:, 0]       # h
anchor_ctr_x = anchor[:, 1] + 0.5 * anchor_widths  # center x
anchor_ctr_y = anchor[:, 0] + 0.5 * anchor_heights # center y

j = 0

classification = classifications[j, :, :]                         # torch.Size([49104, 2])
regression = regressions[j, :, :]                                 # torch.Size([49104, 4])

bbox_annotation = annotations[j]                                  # torch.Size([27, 5])
bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1]    # torch.Size([27, 5])

classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)


IoU = calc_iou(anchor[:, :], bbox_annotation[:, :4])  # (49104, n)

IoU_max, IoU_argmax = torch.max(IoU, dim=1)          # IoU_argmax -> 몇 번째 GT bbox와 IoU가 가장 큰지에 대한 index값

# compute the loss for classification
targets = torch.ones_like(classification) * -1

targets[torch.lt(IoU_max, 0.4), :] = 0               # Labeling -> 49104개 중, GT와의 IoU가 0.4보다 작으면 0 부여

positive_indices = torch.ge(IoU_max, 0.5)            # Labeling -> 49104개 중, GT와의 IoU가 0.5보다 크면 positive

num_positive_anchors = positive_indices.sum()        # 0.5보다 큰게 369개 있음.

In [32]:
assigned_annotations = bbox_annotation[IoU_argmax, :] # 49104개의 anchor에 대응되는 GT 만들어줌(즉, 27 -> 49104개)

In [33]:
targets[positive_indices, :] = 0
targets[positive_indices, assigned_annotations[positive_indices, 4].long()] = 1

In [34]:
alpha_factor = torch.ones_like(targets) * alpha


alpha_factor = torch.where(torch.eq(targets, 1.), alpha_factor, 1. - alpha_factor)
focal_weight = torch.where(torch.eq(targets, 1.), 1. - classification, classification)
focal_weight = alpha_factor * torch.pow(focal_weight, gamma)

bce = -(targets * torch.log(classification) + (1.0 - targets) * torch.log(1.0 - classification))

cls_loss = focal_weight * bce

zeros = torch.zeros_like(cls_loss)

cls_loss = torch.where(torch.ne(targets, -1.0), cls_loss, zeros)

classification_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.to(dtype), min=1.0))

In [35]:
if positive_indices.sum() > 0:
    assigned_annotations = assigned_annotations[positive_indices, :]

    anchor_widths_pi = anchor_widths[positive_indices]
    anchor_heights_pi = anchor_heights[positive_indices]
    anchor_ctr_x_pi = anchor_ctr_x[positive_indices]
    anchor_ctr_y_pi = anchor_ctr_y[positive_indices]

    gt_widths = assigned_annotations[:, 2] - assigned_annotations[:, 0]
    gt_heights = assigned_annotations[:, 3] - assigned_annotations[:, 1]
    gt_ctr_x = assigned_annotations[:, 0] + 0.5 * gt_widths
    gt_ctr_y = assigned_annotations[:, 1] + 0.5 * gt_heights

    # efficientdet style
    gt_widths = torch.clamp(gt_widths, min=1)
    gt_heights = torch.clamp(gt_heights, min=1)

    targets_dx = (gt_ctr_x - anchor_ctr_x_pi) / anchor_widths_pi
    targets_dy = (gt_ctr_y - anchor_ctr_y_pi) / anchor_heights_pi
    targets_dw = torch.log(gt_widths / anchor_widths_pi)
    targets_dh = torch.log(gt_heights / anchor_heights_pi)

    targets = torch.stack((targets_dy, targets_dx, targets_dh, targets_dw))
    targets = targets.t()

    regression_diff = torch.abs(targets - regression[positive_indices, :])

    regression_loss = torch.where(torch.le(regression_diff, 1.0 / 9.0), 
                                  0.5 * 9.0 * torch.pow(regression_diff, 2), 
                                  regression_diff - 0.5 / 9.0)
    regression_losses.append(regression_loss.mean())
    
print(targets)
print(regression[positive_indices, :])

tensor([[-0.0156,  0.1727,  0.1978, -0.4967],
        [-0.0078,  0.0781, -0.4953,  0.1452],
        [-0.0098,  0.0601, -0.2722, -0.1172],
        [-0.0156,  0.0411,  0.1978, -0.4967],
        [-0.0098, -0.1322, -0.2722, -0.1172],
        [-0.0156, -0.0905,  0.1978, -0.4967],
        [-0.0156, -0.2220,  0.1978, -0.4967]], dtype=torch.float64)
tensor([[-0.2777,  0.0536,  0.0441,  1.0189],
        [-0.4079,  0.5094, -0.3100,  1.6994],
        [ 0.1461,  1.3958,  0.6359, -1.0447],
        [-0.1743,  0.7384,  0.6217,  0.7275],
        [-0.2631,  0.3858,  0.4581, -0.2207],
        [-0.0449, -0.2552,  0.3049,  0.4693],
        [-0.4731,  0.1095,  0.4441,  0.1009]], grad_fn=<IndexBackward>)


# Training

In [37]:
model = ModelWithLoss(model, debug=False)

model.cpu()

optimizer = torch.optim.SGD(model.parameters(), 0.0001, momentum=0.9, nesterov=True)

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)




for epoch in range(10):

    epoch_loss = []
    progress_bar = tqdm(training_generator)
    for iter, data in enumerate(progress_bar):

        imgs = data['img']
        annot = data['annot']

        if params.num_gpus == 1:
            # if only one gpu, just send it to cpu:0
            # elif multiple gpus, send it to multiple gpus in CustomDataParallel, not here
            imgs = imgs.cpu()
            annot = annot.cpu()

        optimizer.zero_grad()
        cls_loss, reg_loss = model(imgs, annot, obj_list=params.obj_list)
        cls_loss = cls_loss.mean()
        reg_loss = reg_loss.mean()

        loss = cls_loss + reg_loss
        print(loss)
        
        if loss == 0 or not torch.isfinite(loss):
            continue

        loss.backward()
        # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()

        epoch_loss.append(float(loss))

  0%|          | 0/37 [00:00<?, ?it/s]

KeyboardInterrupt: 