In [1]:
import easydict
import os
import time

import torch
import torch.utils.data
import torch.utils.tensorboard
import torch.nn as nn
#import tqdm
from tqdm import tqdm

  
import os
import glob
import random

import torch
import torch.nn.functional as F
import torch.utils.data
import torchvision.transforms
import numpy as np
from PIL import Image

# ipynb파일을 import할 수 있게 해주는 모듈
# pip install import_ipynb
# 에러가 뜨면 파일 저장 후 커널 restart
import import_ipynb
import YOLOv3

importing Jupyter notebook from YOLOv3.ipynb


In [2]:
def parse_data_config(path: str):
    """Parse a dataset configuration file made of ``key=value`` lines.

    Blank lines and lines starting with ``#`` are skipped. Every other line is
    split on its first ``=`` only, so a value may itself contain ``=``.
    Keys and values are stripped of surrounding whitespace.

    Args:
        path: Path of the configuration file to read.

    Returns:
        dict mapping each key to its value (both ``str``).

    Raises:
        ValueError: If a non-blank, non-comment line contains no ``=``.
    """
    print(path)
    options = {}
    with open(path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            line = line.strip()
            # Tolerate trailing newlines / spacing lines and commented-out entries.
            if not line or line.startswith('#'):
                continue
            key, separator, value = line.partition('=')
            if not separator:
                raise ValueError(
                    "{}:{}: expected 'key=value', got {!r}".format(path, line_number, line))
            options[key.strip()] = value.strip()
    return options

def load_classes(path: str):
    """Load class names from a text file, one name per line.

    Each line is stripped of surrounding whitespace. Lines are kept in file
    order and blank lines are kept as empty strings, so the position of a name
    in the returned list matches its line position in the file.

    Args:
        path: Path of the class-name file.

    Returns:
        list of ``str``, one entry per line of the file.
    """
    print(path)
    with open(path, "r") as f:
        return [name.strip() for name in f]

def init_weights_normal(m):
    """Initialise the parameters of one module in place.

    Intended to be passed to ``model.apply``. Only two kinds of module are
    touched; every other module is left unchanged:

    * a module whose type is exactly ``nn.Conv2d`` gets Kaiming-normal weights,
    * a module whose class name contains ``"BatchNorm2d"`` gets weights drawn
      from N(1.0, 0.02) and a zero bias.

    Args:
        m: The module to initialise.
    """
    # Exact type check instead of matching "Conv" in the class name; see
    # https://discuss.pytorch.org/t/object-has-no-attribute-weight/31526
    # (presumably some module with "Conv" in its name has no ``weight``).
    if type(m) is nn.Conv2d:
        # The second argument of kaiming_normal_ is ``a``, the negative slope
        # of the leaky ReLU assumed when computing the gain.
        torch.nn.init.kaiming_normal_(m.weight.data, a=0.1)
        return

    if "BatchNorm2d" in m.__class__.__name__:
        torch.nn.init.normal_(m.weight.data, mean=1.0, std=0.02)
        torch.nn.init.constant_(m.bias.data, 0.0)


In [3]:
def evaluate(model, path, iou_thres, conf_thres, nms_thres, image_size, batch_size, num_workers, device):
    """Evaluate ``model`` on the images listed in ``path`` and time its inference.

    Args:
        model: Detection model; called as ``model(images)`` in eval mode.
        path: Text file listing the evaluation images (passed to ``ListDataset``).
        iou_thres: IoU threshold passed to ``get_batch_statistics``.
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        nms_thres: NMS threshold passed to ``non_max_suppression``.
        image_size: Size the images are resized to; also used to rescale the
            target boxes to pixel units.
        batch_size: Batch size of the evaluation data loader.
        num_workers: Worker count of the evaluation data loader.
        device: Device the image batches are moved to before inference.

    Returns:
        Tuple ``(precision, recall, AP, f1, ap_class, inference_time, fps)``.
        The first five values come straight from ``ap_per_class``.
        ``inference_time`` is the mean forward + NMS time per processed image
        in milliseconds and ``fps`` is its reciprocal in images per second;
        both are ``0.0`` when no batch was processed.
    """
    # NOTE(review): `utils` is not imported anywhere in the visible notebook, so the
    # `utils.utils.*` helpers below (xywh2xyxy, non_max_suppression,
    # get_batch_statistics, ap_per_class) must be brought into scope before this
    # function can run. Their definitions are not visible here, so the calls are
    # left untouched.

    # Put the model in evaluation mode.
    model.eval()

    # Dataset and data loader. ListDataset is the class defined in this notebook.
    dataset = ListDataset(path, image_size, augment=False, multiscale=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=dataset.collate_fn)

    labels = []
    sample_metrics = []  # List[Tuple] -> [(TP, confs, pred)]
    entire_time = 0.0
    num_images = 0  # images that actually went through the timed section
    # `tqdm` is the class imported with `from tqdm import tqdm`, not the module.
    for _, images, targets in tqdm(dataloader, desc='Evaluate method', leave=False):
        # collate_fn returns None when no image of the batch has a box.
        if targets is None:
            continue

        # Extract labels (column 1 of the targets).
        labels.extend(targets[:, 1].tolist())

        # Rescale targets to corner format in pixel units.
        targets[:, 2:] = utils.utils.xywh2xyxy(targets[:, 2:])
        targets[:, 2:] *= image_size

        # Predict objects; forward pass and NMS are both timed.
        start_time = time.time()
        with torch.no_grad():
            images = images.to(device)
            outputs = model(images)
            outputs = utils.utils.non_max_suppression(outputs, conf_thres, nms_thres)
        entire_time += time.time() - start_time
        num_images += images.size(0)

        # Compute true positives, predicted scores and predicted labels per batch.
        sample_metrics.extend(utils.utils.get_batch_statistics(outputs, targets, iou_thres))

    # Concatenate sample statistics.
    if len(sample_metrics) == 0:
        true_positives, pred_scores, pred_labels = np.array([]), np.array([]), np.array([])
    else:
        true_positives, pred_scores, pred_labels = [np.concatenate(x, 0) for x in zip(*sample_metrics)]

    # Compute AP.
    precision, recall, AP, f1, ap_class = utils.utils.ap_per_class(true_positives, pred_scores, pred_labels, labels)

    # Average over the images that were timed. Skipped batches contribute neither
    # time nor images, and an empty run must not divide by zero.
    inference_time = entire_time / num_images if num_images else 0.0
    fps = 1 / inference_time if inference_time > 0 else 0.0

    # Convert the per-image inference time to milliseconds.
    inference_time *= 1000

    return precision, recall, AP, f1, ap_class, inference_time, fps



In [4]:
def horisontal_flip(images, targets):
    """Flip an image tensor left-to-right and mirror its box centres.

    Args:
        images: Tensor whose last dimension is flipped.
        targets: Either ``None`` (no boxes) or a 2-D tensor whose column 2 is
            mirrored as ``1 - x``; it is modified in place.
            Presumably column 2 holds the normalised x centre, as produced by
            ``ListDataset.__getitem__``.

    Returns:
        Tuple ``(flipped_images, targets)``; ``targets`` is the same object
        that was passed in (or ``None``).
    """
    images = torch.flip(images, [-1])
    # Images without a label file carry no targets; only the pixels are flipped.
    if targets is not None:
        targets[:, 2] = 1 - targets[:, 2]
    return images, targets

def pad_to_square(image, pad_value=0):
    """Pad a ``(C, H, W)`` image tensor with a constant so that ``H == W``.

    The shorter side is padded on both ends; when the size difference is odd
    the extra pixel goes to the bottom / right.

    Args:
        image: 3-D image tensor.
        pad_value: Constant used to fill the padded area.

    Returns:
        Tuple ``(padded_image, pad)`` where ``pad`` is the list
        ``[left, right, top, bottom]`` that was given to ``F.pad``.
    """
    _, height, width = image.shape

    # Split the size gap between the two ends of the shorter side.
    gap = abs(height - width)
    before = gap // 2
    after = gap - before

    # F.pad lists the last dimension first: (left, right, top, bottom).
    if height > width:
        pad = [before, after, 0, 0]
    else:
        pad = [0, 0, before, after]

    padded = F.pad(image, pad, mode='constant', value=pad_value)
    return padded, pad

def resize(image, size):
    """Resize a single image tensor with bilinear interpolation.

    Args:
        image: Image tensor without a batch dimension; a batch dimension is
            added for ``F.interpolate`` and removed again afterwards.
        size: Output size forwarded to ``F.interpolate``.

    Returns:
        The resized image tensor, again without a batch dimension.
    """
    batched = image.unsqueeze(0)
    scaled = F.interpolate(batched, size, mode='bilinear', align_corners=True)
    return scaled.squeeze(0)


class ImageFolder(torch.utils.data.Dataset):
    """Dataset over every ``*.*`` file of a folder, without labels.

    Items are ``(image_path, image)`` where ``image`` is the file loaded as
    RGB, padded to a square and resized to ``image_size``.
    """

    def __init__(self, folder_path, image_size):
        """Collect the sorted file list of ``folder_path`` and store ``image_size``."""
        pattern = "{}/*.*".format(folder_path)
        self.image_files = sorted(glob.glob(pattern))
        self.image_size = image_size

    def __len__(self):
        """Number of files found in the folder."""
        return len(self.image_files)

    def __getitem__(self, index):
        """Load, square-pad and resize the image at ``index``."""
        image_path = self.image_files[index]

        # Decode the file as RGB and convert it to a tensor.
        to_tensor = torchvision.transforms.ToTensor()
        image = to_tensor(Image.open(image_path).convert('RGB'))

        # Square first, then scale to the requested size; the padding is not needed here.
        squared, _ = pad_to_square(image)
        return image_path, resize(squared, self.image_size)



class ListDataset(torch.utils.data.Dataset):
    """Detection dataset driven by a text file that lists one image path per line.

    The label file of an image is found by rewriting its path
    (``images`` -> ``labels``, ``JPEGImages`` -> ``labels``, ``.png``/``.jpg`` ->
    ``.txt``). Each label row has five columns that are read as
    ``(class, x_center, y_center, width, height)``.

    Items are ``(image_path, image, targets)``. ``image`` is padded to a square
    but not resized (resizing happens in ``collate_fn``). ``targets`` is ``None``
    when the label file does not exist, otherwise a ``(num_boxes, 6)`` tensor
    whose column 0 is reserved for the sample index and whose columns 1-5 hold
    ``(class, x, y, w, h)`` relative to the padded image.
    """

    def __init__(self, list_path: str, image_size: int, augment: bool, multiscale: bool, normalized_labels=True):
        """Read the image list and derive the matching label paths.

        Args:
            list_path: Text file with one image path per line.
            image_size: Side length batches are resized to in ``collate_fn``.
            augment: Apply colour jitter and random horizontal flips.
            multiscale: Pick a new random ``image_size`` every 10 batches.
            normalized_labels: Whether label coordinates are fractions of the
                image size (``True``) or already in pixels (``False``).
        """
        with open(list_path, 'r') as file:
            # Blank lines (e.g. a trailing empty line) are not samples; skip them
            # instead of failing later on an empty image path.
            self.image_files = [line for line in file if line.strip()]

        self.label_files = [path.replace('images', 'labels').replace('.png', '.txt').replace('.jpg', '.txt')
                                .replace('JPEGImages', 'labels') for path in self.image_files]
        self.image_size = image_size
        self.max_objects = 100
        self.augment = augment
        self.multiscale = multiscale
        self.normalized_labels = normalized_labels
        self.batch_count = 0

    def __getitem__(self, index):
        """Load one image and its boxes; see the class docstring for the layout."""
        # 1. Image
        # -----------------------------------------------------------------------------------
        image_path = self.image_files[index].rstrip()

        # Colour augmentation is applied on the PIL image, before tensor conversion.
        if self.augment:
            transforms = torchvision.transforms.Compose([
                torchvision.transforms.ColorJitter(brightness=1.5, saturation=1.5, hue=0.1),
                torchvision.transforms.ToTensor()
            ])
        else:
            transforms = torchvision.transforms.ToTensor()

        # Extract image as PyTorch tensor
        image = transforms(Image.open(image_path).convert('RGB'))

        _, h, w = image.shape
        # Normalised labels are scaled to pixels with the unpadded image size.
        h_factor, w_factor = (h, w) if self.normalized_labels else (1, 1)

        # Pad to square resolution; pad is [left, right, top, bottom].
        image, pad = pad_to_square(image)
        _, padded_h, padded_w = image.shape

        # 2. Label
        # -----------------------------------------------------------------------------------
        label_path = self.label_files[index].rstrip()

        targets = None
        if os.path.exists(label_path):
            boxes = torch.from_numpy(np.loadtxt(label_path).reshape(-1, 5))

            # Corner coordinates in pixels of the unpadded image.
            x1 = w_factor * (boxes[:, 1] - boxes[:, 3] / 2)
            y1 = h_factor * (boxes[:, 2] - boxes[:, 4] / 2)
            x2 = w_factor * (boxes[:, 1] + boxes[:, 3] / 2)
            y2 = h_factor * (boxes[:, 2] + boxes[:, 4] / 2)

            # Padding moves the whole box by the LEFT / TOP amount only. Adding the
            # right / bottom amount to x2 / y2 would shift the centre by half a
            # pixel whenever the padding is odd.
            x1 += pad[0]
            y1 += pad[2]
            x2 += pad[0]
            y2 += pad[2]

            # Back to (x, y, w, h), now relative to the padded image.
            boxes[:, 1] = ((x1 + x2) / 2) / padded_w
            boxes[:, 2] = ((y1 + y2) / 2) / padded_h
            boxes[:, 3] *= w_factor / padded_w
            boxes[:, 4] *= h_factor / padded_h

            # Column 0 is left at zero; collate_fn writes the sample index there.
            targets = torch.zeros((len(boxes), 6))
            targets[:, 1:] = boxes

        # Random horizontal flip.
        if self.augment and np.random.random() < 0.5:
            if targets is None:
                # No boxes to mirror for unlabeled images; flip the pixels only.
                image = torch.flip(image, [-1])
            else:
                image, targets = horisontal_flip(image, targets)

        return image_path, image, targets

    def __len__(self):
        """Number of listed images."""
        return len(self.image_files)

    def collate_fn(self, batch):
        """Merge samples into ``(paths, images, targets)`` for a ``DataLoader``.

        Args:
            batch: Sequence of ``(image_path, image, targets)`` items.

        Returns:
            ``paths`` is a tuple of image paths, ``images`` the stacked batch
            resized to the current ``image_size``, and ``targets`` the
            concatenation of all per-image targets with column 0 set to the
            position of the image in the batch, or ``None`` when no image of
            the batch has a target.
        """
        paths, images, targets = list(zip(*batch))

        # Tag every box with the position of its image in the batch. The index is
        # taken BEFORE unlabeled images are dropped, so it keeps matching `images`.
        labeled = []
        for sample_index, boxes in enumerate(targets):
            if boxes is None:
                continue
            boxes[:, 0] = sample_index
            labeled.append(boxes)

        # torch.cat rejects an empty list; signal "no boxes in this batch" with None.
        targets = torch.cat(labeled, 0) if labeled else None

        # Selects new image size every 10 batches
        if self.multiscale and self.batch_count % 10 == 0:
            self.image_size = random.choice(range(320, 608 + 1, 32))

        # Resize images to input shape
        images = torch.stack([resize(image, self.image_size) for image in images])
        self.batch_count += 1

        return paths, images, targets

In [5]:
# Training configuration: either tune the values below or keep the defaults.
# argparse cannot be used inside an .ipynb file, so an EasyDict with the same
# attribute-style access is used instead.
# https://worthpreading.tistory.com/56
args = easydict.EasyDict({
    "epoch": 1,
    "gradient_accumulation": 1,
    "multiscale_training": True,
    "batch_size": 32,
    "num_workers": 0,
    "data_config": "config/coco.data",
    "pretrained_weights": 'weights/darknet53.conv.74',
    "image_size": 416
})
print(args)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Timestamp shared by the log directory and the checkpoint directory of this run.
now = time.strftime('%y%m%d_%H%M%S', time.localtime(time.time()))

# Create the Tensorboard writer.
log_dir = os.path.join('logs', now)
os.makedirs(log_dir, exist_ok=True)
writer = torch.utils.tensorboard.SummaryWriter(log_dir)

# Read the dataset settings.
data_config = parse_data_config(args.data_config)
train_path = data_config['train']
valid_path = data_config['valid']
num_classes = int(data_config['classes'])
class_names = load_classes(data_config['names'])


# Build the model.
model = YOLOv3.YOLOv3(args.image_size, num_classes).to(device)
model.apply(init_weights_normal)
# NOTE(review): loading of `args.pretrained_weights` is currently disabled.
# if args.pretrained_weights.endswith('.pth'):
#     model.load_state_dict(torch.load(args.pretrained_weights))
# else:
#     model.load_darknet_weights(args.pretrained_weights)

dataset = ListDataset(train_path, args.image_size, augment=True, multiscale=args.multiscale_training)
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         shuffle=True,
                                         num_workers=args.num_workers,
                                         pin_memory=True,
                                         collate_fn=dataset.collate_fn)

# Optimizer.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Learning-rate scheduler.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.8)

# Description-only tqdm bar that shows the loss of the current batch.
loss_log = tqdm(total=0, position=2, bar_format='{desc}', leave=False)

# Checkpoint location of this run; the directory itself is created on first save.
save_dir = os.path.join('checkpoints', now)
dataset_name = os.path.split(args.data_config)[-1].split('.')[0]

# NOTE(review): the recorded run of this cell stopped with
# "IndexError: The shape of the mask [3, 3, 17, 17] ... does not match ... [1, 3, 1, 1]"
# during the first batch. That error presumably originates in the model code of
# YOLOv3.ipynb, which is not part of this cell and is not addressed here.

# Train code.
try:
    for epoch in tqdm(range(args.epoch), desc='Epoch'):
        # Put the model in train mode (evaluate() switches it to eval mode).
        model.train()
        # Process every batch of the epoch.
        for batch_idx, (_, images, targets) in enumerate(tqdm(dataloader, desc='Batch', leave=False)):
            step = len(dataloader) * epoch + batch_idx

            # collate_fn returns None when no image of the batch has a label;
            # there is nothing to compute a loss against, so skip the batch.
            if targets is None:
                continue

            # Copy the images and the ground truth to the training device.
            images = images.to(device)
            targets = targets.to(device)

            # Forward and backward pass.
            loss, _ = model(images, targets)
            loss.backward()

            # Gradient accumulation: update once every `gradient_accumulation`
            # batches. `step + 1` keeps the very first batch from triggering an
            # update on its own.
            if (step + 1) % args.gradient_accumulation == 0:
                optimizer.step()
                optimizer.zero_grad()

            # Show the total loss.
            loss_log.set_description_str('Loss: {:.6f}'.format(loss.item()))

            # Record the training progress in Tensorboard.
            for i, yolo_layer in enumerate(model.yolo_layers):
                writer.add_scalar('loss_bbox_{}'.format(i + 1), yolo_layer.metrics['loss_bbox'], step)
                writer.add_scalar('loss_conf_{}'.format(i + 1), yolo_layer.metrics['loss_conf'], step)
                writer.add_scalar('loss_cls_{}'.format(i + 1), yolo_layer.metrics['loss_cls'], step)
                writer.add_scalar('loss_layer_{}'.format(i + 1), yolo_layer.metrics['loss_layer'], step)
            writer.add_scalar('total_loss', loss.item(), step)

        # Advance the learning-rate scheduler once per epoch.
        scheduler.step()

        # Evaluate the model on the validation set.
        precision, recall, AP, f1, _, _, _ = evaluate(model,
                                                      path=valid_path,
                                                      iou_thres=0.5,
                                                      conf_thres=0.5,
                                                      nms_thres=0.5,
                                                      image_size=args.image_size,
                                                      batch_size=args.batch_size,
                                                      num_workers=args.num_workers,
                                                      device=device)

        # Record the evaluation results in Tensorboard.
        writer.add_scalar('val_precision', precision.mean(), epoch)
        writer.add_scalar('val_recall', recall.mean(), epoch)
        writer.add_scalar('val_mAP', AP.mean(), epoch)
        writer.add_scalar('val_f1', f1.mean(), epoch)

        # Save a checkpoint file.
        os.makedirs(save_dir, exist_ok=True)
        torch.save(model.state_dict(), os.path.join(save_dir, 'yolov3_{}_{}.pth'.format(dataset_name, epoch)))
finally:
    # Flush pending Tensorboard events and release the progress bar even when
    # training stops with an exception.
    loss_log.close()
    writer.close()

{'epoch': 1, 'gradient_accumulation': 1, 'multiscale_training': True, 'batch_size': 32, 'num_workers': 0, 'data_config': 'config/coco.data', 'pretrained_weights': 'weights/darknet53.conv.74', 'image_size': 416}
config/coco.data
../../data/coco/coco_classes.txt




Epoch:   0%|                                                                                     | 0/1 [00:00<?, ?it/s]
Batch:   0%|                                                                                     | 0/1 [00:00<?, ?it/s][A

feature map1 shape : torch.Size([3, 256, 68, 68])
feature map2 shape : torch.Size([3, 512, 34, 34])
feature map3 shape : torch.Size([3, 1024, 17, 17])
1 ok
2 ok



Epoch:   0%|                                                                                     | 0/1 [00:03<?, ?it/s][A


IndexError: The shape of the mask [3, 3, 17, 17] at index 0 does not match the shape of the indexed tensor [1, 3, 1, 1] at index 0