In [1]:
import json
import random
import glob
import os
import shutil
from rich.pretty import pprint

from PIL import Image, ImageDraw, ImageFont
import numpy as np
import matplotlib.pyplot as plt
import tqdm

from mmengine.config import Config
from mmengine.runner import set_random_seed
from mmengine.runner import Runner
import mmcv


# 전역변수 설정
# 색상(list) (총 24개)
COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 255),
          (0, 255, 255), (255, 128, 0), (128, 0, 255), (0, 255, 128), (255, 128, 128),
          (128, 255, 128), (128, 128, 255), (128, 128, 0), (128, 0, 128), (0, 128, 128),
          (192, 64, 0), (192, 192, 64), (64, 192, 192), (64, 64, 192), (192, 64, 192),
          (64, 192, 64), (255, 192, 128), (128, 255, 192), (128, 192, 255)]

# 라벨이름(dict) : area_code 및 PM_code (총 35개)
# 주의 : 원본데이터에서 라벨 34는 제외되어있음. 즉 1부터36까지 34제외하고 35개
LABEL_NAMES = ['인도', '횡단보도', '자전거 도로', '교차로', '중앙 차선', '안전지대',
              '정지선', '정지선 위반 판별구역', '보행자 신호등 녹색', '보행자 신호등 적색',
              '차량 신호등 녹색', '차량 신호등 적색', '오토바이', '오토바이_보행자도로 통행위반',
              '오토바이_안전모 미착용', '오토바이_무단횡단', '오토바이_신호위반', '오토바이_정지선위반',
              '오토바이_횡단보도 주행위반', '자전거', '자전거 캐리어', '자전거_보행자도로 통행위반',
              '자전거_안전모 미착용', '자전거_무단횡단', '자전거_신호위반', '자전거_정지선위반',
              '자전거_횡단보도 주행위반', '킥보드', '킥보드 캐리어', '킥보드_보행자도로 통행위반',
              '킥보드_안전모 미착용', '킥보드_무단횡단', '킥보드_신호위반', '킥보드_횡단보도 주행위반',
              '킥보드_동승자 탑승위반']
LABEL_NAME = {str(k):v for k, v in zip(list(range(1,34)) + [35, 36], LABEL_NAMES)}

# data폴더 상대경로
FILE_PATH = 'data'

def random_sample():
    """
    랜덤 이미지와 그에 맞는 라벨을 뽑아주는 함수
    """
    # data폴더안에 아무 json 선택
    choice = random.choice(
        glob.glob(FILE_PATH + '/**/*.json', recursive=True)
    )
    # json과 그에 맞는 jpg 불러오기
    with open(choice, 'r') as f:
        annot = json.load(f)

    choice = choice.replace('라벨링데이터', '원천데이터').replace('.json', '.jpg')
    img = Image.open(choice)

    return annot, img


def pic_with_annotation(annot, img):
    """
    라벨(Seg, bbox)을 이미지에 올려주는 함수
    """
    draw = ImageDraw.Draw(img, 'RGBA')
    font = ImageFont.truetype("./batang.ttc", 30)

    # polygon은 1부터 12까지 12종류의 고유한 area_code를 가진다.
    # 그에 맞는 고유 색상 매핑
    poly_color = {str(k):(r, g, b, 70) for k, (r, g, b) in \
                  zip(range(1,13), COLORS)}
    # 위반코드는 13부터 36까지 (34제외하고) 23종류의 고유한 PM_code를 가진다.
    # 그에 맞는 색상 매핑
    box_color = {str(k):(r, g, b, 255) for k, (r, g, b) in \
                  zip(range(13, 37), COLORS)}

    # polygon 그리기
    for seg in annot['annotations']['environment']:
        area_code = seg['area_code']
        # 원본 좌표가 y,x 로 되어있으므로 뒤집기
        points = [(x,y) for [y, x] in seg['points']]

        draw.polygon(points, fill=poly_color[area_code])
    
    # bbox 그리기
    for box in annot['annotations']['PM']:
        PM_code = box['PM_code']
        # points 양식은 [left, top, width, height] 이다.
        # PIL 양식은 [x0, y0, x1, y1] 이므로 이에 맞게 변환
        # 이때 x0, y0 : 왼쪽상단, x1, y1 : 오른쪽하단
        left, top, width, height = box['points']
        points = [left, top, left+width, top+height]

        draw.rectangle(points, outline=box_color[PM_code], width=3)

        text = LABEL_NAME[PM_code]
        tbbox = draw.textbbox([points[0], points[1]-31], text, font=font)
        draw.rectangle(tbbox, fill=box_color[PM_code])
        draw.text([points[0], points[1]-31], text, font=font, fill='black')
        

# json 표시해주는 함수
def pprint_json(path, **kwargs):
    with open(path, 'r') as f:
        js = json.load(f)
    
    pprint(js, **kwargs)

In [None]:
annot, img = random_sample()
pic_with_annotation(annot, img)
img

In [33]:
# 현재 json 형식에서 YOLO에 맞는 형태로 변환
# YOLOv5 라벨형식 : class_label, x_mid(0~1), y_mid(0~1), width(0~1), height(0~1) (txt파일)
# 현재 json 형식 : left, top, width, height


def to_yolo_format(annot):
    """
    라벨을 YOLOv5 format으로 변경해누즌 함수
    Bounding Box만 변환
    """
    img_width = annot['description']['imageWidth']
    img_height = annot['description']['imageHeight']

    string = ''
    for seg in annot['annotations']['PM']:
        label = seg['PM_code']
        left, top, width, height = seg['points']
        # 0~1 사이로 정규화
        x = (left + width/2) / img_width
        y = (top - height/2) / img_height
        width = width / img_width
        height = height / img_height

        string += f'{label}, {x}, {y}, {width}, {height}\n'
    return string[:-1]


print(to_yolo_format(annot))

23, 0.3578966797331708, 0.24928150011542877, 0.02329546191910118, 0.08205073924757693
27, 0.059656732684043576, 0.37394960753563766, 0.05889679870142048, 0.14823112584043904


In [11]:
# COCO형식으로 변환
# Train:Valid:Test = 7:2:1 로 변환
""" 파일구조
COCO
    annotations
    ...
    train
    ...
    val
    ...
    test
    ...
"""
os.makedirs(FILE_PATH + '/coco/annotations', exist_ok=True)
os.makedirs(FILE_PATH + '/coco/train', exist_ok=True)
os.makedirs(FILE_PATH + '/coco/val', exist_ok=True)
os.makedirs(FILE_PATH + '/coco/test ', exist_ok=True)

files = glob.glob(FILE_PATH + '/라벨링데이터/**/*.json', recursive=True)
random.seed(42)
random.shuffle(files)

jpgs = [i.replace('라벨링데이터', '원천데이터').replace('.json', '.jpg') for i in files]
train_count = int(len(files) * 0.7)
valid_count = int(len(files) * 0.2)
# test_counts = 0.1

# for file_path in tqdm.tqdm(jpgs[:train_count]):
#     shutil.copy(file_path, FILE_PATH + f'/coco/train/{os.path.basename(file_path).split(".")[0]}.jpg')

# for file_path in tqdm.tqdm(jpgs[train_count:train_count+valid_count]):
#     shutil.copy(file_path, FILE_PATH + f'/coco/val/{os.path.basename(file_path).split(".")[0]}.jpg')

# for file_path in tqdm.tqdm(jpgs[train_count+valid_count:]):
#     shutil.copy(file_path, FILE_PATH + f'/coco/test/{os.path.basename(file_path).split(".")[0]}.jpg')


def to_COCO(files, mode='train'):
    
    coco = {
        "categories": [
            {"id": 1, "name": "오토바이", "supercategory": "none"},
            {"id": 2, "name": "오토바이보행자도로 통행위반", "supercategory": "none"},
            {"id": 3, "name": "오토바이안전모 미착용", "supercategory": "none"},
            {"id": 4, "name": "오토바이무단횡단", "supercategory": "none"},
            {"id": 5, "name": "오토바이신호위반", "supercategory": "none"},
            {"id": 6, "name": "오토바이정지선위반", "supercategory": "none"},
            {"id": 7, "name": "오토바이횡단보도 주행위반", "supercategory": "none"},
            {"id": 8, "name": "자전거", "supercategory": "none"},
            {"id": 9, "name": "자전거 캐리어", "supercategory": "none"},
            {"id": 10, "name": "자전거보행자도로 통행위반", "supercategory": "none"},
            {"id": 11, "name": "자전거안전모 미착용", "supercategory": "none"},
            {"id": 12, "name": "자전거무단횡단", "supercategory": "none"},
            {"id": 13, "name": "자전거신호위반", "supercategory": "none"},
            {"id": 14, "name": "자전거정지선위반", "supercategory": "none"},
            {"id": 15, "name": "자전거횡단보도 주행위반", "supercategory": "none"},
            {"id": 16, "name": "킥보드", "supercategory": "none"},
            {"id": 17, "name": "킥보드 캐리어", "supercategory": "none"},
            {"id": 18, "name": "킥보드보행자도로 통행위반", "supercategory": "none"},
            {"id": 19, "name": "킥보드안전모 미착용", "supercategory": "none"},
            {"id": 20, "name": "킥보드무단횡단", "supercategory": "none"},
            {"id": 21, "name": "킥보드신호위반", "supercategory": "none"},
            {"id": 22, "name": "킥보드횡단보도 주행위반", "supercategory": "none"},
            {"id": 23, "name": "킥보드동승자 탑승위반", "supercategory": "none"}
        ],
        "images": [],
        "annotations": []
    }


    for idx, file in tqdm.tqdm(enumerate(files)):
        with open(file, 'r') as f:
            annot = json.load(f)

        coco['images'].append(
            {
            'id':idx+1,
            'file_name':os.path.basename(file).replace('json', 'jpg'),
            'height':annot['description']['imageHeight'],
            'width':annot['description']['imageWidth'],
            }
        )
        # bbox와 카테고리 변환
        for pm in annot['annotations']['PM']:
            one_object = {"image_id":idx+1,
                          "category_id":int(pm['PM_code'])-12 if int(pm['PM_code']) < 34 else int(pm['PM_code'])-13,
                          "bbox":pm['points'],
                          'area':pm['points'][2] * pm['points'][3]}

            coco['annotations'].append(one_object)

    for idx in range(len(coco['annotations'])):
        coco['annotations'][idx]['id'] = idx + 1

    with open(FILE_PATH + f"/coco/annotations/{mode}_annotations.json", "w") as json_file:
        json.dump(coco, json_file, ensure_ascii=False)


to_COCO(files[:train_count], mode='train')
to_COCO(files[train_count:train_count+valid_count], mode='valid')
to_COCO(files[train_count+valid_count:], mode='test')

# train 39496
# valid 11284
# test 5643

39496it [04:19, 152.29it/s]
11284it [01:14, 151.25it/s]
5643it [00:36, 155.39it/s]


## Faster RCNN(기본모델) 훈련

In [17]:


# Faster RCNN R-50-FPN (FP16) 모델 설정
config_file = 'mmdetection/configs/faster_rcnn/faster-rcnn_r50_fpn_amp-1x_coco.py'

cfg = Config.fromfile(config_file)

ds_type = 'CocoDataset'
classes = tuple(LABEL_NAMES[12:])

cfg['data_root'] = 'data/coco/'
# 메타정보에 클래스와 컬러 입력
cfg.metainfo = {
    'classes':classes,
    'pallete':COLORS[:23]
}
# ROI head의 클래스 숫자도 변경
cfg.model.roi_head.bbox_head.num_classes = 23

# 데이터로더 세팅. 
# ipynb 환경이면 num_workers=0, persistent_workers=False로 맞출것! .py이면 보통 gpu갯수*4 정도)
cfg.train_dataloader.dataset.update(
    {'data_prefix':{'img':'train/'},
     'ann_file':'annotations/train_annotations.json',
     'data_root':cfg.data_root,
     'metainfo':cfg.metainfo}
)
cfg.train_dataloader.update({'batch_size':16, 'num_workers':0, 'persistent_workers':False})

cfg.val_dataloader.dataset.update(
    {'data_prefix':{'img':'val/'},
     'ann_file':'annotations/valid_annotations.json',
     'data_root':cfg.data_root,
     'metainfo':cfg.metainfo}
)
cfg.val_dataloader.update({'num_workers':0, 'persistent_workers':False})

cfg.test_dataloader.dataset.update(
    {'data_prefix':{'img':'test/'},
     'ann_file':'annotationsss/test_annotations.json',
     'data_root':cfg.data_root,
     'metainfo':cfg.metainfo}
)
cfg.test_dataloader.update({'num_workers':0, 'persistent_workers':False})

cfg.val_evaluator.ann_file = cfg.data_root + cfg.val_dataloader.dataset.ann_file
cfg.test_evaluator.ann_file = cfg.data_root + cfg.test_dataloader.dataset.ann_file

# pre-trained모델 불러오는 경로와, 작업물 경로 설정
cfg.load_from = 'checkpoints/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth'
cfg.work_dir = './output'

# 검증 인터벌과 체크포인트 저장 인터벌, 로그 인터벌 설정
cfg.train_cfg.val_interval = 3
cfg.default_hooks.checkpoint.interval = 5
cfg.default_hooks.logger.interval = 10

# 학습률 조정 (SGD)
cfg.optim_wrapper.optimizer.lr = .02

# 랜덤시드 설정. 단, 완전 결정적이진 않게(왜 그런지 이유는 아직 모름)
set_random_seed(42, deterministic=False)

# 텐서보드 시각화 벡엔드 추가
cfg.visualizer.vis_backends.append({'type':'TensorboardVisBackend'})

cfg.dump('faster_rcnn_cfg.py')

In [None]:
!python mmdetection/tools/train.py faster_rcnn_cfg.py
# 작동안됨..

## RTMDet 훈련

In [2]:
config_file = f"""
# Inherit and overwrite part of the config based on this config
_base_ = 'mmdetection/configs/rtmdet/rtmdet_tiny_8xb32-300e_coco.py'

data_root = 'data/coco/' # dataset root

train_batch_size_per_gpu = 16
train_num_workers = 2

max_epochs = 20
stage2_num_epochs = 1
base_lr = 0.00008


metainfo = dict(
    classes={tuple(LABEL_NAMES[12:])},
    pallete={COLORS[:23]}
)

train_dataloader = dict(
    batch_size=train_batch_size_per_gpu,
    num_workers=train_num_workers,
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        data_prefix=dict(img='train/'),
        ann_file='annotations/train_annotations.json'))

val_dataloader = dict(
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        data_prefix=dict(img='val/'),
        ann_file='annotations/valid_annotations.json'))

test_dataloader = dict(
    dataset=dict(
        data_root=data_root,
        metainfo=metainfo,
        data_prefix=dict(img='test/'),
        ann_file='annotations/test_annotations.json'))

val_evaluator = dict(ann_file=data_root + 'annotations/valid_annotations.json')

test_evaluator = dict(ann_file=data_root + 'annotations/train_annotations.json')

model = dict(bbox_head=dict(num_classes=23))

# learning rate
param_scheduler = [
    dict(
        type='LinearLR',
        start_factor=1.0e-5,
        by_epoch=False,
        begin=0,
        end=10),
    dict(
        # use cosine lr from 10 to 20 epoch
        type='CosineAnnealingLR',
        eta_min=base_lr * 0.05,
        begin=max_epochs // 2,
        end=max_epochs,
        T_max=max_epochs // 2,
        by_epoch=True,
        convert_to_iter_based=True),
]

train_pipeline_stage2 = [
    dict(type='LoadImageFromFile', backend_args=None),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='RandomResize',
        scale=(640, 640),
        ratio_range=(0.1, 2.0),
        keep_ratio=True),
    dict(type='RandomCrop', crop_size=(640, 640)),
    dict(type='YOLOXHSVRandomAug'),
    dict(type='RandomFlip', prob=0.5),
    dict(type='Pad', size=(640, 640), pad_val=dict(img=(114, 114, 114))),
    dict(type='PackDetInputs')
]

# optimizer
optim_wrapper = dict(
    _delete_=True,
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05),
    paramwise_cfg=dict(
        norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True))

default_hooks = dict(
    checkpoint=dict(
        interval=5,
        max_keep_ckpts=2,  # only keep latest 2 checkpoints
        save_best='auto'
    ),
    logger=dict(type='LoggerHook', interval=5))

custom_hooks = [
    dict(
        type='PipelineSwitchHook',
        switch_epoch=max_epochs - stage2_num_epochs,
        switch_pipeline=train_pipeline_stage2)
]

# load COCO pre-trained weight
load_from = 'checkpoints/rtmdet_tiny_8xb32-300e_coco_20220902_112414-78e30dcc.pth'
work_dir = './output'

train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=2)
visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')])
"""

with open('rtmdet_tiny_cfg.py', 'w') as f:
    f.write(config_file)

In [3]:
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
cfg = Config.fromfile('rtmdet_tiny_cfg.py')
runner = Runner.from_cfg(cfg)
runner.train()

08/22 11:48:19 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: win32
    Python: 3.11.3 | packaged by Anaconda, Inc. | (main, Apr 19 2023, 23:46:34) [MSC v.1916 64 bit (AMD64)]
    CUDA available: True
    numpy_random_seed: 1734647644
    GPU 0: NVIDIA GeForce GTX 1060
    CUDA_HOME: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1
    NVCC: Cuda compilation tools, release 10.1, V10.1.24
    MSVC: n/a, reason: fileno
    PyTorch: 2.0.1+cu118
    PyTorch compiling details: PyTorch built with:
  - C++ Version: 199711
  - MSVC 193431937
  - Intel(R) Math Kernel Library Version 2020.0.2 Product Build 20200624 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 2019
  - LAPACK is enabled (usually provided by MKL)
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute_37,

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


08/22 11:49:16 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][  5/876]  base_lr: 3.5556e-05 lr: 3.5556e-05  eta: 1 day, 8:48:32  time: 6.7435  data_time: 2.9293  memory: 5250  loss: 2.7928  loss_cls: 2.1074  loss_bbox: 0.6854
08/22 11:49:29 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 10/876]  base_lr: 8.0000e-05 lr: 8.0000e-05  eta: 22:38:01  time: 4.6534  data_time: 1.4680  memory: 5115  loss: 2.8435  loss_cls: 2.1818  loss_bbox: 0.6617
08/22 11:49:41 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 15/876]  base_lr: 8.0000e-05 lr: 8.0000e-05  eta: 19:09:37  time: 3.9404  data_time: 0.9804  memory: 5115  loss: 2.8192  loss_cls: 2.1510  loss_bbox: 0.6682
08/22 11:49:54 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 20/876]  base_lr: 8.0000e-05 lr: 8.0000e-05  eta: 17:24:21  time: 3.5807  data_time: 0.7367  memory: 5115  loss: 2.8241  loss_cls: 2.1637  loss_bbox: 0.6604
08/22 11:50:06 - mmengine - [4m[97mINFO[0m - Epoch(train)  [1][ 25/876]  base_lr: 8.0000e-05 lr

RuntimeError: nms_impl: implementation for device cuda:0 not found.
