In [1]:
# 필요한 library 선언

import os
import cv2
import json
import shutil
import numpy as np
from tqdm import tqdm
from glob import glob
from typing import Dict, List
import matplotlib.pyplot as plt
from collections import defaultdict

In [2]:
# * JSON structure of a single input annotation file (reference only; this cell just echoes the string)

'''
{
    "file_name": "train_100000.json",
    # ! shape안에 bbox가 2개 이상 있을 수 있는 구조
    "shapes": [
        {
            "label": "04_lymph",
            "points": [
                # ! (point1_x, point1_y), (point2_x, point2_y), (point3_x, point3_y), (point4_x, point4_y) (좌상, 우상, 우하, 좌하)
                [303.991341991342, 368.83116883116884], [373.68831168831167, 368.83116883116884], [373.68831168831167, 493.5064935064935], [303.991341991342, 493.5064935064935]
            ]
        }
    ],
    "imageData": "~",
    "imageHeight": 576,
    "imageWidth": 576
}
'''

'\n{\n    "file_name": "train_100000.json",\n    # ! shape안에 bbox가 2개 이상 있을 수 있는 구조\n    "shapes": [\n        {\n            "label": "04_lymph",\n            "points": [\n                # ! (point1_x, point1_y), (point2_x, point2_y), (point3_x, point3_y), (point4_x, point4_y) (좌상, 우상, 우하, 좌하)\n                [303.991341991342, 368.83116883116884], [373.68831168831167, 368.83116883116884], [373.68831168831167, 493.5064935064935], [303.991341991342, 493.5064935064935]\n            ]\n        }\n    ],\n    "imageData": "~",\n    "imageHeight": 576,\n    "imageWidth": 576\n}\n'

In [3]:
def convert_to_coco(root_path: os.PathLike, save_path: os.PathLike) -> None:
    """
    Convert the per-image training JSON files into one COCO-format file.

    Every ``*.json`` under ``<root_path>/train`` is read and a single JSON
    document with ``info``, ``images``, ``annotations`` and ``categories``
    sections is written to ``save_path``. Only for the train dataset.

    Args:
        root_path: Directory that contains the ``train`` sub-directory.
        save_path: Destination of the COCO JSON file (overwritten if present).

    Raises:
        KeyError: If an input file lacks an expected key, or a shape carries
            a label that is not in the category table below.
        ValueError: If the numeric image id cannot be parsed from ``file_name``.
    """
    categories = {
        '01_ulcer': 1,
        '02_mass': 2,
        '04_lymph': 3,
        '05_bleeding': 4
    }

    # 'images' and 'annotations' are created up front so the output always
    # contains both keys, even when no input file (or no shape) is found.
    res = {
        # info for coco dataset
        "info": {
            "description": "BB-dataset",
            "url": "",
            "version": "1.0",
            "year": 2021,
            "contributor": "MINYONG_YOUNGJUNE_WOOSUNG",
            "date_created": "2021/11/23"
        },
        "images": [],
        "annotations": [],
    }

    # glob order is unspecified; sorting keeps annotation ids reproducible.
    json_paths = sorted(glob(os.path.join(root_path, 'train', '*.json')))

    # NOTE(review): annotation ids start at 0. Some COCO evaluation tooling
    # reportedly treats a ground-truth id of 0 as "unmatched" - confirm before
    # using this file as evaluation ground truth.
    n_id = 0
    for json_path in tqdm(json_paths):
        with open(json_path, 'r') as f:
            record = json.load(f)

        # licenses are not included
        src_name = record['file_name']
        # The id is the first 6 characters after the last underscore,
        # e.g. "train_100000.json" -> 100000.
        image_id = int(src_name.split('_')[-1][:6])

        # Swap only the trailing ".json" extension; a plain str.replace would
        # also rewrite "json" appearing elsewhere in the name.
        if src_name.endswith('.json'):
            image_name = src_name[:-len('.json')] + '.jpg'
        else:
            image_name = src_name

        res['images'].append({
            'id': image_id,
            'width': record['imageWidth'],
            'height': record['imageHeight'],
            'file_name': image_name,
            "coco_url": "",
            "license": 0,
            "date_captured": "",
            "flickr_url": ""
        })

        for shape in record['shapes']:
            # Force a float array: integer coordinates would otherwise yield
            # numpy integer scalars, which json.dump cannot serialise.
            box = np.asarray(shape['points'], dtype=float)

            # Axis-aligned bounding box of all points, as built-in floats.
            x1, y1 = float(box[:, 0].min()), float(box[:, 1].min())
            x2, y2 = float(box[:, 0].max()), float(box[:, 1].max())
            w, h = x2 - x1, y2 - y1

            res['annotations'].append({
                'id': n_id,
                'image_id': image_id,
                'category_id': categories[shape['label']],
                'area': w * h,
                'bbox': [x1, y1, w, h],  # COCO layout: [x, y, width, height]
                'iscrowd': 0,
            })
            n_id += 1

    res['categories'] = [
        {'id': category_id, 'name': name}
        for name, category_id in categories.items()
    ]

    with open(save_path, 'w') as f:
        json.dump(res, f)

In [4]:
def get_colors(classes: List) -> Dict[str, tuple]:
    """
    Assign a drawing colour to every class name.

    Colours are handed out in iteration order from a fixed four-entry palette.
    When there are more classes than palette entries the palette is reused
    from the start instead of raising ``IndexError``.

    Args:
        classes: Iterable of class names (any iterable works, e.g. ``dict.values()``).

    Returns:
        Mapping of class name to a BGR colour tuple.
    """
    palette = [  # BGR order, as used by OpenCV
        (0, 0, 255),    # 1: red
        (0, 255, 255),  # 2: yellow
        (255, 0, 0),    # 3: blue
        (0, 255, 0),    # 4: green
    ]
    return {name: palette[idx % len(palette)] for idx, name in enumerate(classes)}
    

def draw_bbox(
    data_path: os.PathLike,
    coco_path: os.PathLike,
    save_path: os.PathLike,
    n_images: int = 10,
) -> None:
    """
    Visualise COCO-format annotations by drawing labelled boxes on images.

    The first ``n_images`` entries of the COCO ``images`` list are loaded from
    ``data_path``, every annotated box is drawn with its class name, and the
    result is written as a ``.jpg`` into ``save_path``.

    WARNING: ``save_path`` is deleted and recreated on every call, so anything
    already stored there is lost.

    Args:
        data_path: Directory that holds the image files named in the COCO file.
        coco_path: Path to the COCO-format annotation JSON.
        save_path: Output directory for the rendered images (wiped first).
        n_images: Number of images to render, taken from the start of the list.
    """
    import warnings  # local import: keeps this block self-contained

    with open(coco_path, 'r') as f:
        ann_json = json.load(f)

    images = [(v['id'], v['file_name']) for v in ann_json['images']]
    categories = {v['id']: v['name'] for v in ann_json['categories']}

    # Group boxes by image id. A COCO file without an 'annotations' section
    # (e.g. a test split) simply yields images with no boxes.
    ann = defaultdict(list)
    for a in ann_json.get('annotations', []):
        ann[a['image_id']].append(
            {
                # Holds the category *name*, which doubles as the drawn label.
                'category_id': categories.get(a['category_id']),
                # cv2 drawing calls need integer pixel coordinates.
                'bbox': list(map(round, a['bbox'])),
            }
        )

    # Always start from an empty output directory.
    if os.path.exists(save_path):
        shutil.rmtree(save_path)
    os.makedirs(save_path)

    colors = get_colors(categories.values())

    # One font setting for both measuring and drawing the label, so the filled
    # background matches the width of the rendered text.
    font, font_scale, text_thickness = cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2

    for image_id, file_name in tqdm(images[:n_images]):
        file_path = os.path.join(data_path, file_name)

        # cv2.imread returns None instead of raising when the file is missing
        # or unreadable; skip such images with an explicit warning.
        image = cv2.imread(file_path)
        if image is None:
            warnings.warn(f'could not read image, skipping: {file_path}')
            continue

        for a in ann.get(image_id, []):
            label = a['category_id']
            x1, y1, w, h = a['bbox']  # COCO layout: [x, y, width, height]
            x2, y2 = x1 + w, y1 + h

            cv2.rectangle(image, (x1, y1), (x2, y2), colors[label], 2)
            # Filled strip above the box as background for the white label text.
            (tw, _), _ = cv2.getTextSize(label, font, font_scale, text_thickness)
            cv2.rectangle(image, (x1, y1 - 20), (x1 + tw, y1), colors[label], -1)
            cv2.putText(image, label, (x1, y1 - 5), font, font_scale, (255, 255, 255), text_thickness)

        # Replace only the final extension; splitting on the first '.' would
        # truncate names that contain additional dots.
        out_name = os.path.splitext(file_name)[0] + '.jpg'
        cv2.imwrite(os.path.join(save_path, out_name), image)

In [5]:
# Build the COCO-format training annotations, then render the first few
# annotated images (draw_bbox's default n_images) as a visual sanity check.
convert_to_coco(
    root_path='../json_data',
    save_path='../data/train_annotations.json',
)
draw_bbox(
    data_path='../data/train/',
    coco_path='../data/train_annotations.json',
    save_path='../examples/before_train/',
)

100%|██████████| 62622/62622 [00:06<00:00, 9042.34it/s]
100%|██████████| 10/10 [00:00<00:00, 149.81it/s]


In [6]:
def convert_to_coco_test(root_path: os.PathLike, save_path: os.PathLike) -> None:
    """
    Build a COCO-format image list for the test dataset.

    Every ``*.json`` under ``<root_path>/test`` is read and a JSON document
    with ``info``, ``images`` and ``categories`` sections is written to
    ``save_path``. No ``annotations`` section is written; any ``shapes`` in
    the input files are ignored.

    Args:
        root_path: Directory that contains the ``test`` sub-directory.
        save_path: Destination of the COCO JSON file (overwritten if present).

    Raises:
        KeyError: If an input file lacks an expected key.
        ValueError: If the numeric image id cannot be parsed from ``file_name``.
    """
    categories = {
        '01_ulcer': 1,
        '02_mass': 2,
        '04_lymph': 3,
        '05_bleeding': 4
    }

    # 'images' is created up front so the key exists even for an empty folder.
    res = {
        # info for coco dataset
        "info": {
            "description": "BB-dataset",
            "url": "",
            "version": "1.0",
            "year": 2021,
            "contributor": "MINYONG_YOUNGJUNE_WOOSUNG",
            "date_created": "2021/11/23"
        },
        "images": [],
    }

    # glob order is unspecified; sorting keeps the image order reproducible.
    json_paths = sorted(glob(os.path.join(root_path, 'test', '*.json')))

    for json_path in tqdm(json_paths):
        with open(json_path, 'r') as f:
            record = json.load(f)

        # licenses are not included
        src_name = record['file_name']
        # The id is the first 6 characters after the last underscore,
        # e.g. "test_200000.json" -> 200000.
        image_id = int(src_name.split('_')[-1][:6])

        # Swap only the trailing ".json" extension; a plain str.replace would
        # also rewrite "json" appearing elsewhere in the name.
        if src_name.endswith('.json'):
            image_name = src_name[:-len('.json')] + '.jpg'
        else:
            image_name = src_name

        res['images'].append({
            'id': image_id,
            'width': record['imageWidth'],
            'height': record['imageHeight'],
            'file_name': image_name,
            "coco_url": "",
            "license": 0,
            "date_captured": "",
            "flickr_url": ""
        })

    res['categories'] = [
        {'id': category_id, 'name': name}
        for name, category_id in categories.items()
    ]

    with open(save_path, 'w') as f:
        json.dump(res, f)

In [7]:
# Write the COCO-format image list for the test split (no boxes are emitted).
convert_to_coco_test(
    root_path='../json_data',
    save_path='../data/test_annotations.json',
)

100%|██████████| 20874/20874 [00:01<00:00, 11376.53it/s]
