### 학습 데이터 범위
* 1안 [현안]  
: {양호 , 불량부분(파손된 부분)} 학습

* 2안  
: {양호, 불량전체(파손된 부분을 포함하는 객체 전체를 의미하는 듯함), 불량부분} 학습

* 3안  
: {불량부분} 학습


### label

배경 0 

점자블럭 1 (파손부)

보도블록 2 (파손부)

자전거도로 3 (파손부)

### format 변환

한 객체에 대한 점들의 집합 P = [(x1, y1), (x2, y2), ... (xn, yn)] 이라고 하고  

사진 사이즈가 width X height, long_edge = max(width, height), short_edge = min(width, height) 라고 할 때

세로가 더 긴 사진(width < height)은 좌우에 패딩 (long_edge - short_edge) // 2 = @ 만큼이 추가되었으므로

x_i' = x_i + @

(가로가 더 긴 사진(width > height)은 패딩이 상하에 추가되므로 x 대신 y_i' = y_i + @ 가 된다.)

그리고 long_edge X long_edge 에서 640 X 640 (target_size =640) 으로 축소되므로  

ratio = target_size /long_edge 라고 정의할 때,

P는 다음의 P' 으로 매핑된다

P' = ratio * [(x1', y1), (x2', y2), ...]



따라서 최종적으로 다음의 포맷으로 변환하기 위한 관계식은

class_id center_x center_y width height  


(target_width = target_height = target_size = 640)

x_min = minimum x coordinate in P' / target_width

x_max = maximum x coordinate in P' / target_width

y_min = minimum y coordinate in P' / target_height

y_max = maximum y coordinate in P' / target_height

다음과 같이 변환 가능할 것이다.

width = (x_max - x_min)

height = (y_max - y_min)

center_x = (x_min + x_max) / 2

center_y = (y_min + y_max) / 2

### 함수 정의

In [36]:
import json
import numpy as np
# from PIL import Image

def get_bounding_boxes_from(data: dict):
    """Extract pixel-space bounding boxes of defect annotations from a label JSON.

    Args:
        data: Parsed label JSON. It must contain an "annotations" list whose
            items provide "is_defect", "label_name" and "annotation_info"
            (a sequence of (x, y) polygon vertices).

    Returns:
        A numpy array of shape (n, 5) with one row
        ``[class_id, x_min, x_max, y_min, y_max]`` per kept annotation, in
        original-image pixel coordinates. The shape is (0, 5) when nothing
        is kept.

    Annotations whose "is_defect" value is not selected are ignored.
    Annotations with an unknown or missing "label_name" are reported on
    stdout and skipped, as are annotations without any vertex.
    """
    labels = {"배경" : 0,
              "점자블럭" : 1,
              "보도블록" : 2,
              "자전거 도로" : 3,
              }
    # Annotation types that produce a box. Use ["정상", "불량부분"] to also
    # emit boxes for intact objects.
    is_defect = ["불량부분"]

    bounding_box_list = []
    for annotation in data["annotations"]:
        # No bounding box is produced for types outside `is_defect`.
        if annotation["is_defect"] not in is_defect:
            continue

        # Resolve the class id; an unknown name must not abort the whole file
        # (returning None here would make the caller fail when iterating).
        label_name = annotation.get("label_name")
        if label_name not in labels:
            print(f"잘못된 label_name : {label_name}")
            continue
        encoded_label = labels[label_name]

        # Axis-aligned extent of the polygon. Taking min/max over the actual
        # vertices avoids sentinel start values, which break for coordinates
        # above the sentinel or for empty polygons.
        vertices = annotation["annotation_info"]
        if len(vertices) == 0:
            continue
        xs = [vertex[0] for vertex in vertices]
        ys = [vertex[1] for vertex in vertices]

        bounding_box_list.append(
            [encoded_label, min(xs), max(xs), min(ys), max(ys)]
        )

    # reshape keeps a consistent (n, 5) layout even when the list is empty.
    return np.array(bounding_box_list).reshape(-1, 5)

def trans_format(bounding_boxes: np.ndarray, json_file: dict):
    """Convert pixel-space boxes into normalized YOLO ``xywh`` label rows.

    The original ``width x height`` image is assumed to be padded to a
    ``long_edge x long_edge`` square (padding split evenly on both sides of
    the shorter axis) and then resized to the square training size. Resizing
    by ``target_size / long_edge`` followed by normalizing with
    ``1 / target_size`` collapses to a single division by ``long_edge``, so
    the target size itself is not needed here.

    Args:
        bounding_boxes: Array-like of shape (n, 5) with rows
            ``[class_id, x_min, x_max, y_min, y_max]`` in original-image
            pixel coordinates. An empty input is accepted.
        json_file: Parsed label JSON; ``json_file["info"]["width"]`` and
            ``json_file["info"]["height"]`` give the original image size.

    Returns:
        A float array of shape (n, 5) with rows
        ``[class_id, center_x, center_y, width, height]``, coordinates
        expressed as fractions of the padded square's edge.
    """
    image_width = json_file["info"]["width"]
    image_height = json_file["info"]["height"]

    long_edge = max(image_width, image_height)
    padding = (long_edge - min(image_width, image_height)) / 2

    # The padding axis depends only on the IMAGE aspect ratio, so it is
    # decided once here. It must not be re-derived from per-box sizes.
    if image_width < image_height:
        # Portrait image: padding is added on the x axis only.
        x_offset, y_offset = padding, 0.0
    else:
        # Landscape (or square) image: padding is added on the y axis only.
        x_offset, y_offset = 0.0, padding

    boxes = np.asarray(bounding_boxes, dtype=float).reshape(-1, 5)
    labels = boxes[:, 0]
    x_min = (boxes[:, 1] + x_offset) / long_edge
    x_max = (boxes[:, 2] + x_offset) / long_edge
    y_min = (boxes[:, 3] + y_offset) / long_edge
    y_max = (boxes[:, 4] + y_offset) / long_edge

    center_x = (x_min + x_max) / 2
    center_y = (y_min + y_max) / 2
    box_width = x_max - x_min
    box_height = y_max - y_min

    return np.column_stack([labels, center_x, center_y, box_width, box_height])

### test code ###
# Single-file smoke test: load one label JSON, print the raw and converted
# boxes, and write the converted rows to a .txt file in the current directory.
#
# Other sample files that were tried:
# ./라벨링데이터/09.점자블럭/1.불량/
# apple
# 2_09_1_1_1_1_20210719_0000006002.jpg
# samsung
# 2_09_1_1_1_1_20210716_0000037039.jpg

# file = "2_09_1_1_1_1_20210719_0000006002"
# file = "2_09_1_1_1_1_20210716_0000037039"
# path = "09.점자블럭/1.불량"

# This sample is expected to yield an empty result (no boxes).
file = "2_09_0_1_4_1_20210917_0000516373"
path = "09.점자블럭/0.양호"

# image_path is only printed below; the image itself is not opened here.
image_path = f"./picture/{path}/{file}.jpg" # .jpg or .jpeg
json_path = f"./label/{path}/{file}.json"

with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Boxes in original-image pixel coordinates.
bounding_boxes = get_bounding_boxes_from(data)

print(image_path)

print("bounding boxes")
for box in bounding_boxes:
    print(box)

# Boxes converted to the YOLO label layout (class_id center_x center_y width height).
translated_format = trans_format(bounding_boxes, data)
print("\ntranslated bounding boxes")
for t in translated_format:
    print(t)

# One space-delimited row per box, written next to the notebook.
np.savetxt(f"./{file}.txt", translated_format, delimiter=" ", fmt='%.16f')

./원천데이터/09.점자블럭/0.양호/2_09_0_1_4_1_20210917_0000516373.jpg
bounding boxes

translated bounding boxes


### 변환

In [39]:
import os
import json
import numpy as np
import cv2

# Batch conversion: for every label JSON under
# {label_root_path}/{folder}/{sub_folder}, write a YOLO-format .txt file with
# the same stem into the sibling "{sub_folder}640" directory.
folders = ["09.점자블럭", "12.보도블록", "13.자전거도로"]
sub_folders= ["0.양호", "1.불량"]

# folders = ["09.점자블럭"] # test
# sub_folders= ["0.양호"] # test

label_root_path = "./label"
image_root_path = "./picture"

for folder in folders:
    for sub_folder in sub_folders:
        json_path = f"{label_root_path}/{folder}/{sub_folder}"
        output_path = f"{label_root_path}/{folder}/{sub_folder}640"
        # Not used by the conversion itself; kept as the matching image folder.
        image_path = f"{image_root_path}/{folder}/{sub_folder}"

        # np.savetxt does not create directories, so make sure the target exists.
        os.makedirs(output_path, exist_ok=True)

        json_files = [f for f in os.listdir(json_path) if f.endswith('.json')]

        total = len(json_files)

        print(json_path)

        # Walk every .json file in the folder.
        for i, json_name in enumerate(json_files):
            filename = os.path.splitext(json_name)[0] # drop the .json extension
            if i % 300 == 0:
                print(f"{i+1} / {total}")

            # Keep the file open only while it is being parsed.
            with open(f"{json_path}/{filename}.json", "r", encoding="utf-8") as f:
                data = json.load(f)

            bounding_boxes = get_bounding_boxes_from(data)
            if bounding_boxes is None:
                # Label parsing failed for this file; skip it instead of
                # aborting the whole batch.
                print(f"skipped (label parsing failed) : {json_path}/{filename}.json")
                continue

            translated_format = trans_format(bounding_boxes, data)
            np.savetxt(f"{output_path}/{filename}.txt", translated_format, delimiter=" ", fmt='%.16f')

./라벨링데이터/09.점자블럭/0.양호
1 / 504
301 / 504
./라벨링데이터/09.점자블럭/1.불량
1 / 1578
301 / 1578
601 / 1578
901 / 1578
1201 / 1578
1501 / 1578
./라벨링데이터/12.보도블록/0.양호
1 / 1416
301 / 1416
601 / 1416
901 / 1416
1201 / 1416
./라벨링데이터/12.보도블록/1.불량
1 / 3624
301 / 3624
601 / 3624
901 / 3624
1201 / 3624
1501 / 3624
1801 / 3624
2101 / 3624
2401 / 3624
2701 / 3624
3001 / 3624
3301 / 3624
3601 / 3624
./라벨링데이터/13.자전거도로/0.양호
1 / 1380
301 / 1380
601 / 1380
901 / 1380
1201 / 1380
./라벨링데이터/13.자전거도로/1.불량
1 / 3540
301 / 3540
601 / 3540
901 / 3540
1201 / 3540
1501 / 3540
1801 / 3540
2101 / 3540
2401 / 3540
2701 / 3540
3001 / 3540
3301 / 3540


### 누락된 파일이 없는지 갯수 확인

In [None]:
import os

# Sanity check: every label .json should have a converted .txt with the same
# stem in the sibling "{sub_folder}640" directory.
folders = ["09.점자블럭", "12.보도블록", "13.자전거도로"]
sub_folders= ["0.양호", "1.불량"]

# Must be the same label root that the conversion step reads from and writes
# to ("./label"); checking a different tree would make the counts meaningless.
root_path = "./label"
for folder in folders:
    for sub_folder in sub_folders:
        path = f"{root_path}/{folder}/{sub_folder}"
        output_path = f"{root_path}/{folder}/{sub_folder}640"
        print(path)

        json_files = [f for f in os.listdir(path) if f.endswith('.json')]
        json_total = len(json_files)
        print("original json file counts : ", json_total)

        # A missing output directory means nothing was converted for this folder.
        if os.path.isdir(output_path):
            txt_files = [f for f in os.listdir(output_path) if f.endswith('.txt')]
        else:
            txt_files = []
        txt_total = len(txt_files)
        print("txt file counts : ", txt_total)

        # Counts alone can hide a mismatch, so also compare the file stems.
        json_stems = {os.path.splitext(f)[0] for f in json_files}
        txt_stems = {os.path.splitext(f)[0] for f in txt_files}
        missing = sorted(json_stems - txt_stems)
        if missing:
            print("missing txt files : ", missing)