## 데이터 구조 변경
Mask R-CNN을 Detectron2로 학습하기 위한 데이터 전처리 및 기타 사항들 변경

In [40]:
import os
import re
import json
import splitfolders
from glob import glob

In [43]:
image_path = "./Data/images/*/*.png"
json_path = "./Data/labels/*/*.json"

image_filenames = glob(image_path)
json_filenames = glob(json_path)

# Anything that is not an ASCII letter, digit, underscore, dot or path
# separator ("/" or "\") is dropped from the path.
_DISALLOWED_PATH_CHARS = re.compile(r"[^A-Za-z0-9_\\/.]")


def sanitize_path(path):
    """Return *path* with every disallowed character (e.g. Hangul) removed."""
    return _DISALLOWED_PATH_CHARS.sub("", path)


# Remove Korean (and any other disallowed) characters from the names of both
# the label files and the image files. Labels are processed first, then images.
for filename in json_filenames + image_filenames:
    renamed = sanitize_path(filename)
    if renamed == filename:
        # Already clean; nothing to rename.
        continue
    if os.path.exists(renamed):
        # os.rename() silently replaces an existing file on POSIX, so two
        # names that collapse to the same sanitized path would lose data.
        raise FileExistsError(
            f"Cannot rename {filename!r}: {renamed!r} already exists"
        )
    os.rename(filename, renamed)

In [67]:
label_dirs = ["./Data/labels/class1/", "./Data/labels/class2/", "./Data/labels/class3/", "./Data/labels/class4/"]

# Replace the image file name stored inside each label JSON with
# "<label file stem>.png", so it matches the name of the label file itself.
for label_dir in label_dirs:
    for json_file in os.listdir(label_dir):
        filename, extension = os.path.splitext(json_file)
        if extension != ".json":
            # Only label files are rewritten; other directory entries
            # (sub-folders, stray files) are left untouched.
            continue

        label_path = os.path.join(label_dir, json_file)

        with open(label_path, "r", encoding="utf8") as f:
            file = json.load(f)

        file["Meta"]["Image_FileName"] = f"{filename}.png"
        file["Image_Annotation"]["Image File Name"] = f"{filename}.png"

        # Write with the same encoding used for reading. The `with` blocks
        # close the handles, so no explicit close() calls are needed.
        with open(label_path, "w", encoding="utf8") as f1:
            json.dump(file, f1)

In [68]:
# Add a category_id to every annotation in the dataset. The id is the index
# of the class folder within label_dirs.
for idx, label_dir in enumerate(label_dirs):
    for json_file in os.listdir(label_dir):
        if not json_file.endswith(".json"):
            # Only label files are rewritten; skip any other directory entry.
            continue

        label_path = os.path.join(label_dir, json_file)

        with open(label_path, "r", encoding="utf8") as f:
            file = json.load(f)

        for values in file["Image_Annotation"]["Annotations"]:
            values["category_id"] = idx

        # Write with the same encoding used for reading. The `with` blocks
        # close the handles, so no explicit close() calls are needed.
        with open(label_path, "w", encoding="utf8") as f1:
            json.dump(file, f1)

In [70]:
# Split the image class folders into three subsets (70% / 15% / 15%) under
# ./Splitted, using a fixed seed so the split is reproducible.
splitfolders.ratio(
    "./Data/images/",
    "./Splitted",
    seed=11,
    ratio=(0.7, 0.15, 0.15),
)

Copying files: 1571 files [00:16, 95.52 files/s] 


In [71]:
# Split the label class folders into three subsets (70% / 15% / 15%) under
# ./Splitted_labels, with the same seed and ratio as the image split.
# NOTE(review): presumably the identical seed is meant to keep each label in
# the same subset as its image — confirm that the pairs actually line up.
splitfolders.ratio(
    "./Data/labels/",
    "./Splitted_labels",
    seed=11,
    ratio=(0.7, 0.15, 0.15),
)

Copying files: 1571 files [00:10, 146.16 files/s]


In [83]:
def find_invalid_annotations(json_folder):
    """Return the polygon annotations in *json_folder* that have too few points.

    Every ``*.json`` file directly inside ``json_folder`` is parsed. An
    annotation is reported when its ``"Annotation Type"`` is ``"polygon"`` and
    its ``"X Coordinate"`` list holds two or fewer values.

    Args:
        json_folder: Directory containing the label JSON files.

    Returns:
        A list of ``(filename, annotation_id)`` tuples, ordered by file name
        and, within a file, by the order of its annotations. Empty when
        nothing is invalid.
    """
    invalid_annotations = []

    # Sort the listing so the report order does not depend on the filesystem.
    for filename in sorted(os.listdir(json_folder)):
        if not filename.endswith('.json'):
            continue

        filepath = os.path.join(json_folder, filename)
        # Read explicitly as UTF-8 so parsing does not depend on the
        # platform's default encoding.
        with open(filepath, 'r', encoding='utf8') as f:
            data = json.load(f)

        # Files without an 'Image_Annotation' section have nothing to check.
        if 'Image_Annotation' not in data:
            continue

        # A section without an 'Annotations' list is treated as empty.
        annotations = data['Image_Annotation'].get('Annotations', [])
        for annotation in annotations:
            # A polygon with two or fewer points is flagged as invalid.
            if annotation['Annotation Type'] == 'polygon' and \
                    len(annotation['X Coordinate']) <= 2:
                invalid_annotations.append((filename, annotation['Annotation ID']))

    return invalid_annotations

json_folder = './Splitted/train/labels/class3/'
# Folder holding the images that belong to the labels in json_folder.
image_folder = './Splitted/train/images/class3/'

invalid_annotations = find_invalid_annotations(json_folder)
filelist = []
if invalid_annotations:
    print("Following annotations are invalid:")
    for filename, annotation_id in invalid_annotations:
        print(f"File: {filename}, Invalid Annotation ID: {annotation_id}")
        filelist.append(filename)
else:
    print("No invalid annotations found.")

# One file can contain several invalid annotations and therefore appear in
# filelist more than once. Deduplicate (keeping order) so each label/image
# pair is removed exactly once; only the trailing ".json" is stripped.
filenames = list(dict.fromkeys(os.path.splitext(i)[0] for i in filelist))
for filename in filenames:
    os.remove(os.path.join(json_folder, f'{filename}.json'))
    os.remove(os.path.join(image_folder, f'{filename}.png'))

No invalid annotations found.


In [80]:
# File stems (name without the trailing ".json") of the flagged label files.
# Duplicates are dropped, keeping first-seen order, because a file is listed
# once per invalid annotation it contains.
filenames = list(dict.fromkeys(os.path.splitext(i)[0] for i in filelist))

In [82]:
# Delete the label/image pair of every flagged file. Paths that no longer
# exist are skipped so this cell can be re-run after the files were removed.
for filename in filenames:
    for stale_path in (
        f'./Splitted/train/labels/class3/{filename}.json',
        f'./Splitted/train/images/class3/{filename}.png',
    ):
        if os.path.exists(stale_path):
            os.remove(stale_path)