#### json 형태의 데이터를 coco format json 형태로 변환

In [1]:
import os
import json
import glob
import json
import cv2
import numpy as np
import pandas as pd

from tqdm import tqdm

In [4]:
# Collect the per-image annotation JSON files of a directory and rewrite them
# as one COCO-format JSON file.
class RemakeCOCOformat():
    """Merge per-image annotation JSON files into a single COCO-style JSON.

    Every input file is read as a dict with an ``'images'`` dict and an
    ``'annotations'`` list; each annotation is expected to carry the key
    named by ``task`` and a ``'segmentation'`` entry (see ``rebuilding``).

    Args:
        img_dir: Directory holding the ``*.jpg`` images.
        ann_dir: Directory holding the ``*.json`` annotation files.
        data_lst: Optional sequence of file names (``.jpg`` or ``.json``)
            restricting the conversion to those files. When omitted or
            empty, every file found in ``img_dir`` / ``ann_dir`` is used.
        n_sample: Stored on the instance; not used by the code in this class.
        alis: Base name (without extension) of the output file written to
            ``datainfo/``.
        ratio: Stored on the instance; not used by the code in this class.
        labeling_schme: Optional list of label names. When given, it defines
            the output ``categories`` and is used to assign ``category_id``.
        task: Key of each annotation dict that holds its label
            (e.g. ``'damage'`` or ``'part'``).
    """

    def __init__(self, img_dir, ann_dir, data_lst=None, n_sample=None, alis=None, ratio=0.05, labeling_schme=None, task=None):
        self.base_img_path = img_dir
        self.base_label_path = ann_dir

        self.labeling_schme = labeling_schme
        self.task = task

        # Running counters so ids are unique across the whole output file.
        self.img_id = 0
        self.ann_id = 0

        self.ratio = ratio

        # Always define these attributes: previously they only existed when
        # ``data_lst`` / ``n_sample`` were truthy, so ``coco_json`` failed
        # with AttributeError when files were discovered by glob.
        self.train_fn = alis
        self.n_sample = n_sample

        # ``len`` instead of plain truthiness so numpy arrays are accepted too.
        if data_lst is not None and len(data_lst) > 0:
            self.images = [os.path.join(self.base_img_path, f.replace('.json', '.jpg')) for f in data_lst]
            self.annotations = [os.path.join(self.base_label_path, f.replace('.jpg', '.json')) for f in data_lst]
        else:
            # Only scan the directories when no explicit file list was given.
            self.images = glob.glob(os.path.join(self.base_img_path, r"*.jpg"))
            self.annotations = glob.glob(os.path.join(self.base_label_path, r"*.json"))

    def load_json(self, file_name):
        """Read ``file_name`` and return the parsed JSON object."""
        with open(file_name, "r") as f:
            ann = json.load(f)
        return ann

    def save_json(self, file, file_name):
        """Serialize the object ``file`` as JSON into the path ``file_name``."""
        with open(file_name, "w") as f:
            json.dump(file, f)

    def _append_record(self, d, ann):
        """Add one parsed annotation file ``ann`` to the COCO dict ``d``."""
        self.img_id += 1
        ann['images']['id'] = self.img_id
        d['images'].append(ann['images'])

        for a in ann['annotations']:
            # Annotations with an empty label for this task are dropped.
            if a[self.task] == '':
                continue

            self.ann_id += 1
            a['id'] = self.ann_id
            a['image_id'] = self.img_id
            if self.labeling_schme:
                # NOTE(review): category_id here is the 0-based position in
                # labeling_schme (unknown labels get len(labeling_schme)),
                # while coco_json() writes categories with 1-based ids.
                # Left unchanged because downstream code may rely on it;
                # confirm which convention is intended.
                if a[self.task] in self.labeling_schme:
                    a['category_id'] = self.labeling_schme.index(a[self.task])
                else:
                    a['category_id'] = len(self.labeling_schme)
            # Wrap the stored segmentation in an outer list.
            a['segmentation'] = [a['segmentation']]

            d['annotations'].append(a)

    def rebuilding(self, d, img_lst):
        """Append every annotation file listed in ``img_lst`` to ``d``.

        Args:
            d: Dict with ``'images'`` and ``'annotations'`` lists; it is
                modified in place.
            img_lst: Iterable of annotation JSON file paths.

        Returns:
            The same dict ``d``.
        """
        for path in img_lst:
            self._append_record(d, self.load_json(path))

        return d

    def coco_json(self):
        """Build the merged COCO dict and write it to ``datainfo/<alis>.json``.

        The first annotation file is used as a template for the top-level
        keys. Files whose ``'annotations'`` list is empty are skipped. The
        number of images written is printed.

        Raises:
            ValueError: If no output name (``alis``) was given.
            IndexError: If there are no annotation files to convert.
        """
        if self.train_fn is None:
            raise ValueError("`alis` (output file name) is required to save the COCO json")
        if len(self.annotations) == 0:
            raise IndexError(f"no annotation json files found in {self.base_label_path!r}")

        train = self.load_json(self.annotations[0])
        train['images'] = []
        train['annotations'] = []

        if self.labeling_schme:
            cates = [{"id": i + 1, "name": v} for i, v in enumerate(self.labeling_schme)]
            if self.task == 'part':
                # The 'part' task gets an extra catch-all category.
                cates.append({"id": len(self.labeling_schme) + 1, "name": 'etc'})
            train['categories'] = cates

        # Parse each file once: filter and append in the same pass.
        for path in self.annotations:
            ann = self.load_json(path)
            if len(ann['annotations']) != 0:
                self._append_record(train, ann)

        print(len(train['images']))

        # exist_ok avoids the check-then-create race of os.path.exists().
        for out_dir in ("datainfo", "train_coco", "label_coco"):
            os.makedirs(out_dir, exist_ok=True)

        self.save_json(train, os.path.join("datainfo", self.train_fn + ".json"))
        


def label_split(data_dir, img_dir='rst', task=None, n_per_label=10):
    """Sample annotation files per damage label and export one COCO json each.

    Every ``*.json`` file in ``data_dir`` is assigned to each label whose
    ``category_id`` (1-4) appears in its annotations. For each label up to
    ``n_per_label`` distinct files are drawn at random and converted with
    ``RemakeCOCOformat`` into ``datainfo/_label<id>.json``.

    Args:
        data_dir: Directory containing the annotation JSON files.
        img_dir: Image directory forwarded to ``RemakeCOCOformat``.
        task: Annotation key forwarded to ``RemakeCOCOformat``.
            NOTE(review): ``RemakeCOCOformat.rebuilding`` indexes every
            annotation with this key, so the default ``None`` looks like it
            cannot work with the current class - confirm and pass the
            proper task name.
        n_per_label: Maximum number of files sampled per label.

    Returns:
        Dict mapping label id to ``{"files": <array of sampled paths>,
        "label_info": <label name>}``.
    """
    annotations = glob.glob(os.path.join(data_dir, r"*.json"))

    def load_json(file_name):
        with open(file_name, "r") as f:
            ann = json.load(f)
        return ann

    label_schme = {
    1:{"files":[],"label_info":'스크래치'},
    2:{"files":[],"label_info":'파손'},
    3:{"files":[],"label_info":'찌그러짐'},
    4:{"files":[],"label_info":'이격'},
    }

    for ann in annotations:
        parse = load_json(ann)
        for a in parse['annotations']:
            label_schme[a['category_id']]['files'].append(ann)

    for i in label_schme:
        unique_files = list(set(label_schme[i]['files']))
        # Sampling without replacement raises if more items are requested
        # than exist, so never ask for more than are available.
        n_pick = min(n_per_label, len(unique_files))
        label_schme[i]['files'] = np.random.choice(unique_files, n_pick, replace=False)

    for i in label_schme:
        # RemakeCOCOformat joins each entry onto its annotation directory, so
        # pass bare file names (as a plain list, not a numpy array).
        picked = [os.path.basename(p) for p in label_schme[i]['files'].tolist()]
        if not picked:
            # An empty list would make RemakeCOCOformat fall back to globbing
            # every file; skip labels that have no files instead.
            continue
        coco = RemakeCOCOformat(img_dir, data_dir, data_lst=picked, alis=f"_label{i}", task=task)
        coco.coco_json()

    return label_schme

In [5]:
# List the files in ./train; the names are used later to filter the label table.
train_dir = os.getcwd() + '/train'
train_name = os.listdir(train_dir)
# Notebook display: number of files and the first file name.
len(train_name), train_name[0]

(50445, '0506233_sc-202337.jpg')

In [7]:
# The existing per-image JSON files are converted into a COCO-format JSON and saved.
if __name__ == "__main__":
    label_df = pd.read_csv('label_instance.csv')

    # Keep rows that have at least one annotation AND whose image is in ./train.
    # The comparison must be parenthesised: `&` binds tighter than `>`, so
    # `total_anns > 0 & mask` evaluated `0 & mask` first and the file filter
    # was silently dropped. The file names live in the 'index' column
    # (`label_df.index` is the DataFrame's row index, not that column).
    # Names are compared without extension because the list may hold either
    # '.jpg' or '.json' names.
    train_stems = {os.path.splitext(name)[0] for name in train_name}
    in_train = label_df['index'].astype(str).map(lambda name: os.path.splitext(name)[0] in train_stems)
    label_df = label_df.loc[(label_df['total_anns'] > 0) & in_train]
    print(len(label_df))

    idx = 0


    dir_name_img = 'train'
    dir_name_label = 'label'
    l_sch = ["Scratched","Separated","Crushed","Breakage"]

    # test, val
    tmp = list(label_df.loc[label_df['dataset']=='val']['index'].values)
    test = RemakeCOCOformat(img_dir = dir_name_img, ann_dir = dir_name_label, data_lst = tmp, alis='damage_val', ratio=0, labeling_schme=l_sch, task='damage')
    test.coco_json()



50445
50445
