In [1]:
import os
from pathlib import Path
import json
import matplotlib.pyplot as plt
import pandas as pd
import shutil
import collections
import cv2
from unidecode import unidecode

In [2]:
# Dataset root: label_map.csv, bbox/ and image/ are read from here, and
# annotations.json is written here.
DATA = Path('..') / 'data' / 'filtered' / 'circle'

## READ GLOBAL LABEL MAP

In [3]:
# Load the label table and append an ASCII transliteration of every label name
# (unidecode removes the Vietnamese diacritics).
global_label_map = pd.read_csv(DATA / 'label_map.csv', index_col=0).assign(
    label_name_standard=lambda table: table['label_name'].map(unidecode)
)
global_label_map

Unnamed: 0,id,label_name,label_name_standard
0,0,"sẹo_mụn_(lõm,_lồi)","seo_mun_(lom,_loi)"
1,1,"sang_thương_viêm_(sẩn,_mụn_mủ,_mảng_viêm_đỏ)","sang_thuong_viem_(san,_mun_mu,_mang_viem_do)"
2,2,sang_thương_nang_và_nốt,sang_thuong_nang_va_not
3,3,còi_(đóng/mở),coi_(dong/mo)
4,4,dát_tăng_sắc_tố_(vết_thâm),dat_tang_sac_to_(vet_tham)


In [4]:
# Lookup tables derived from the label map:
#   global_label     : id -> ASCII label name
#   global_label_vn  : original label name -> ASCII label name
#   global_label_inv : ASCII label name -> id
global_label = {
    label_id: ascii_name
    for label_id, ascii_name in zip(global_label_map['id'],
                                    global_label_map['label_name_standard'])
}
global_label_vn = {
    original_name: ascii_name
    for original_name, ascii_name in zip(global_label_map['label_name'],
                                         global_label_map['label_name_standard'])
}
global_label_inv = dict(zip(global_label.values(), global_label.keys()))
global_label, global_label_inv

({0: 'seo_mun_(lom,_loi)',
  1: 'sang_thuong_viem_(san,_mun_mu,_mang_viem_do)',
  2: 'sang_thuong_nang_va_not',
  3: 'coi_(dong/mo)',
  4: 'dat_tang_sac_to_(vet_tham)'},
 {'seo_mun_(lom,_loi)': 0,
  'sang_thuong_viem_(san,_mun_mu,_mang_viem_do)': 1,
  'sang_thuong_nang_va_not': 2,
  'coi_(dong/mo)': 3,
  'dat_tang_sac_to_(vet_tham)': 4})

In [5]:
# Regenerate the global label map from the bbox CSV files.
def generate_global_label():
    """Rebuild the label map from the bbox CSV files and save it to disk.

    Collects the ``label`` column of every ``.csv`` file in ``DATA / 'bbox'``,
    prints the per-label counts, draws a pie chart of the label distribution,
    assigns each distinct label an integer id and writes the resulting table
    to ``DATA / 'label_map.csv'``.

    Ids are assigned in sorted label order so that regenerating the map on the
    same data always yields the same ids; iterating a ``set`` of strings does
    not guarantee a stable order between interpreter runs.

    Returns:
        pd.DataFrame: one row per distinct label, with columns ``id`` and
        ``label_name``.
    """
    bbox_dir = DATA / 'bbox'

    labels = []
    # Sorted so the printed statistics do not depend on directory listing order.
    for csv_path in sorted(bbox_dir.iterdir()):
        if csv_path.suffix != '.csv':
            continue
        frame = pd.read_csv(csv_path, index_col=0)
        # Rows without a label cannot define a category, so they are left out.
        labels.extend(frame['label'].dropna().tolist())

    stats = collections.Counter(labels)
    print(stats)
    if labels:
        # Nothing to chart when no labels were found.
        pd.Series(labels).value_counts().plot(kind='pie', autopct='%.2f%%')

    # Deterministic id assignment: position in the sorted list of distinct labels.
    label_names = sorted(stats)
    label_map = pd.DataFrame(data={
        'id': list(range(len(label_names))),
        'label_name': label_names,
    })
    label_map.to_csv(DATA / 'label_map.csv')
    return label_map

## GENERATE COCO ANNOTATIONS

In [6]:
def generate_dataset_coco(path):
    """Build a COCO-style dictionary for the dataset rooted at ``path``.

    Reads every ``.jpeg`` file in ``path / 'image'`` (in sorted order, so image
    ids are reproducible) and, for each image, the CSV in ``path / 'bbox'``
    whose name is the image file name up to its first dot plus ``.csv``. Each
    CSV row supplies a ``label`` and an ``objects`` string holding integer
    ``left``, ``top``, ``width`` and ``height`` entries.

    Categories are taken from the notebook-level ``global_label`` mapping; a
    row's label is translated to a category id through ``global_label_vn`` and
    ``global_label_inv``.

    Args:
        path (pathlib.Path): dataset root containing ``image/`` and ``bbox/``.

    Returns:
        dict: structure with ``info``, ``licenses``, ``categories``, ``images``
        and ``annotations`` keys.

    Raises:
        ValueError: if OpenCV cannot read an image, or an ``objects`` entry is
            not of the form ``key: int``.
        KeyError: if a label is missing from the label lookups, or an
            ``objects`` string lacks one of the four box keys.
    """
    def parse_str2dict(t):
        """Parse a flat ``{'key': int, ...}`` string into a dict of ints."""
        text = t.strip()
        for ch in '{}\'"':
            text = text.replace(ch, '')
        parsed = {}
        for item in text.split(','):
            if not item.strip():
                # Tolerate empty fragments such as a trailing comma.
                continue
            key, _, value = item.partition(':')
            parsed[key.strip()] = int(value.strip())
        return parsed

    bbox_f = path / 'bbox'
    img_f = path / 'image'

    coco = {
        'info': {},
        'licenses': [],
        'categories': [],
        'images': [],
        'annotations': []
    }

    # Categories: plain ints so the result stays JSON-serializable even if the
    # ids came out of pandas as numpy integers.
    coco['categories'] = [
        {"id": int(_id), "name": name} for _id, name in global_label.items()
    ]

    # Images: sorted because iterdir() yields entries in arbitrary order, which
    # would otherwise make image ids differ between runs.
    image_infos = []
    image_files = sorted(f for f in img_f.iterdir() if f.suffix == '.jpeg')
    for idx, f in enumerate(image_files):
        img = cv2.imread(str(f))
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            raise ValueError('Could not read image: {}'.format(f))
        h, w = img.shape[:2]
        image_infos.append({
            'id': idx,
            'width': int(w),
            'height': int(h),
            'file_name': f.name,
        })
    coco['images'] = image_infos

    # Annotations: one per CSV row, numbered consecutively across all images.
    anno_infos = []
    for img_info in image_infos:
        # The bbox CSV is named after the image file name up to its first dot.
        bbox_n = img_info['file_name'].split('.')[0] + '.csv'
        bbox_content = pd.read_csv(str(bbox_f / bbox_n), index_col=0)
        for label, objects in zip(bbox_content['label'], bbox_content['objects']):
            category_id = global_label_inv[global_label_vn[label]]
            b_c = parse_str2dict(objects)
            left, top = b_c['left'], b_c['top']
            width, height = b_c['width'], b_c['height']
            x0, y0, x1, y1 = left, top, left + width, top + height
            anno_infos.append({
                'id': len(anno_infos),
                'image_id': int(img_info['id']),
                'category_id': int(category_id),
                # Box corners as a 4-point polygon.
                'segmentation': [[x0, y0, x0, y1, x1, y1, x1, y0]],
                'area': width * height,
                'bbox': [left, top, width, height],
                'iscrowd': 0,
            })
    coco['annotations'] = anno_infos
    return coco

In [7]:
# Build the COCO dictionary for the dataset rooted at DATA.
coco = generate_dataset_coco(DATA)

In [8]:
# Number of image records in the generated COCO dictionary.
len(coco['images'])

747

In [9]:
# Write the COCO dictionary to disk. The context manager closes (and therefore
# flushes) the file, even if serialization fails part-way through.
with open(DATA / 'annotations.json', 'w', encoding='utf-8') as annotations_file:
    json.dump(coco, annotations_file)