### データフレームの準備

In [1]:
import glob
import os
import json
import pandas as pd
import shutil
import json
import numpy as np
from tqdm import tqdm

In [2]:
# jsonファイルの読み込み
base_dir = 'datasets/COCOdata/COCOdata'
json_dicts = {}
data_kinds = ['train', 'test']
for data_kind in data_kinds:
    with open(f'{base_dir}/{data_kind}/labels_original.json', 'r') as f:
        json_dicts[data_kind] = json.load(f)

In [3]:
# imagesキー内の'id'と'file_name'と'width'と'height'を紐づけ
df_dict = {}
for data_kind, json_dict in json_dicts.items():
    images = json_dict['images']
    images_id = []
    for image in images:
        image_id = {}
        image_id['image_id'] = image['id']
        image_id['file_name'] = image['file_name']
        image_id['width'] = image['width']
        image_id['height'] = image['height']
        images_id.append(image_id)
    df_dict[data_kind] = pd.DataFrame(images_id)

In [4]:
# annotationsキー内の'id'と'bbox'と'category_id'を紐づけ
for data_kind,json_dict in json_dicts.items():
    annotations = json_dict['annotations']
    annotations_id = []
    for annotation in annotations:
        annotation_id = {}
        annotation_id['image_id'] = annotation['image_id']
        annotation_id['bbox'] = annotation['bbox']
        annotation_id['category_id'] = annotation['category_id']
        annotations_id.append(annotation_id)
    df_annotations_id = pd.DataFrame(annotations_id)
    df_dict[data_kind] = pd.merge(df_dict[data_kind], df_annotations_id, on='image_id')

In [5]:
# categoriesキー内の'id'と'name'を紐づけ
for data_kind,json_dict in json_dicts.items():
    categories = json_dict['categories']
    categories_id = []
    for category in categories:
        category_id = {}
        category_id['category_id'] = category['id']
        category_id['name'] = category['name']
        categories_id.append(category_id)
    df_categories_id = pd.DataFrame(categories_id)
    df_dict[data_kind] = pd.merge(df_dict[data_kind], df_categories_id, on='category_id')

In [10]:
# train, valでデータフレームを分ける
# かなり時間かかるから注意！
columns = df_dict['test'].columns.tolist()
train_txt = f'{base_dir}/train/train.txt'
val_txt = f'{base_dir}/train/val.txt'
with open(train_txt, 'r') as f:
    train_list = f.readlines()
    train_list = [train_name.split('/')[-1].replace('\n', '') for train_name in train_list]
with open(val_txt, 'r') as f:
    val_list = f.readlines()
    val_list = [val_name.split('/')[-1].replace('\n', '') for val_name in val_list]

df_train = pd.DataFrame(columns=columns)
for train_name in tqdm(train_list):
    _df = df_dict['train'][df_dict['train']['file_name'] == train_name]
    df_train = pd.concat([df_train, _df],ignore_index=True)
df_train

100%|████████████████████████████████████████████████████████████████████████████| 73559/73559 [37:07<00:00, 33.03it/s]


Unnamed: 0,image_id,file_name,width,height,bbox,category_id,name
0,28163,000000214306.jpg,640,424,"[81.04, 121.65, 23.72, 23.22]",47,cup
1,41476,000000314846.jpg,500,375,"[0.0, 1.6999999999999997, 353.74, 372.45]",1,person
2,41476,000000314846.jpg,500,375,"[158.13, 140.76, 182.87, 234.23999999999998]",1,person
3,32845,000000250165.jpg,640,426,"[224.71, 212.79, 142.11, 207.1]",62,chair
4,51501,000000389624.jpg,612,612,"[78.33, 321.25, 51.06, 90.19000000000001]",62,chair
...,...,...,...,...,...,...,...
362320,20191,000000152769.jpg,480,640,"[418.88, 113.91999999999999, 61.12, 197.29]",3,car
362321,20191,000000152769.jpg,480,640,"[35.96, 4.31, 372.49, 291.96]",1,person
362322,20191,000000152769.jpg,480,640,"[172.58, 310.21, 120.81, 99.24]",47,cup
362323,20191,000000152769.jpg,480,640,"[242.33999999999997, 283.81, 78.02, 77.29]",47,cup


In [11]:
df_val = pd.DataFrame(columns=columns)
for val_name in tqdm(val_list):
    _df = df_dict['train'][df_dict['train']['file_name'] == val_name]
    df_val = pd.concat([df_val, _df],ignore_index=True)
df_val

100%|██████████████████████████████████████████████████████████████████████████████| 3254/3254 [00:52<00:00, 61.88it/s]


Unnamed: 0,image_id,file_name,width,height,bbox,category_id,name
0,68183,000000516516.jpg,640,480,"[273.13, 0.0, 363.76, 369.65]",1,person
1,17402,000000131522.jpg,483,485,"[120.97999999999999, 7.63, 354.21, 468.65]",1,person
2,17402,000000131522.jpg,483,485,"[86.38, 65.26, 64.04, 71.16]",1,person
3,17402,000000131522.jpg,483,485,"[172.74, 23.74, 40.38, 63.86000000000001]",1,person
4,17402,000000131522.jpg,483,485,"[206.41, 23.08, 53.01, 57.45]",1,person
...,...,...,...,...,...,...,...
16027,27780,000000211281.jpg,427,640,"[227.93, 0.95, 95.63, 93.13]",1,person
16028,27780,000000211281.jpg,427,640,"[277.18, 0.81, 91.21, 99.51]",1,person
16029,27780,000000211281.jpg,427,640,"[369.02, 14.66, 57.00000000000001, 92.1]",1,person
16030,27780,000000211281.jpg,427,640,"[0.0, 0.0, 87.19, 83.57]",1,person


In [12]:
df_dict['train'] = df_train
df_dict['val'] = df_val

In [13]:
# カテゴリ番号と名前を取り出したデータフレーム
df_unique_category = df_dict['test'][['category_id', 'name']].drop_duplicates()
df_unique_category = df_unique_category.sort_values('category_id')
df_unique_category = df_unique_category.reset_index(drop=True)
df_unique_category.index = np.arange(1, len(df_unique_category)+1)
df_unique_category

Unnamed: 0,category_id,name
1,1,person
2,3,car
3,10,trafficlight
4,47,cup
5,62,chair


In [14]:
df_dict['train'].head(3)

Unnamed: 0,image_id,file_name,width,height,bbox,category_id,name
0,28163,000000214306.jpg,640,424,"[81.04, 121.65, 23.72, 23.22]",47,cup
1,41476,000000314846.jpg,500,375,"[0.0, 1.6999999999999997, 353.74, 372.45]",1,person
2,41476,000000314846.jpg,500,375,"[158.13, 140.76, 182.87, 234.23999999999998]",1,person


In [15]:
df_bboxes_dict = {}
for dataset_kind, df in df_dict.items():
    bboxes = []
    for i, row in tqdm(df.iterrows()):
        bbox_dict = {}
        bbox =  row['bbox']
        bbox_dict['image_id'] = row['image_id']
        bbox_dict['file_name'] = row['file_name']
        bbox_dict['width'] = row['width']
        bbox_dict['height'] = row['height']
        bbox_dict['category_id'] = row['category_id']
        bbox_dict['name'] = row['name']
        bbox_dict['xmin'] = bbox[0]
        bbox_dict['ymin'] = bbox[1]
        bbox_dict['xmax'] = bbox[0] + bbox[2]
        bbox_dict['ymax'] = bbox[1] + bbox[3]
        bboxes.append(bbox_dict)
    df_bboxes = pd.DataFrame(bboxes)
    df_bboxes_dict[dataset_kind] = df_bboxes

362325it [00:14, 25850.20it/s]
16263it [00:00, 25777.79it/s]
16032it [00:00, 26822.33it/s]


In [16]:
df_bboxes_dict['train']

Unnamed: 0,image_id,file_name,width,height,category_id,name,xmin,ymin,xmax,ymax
0,28163,000000214306.jpg,640,424,47,cup,81.04,121.65,104.76,144.87
1,41476,000000314846.jpg,500,375,1,person,0.00,1.70,353.74,374.15
2,41476,000000314846.jpg,500,375,1,person,158.13,140.76,341.00,375.00
3,32845,000000250165.jpg,640,426,62,chair,224.71,212.79,366.82,419.89
4,51501,000000389624.jpg,612,612,62,chair,78.33,321.25,129.39,411.44
...,...,...,...,...,...,...,...,...,...,...
362320,20191,000000152769.jpg,480,640,3,car,418.88,113.92,480.00,311.21
362321,20191,000000152769.jpg,480,640,1,person,35.96,4.31,408.45,296.27
362322,20191,000000152769.jpg,480,640,47,cup,172.58,310.21,293.39,409.45
362323,20191,000000152769.jpg,480,640,47,cup,242.34,283.81,320.36,361.10


### XMLファイルの作成

In [17]:
from xml.etree.ElementTree import *
import xml.etree.ElementTree as ET
import xml.dom.minidom as md
from PIL import Image

In [18]:
# 画像のサイズとチャネル数を取得
def get_image_info(path):
    img = Image.open(path)
    return img.width, img.height, img.getbands()

In [19]:
# ディレクトリとファイル名のみに分割
def file_dir_split(file_path):
    split_path = file_path.rsplit('/',1)
    dir_path = split_path[0]
    file_name = split_path[1]
    return file_name, dir_path

In [20]:
# VOCアノテーションXML ファイルを作成
def create_xml(ann_xml):
    annotation = Element('annotation')

    folder = SubElement(annotation, 'folder')
    folder.text = ann_xml['folder']

    path = SubElement(annotation, 'path')
    path.text = ann_xml['path']

    source = SubElement(annotation, 'source')
    database = SubElement(source, 'database')
    database.text = ann_xml['source']['database']

    size = SubElement(annotation, 'size')
    width = SubElement(size, 'width')
    height = SubElement(size, 'height')
    depth = SubElement(size, 'depth')
    width.text = ann_xml['size']['width']
    height.text = ann_xml['size']['height']
    depth.text = ann_xml['size']['depth']

    segmented = SubElement(annotation, 'segmented')
    segmented.text = ann_xml['segmented']

    for i, ann_object in enumerate(ann_xml['objects']):
        object = SubElement(annotation, 'object')

        name = SubElement(object, 'name')
        name.text = ann_object['name']

        pose = SubElement(object, 'pose')
        pose.text = ann_object['pose']

        truncated = SubElement(object, 'truncated')
        truncated.text = ann_object['truncated']

        difficult = SubElement(object, 'difficult')
        difficult.text = ann_object['difficult']

        bndbox = SubElement(object, 'bndbox')
        xmin = SubElement(bndbox, 'xmin')
        xmin.text = ann_object['bndbox']['xmin']
        ymin = SubElement(bndbox, 'ymin')
        ymin.text = ann_object['bndbox']['ymin']
        xmax = SubElement(bndbox, 'xmax')
        xmax.text = ann_object['bndbox']['xmax']
        ymax = SubElement(bndbox, 'ymax')
        ymax.text = ann_object['bndbox']['ymax']

    file_name, dir_path = file_dir_split(ann_xml['path'])
    file_name = file_name.split('.')[0]
    out_file_path = f'{dir_path}/{file_name}.xml'
    document = md.parseString(ET.tostring(annotation, 'utf-8'))
    with open(out_file_path, 'w') as f:
        document.writexml(f, encoding='utf-8', newl='\n', indent='', addindent='  ')

In [21]:
base_dir = 'datasets/COCOdata/COCOdata/Annotations_original'

In [23]:
# XMLファイルに変換
for dataset_kind, df in df_bboxes_dict.items():
    file_names = df['file_name'].unique()
    for file_name in tqdm(file_names):
        ann_xml = {}
        ann_xml['folder'] = dataset_kind
        ann_xml['filename'] = file_name
        ann_xml['path'] = f'{base_dir}/{ann_xml["filename"]}'
        ann_xml['source'] = {
            "database": 'Unknown'
        }
        width = df[df['file_name'] == file_name]['width'].unique().item()
        height= df[df['file_name'] == file_name]['height'].unique().item()
        depth = 3
        ann_xml['size'] = {
            "width": str(width),
            "height": str(height),
            "depth": str(depth)
        }
        ann_xml['segmented'] = '0'
        ann_xml['objects'] = []
        for index, row in df[df['file_name'] == file_name].iterrows():
            xmin = int(row['xmin'])
            ymin = int(row['ymin'])
            xmax = int(row['xmax'])
            ymax = int(row['ymax'])
            if xmin >= xmax:
                xmax += 1
                if xmax > width:
                    xmax = width
                    xmin -= 1
                print(dataset_kind)
                print(file_name)
                print(xmin, xmax)
            if ymin >= ymax:
                ymax += 1
                if ymax > height:
                    ymax = height
                    ymin -= 1
                print(dataset_kind)
                print(file_name)
                print(ymin, ymax)
            bndbox = {
                "xmin": str(xmin),
                "ymin": str(ymin),
                "xmax": str(xmax),
                "ymax": str(ymax)
            }
            object = {
                "name": row['name'],
                "pose": 'Unspecified',
                "truncated": '0',
                "difficult": '0',
                "bndbox": bndbox
            }
            ann_xml['objects'].append(object)
        create_xml(ann_xml)

 18%|█████████████▌                                                              | 13109/73559 [11:38<43:30, 23.15it/s]

train
000000550395.jpg
188 189


 45%|██████████████████████████████████▏                                         | 33045/73559 [26:34<29:30, 22.89it/s]

train
000000375219.jpg
284 285


100%|██████████████████████████████████████████████████████████████████████████| 73559/73559 [1:01:45<00:00, 19.85it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3254/3254 [00:13<00:00, 249.03it/s]
 20%|███████████████▉                                                              | 663/3254 [00:02<00:11, 230.58it/s]

val
000000088835.jpg
200 201


100%|█████████████████████████████████████████████████████████████████████████████| 3254/3254 [00:14<00:00, 230.62it/s]


In [24]:
# ディレクトリとファイル名のみに分割
def file_dir_split(file_path):
    split_path = file_path.rsplit('/',1)
    dir_path = split_path[0]
    file_name = split_path[1]
    return file_name, dir_path

In [27]:
# 指定拡張子のファイル名取得
def glob_file_names(dir_path, ext):
    files = glob.glob(f'{dir_path}/*.{ext}')
    files = [file.replace('\\', '/') for file in files]
    return files

In [28]:
# データセットリストファイル train.txt, test.txt, val.txtを作成
main_dir = 'MSCOCO/ImageSets/Main'
for dataset_kind in df_bboxes_dict.keys():
    xml_dir = f'{base_dir}'
    xml_files = glob_file_names(xml_dir, 'xml')
    with open(f'{main_dir}/{dataset_kind}.txt', 'w') as f:
        for file in xml_files:
            file_name, _ = file_dir_split(file)
            file_name = file_name.split('.')[0]
            f.write(f'{file_name}\n')

In [30]:
txt_files = glob_file_names(main_dir, 'txt')

In [35]:
for txt_file in txt_files:
    with open(txt_file, 'r') as f:
        lines = f.readlines()
    with open(txt_file, 'w') as f:
        for line in lines:
            line = line.split('.')[0]
            line = line.split('/')[-1]
            f.write(f'{line}\n')

In [36]:
import shutil

In [38]:
# test originalファイルをコピー
from_dir = 'MSCOCO/Annotations_original'
to_dir = 'MSCOCO/Annotations'
test_txt = 'MSCOCO/ImageSets/Main/test.txt'

with open(test_txt, 'r') as f:
    files = f.readlines()
    files = [file.replace('\n', '') for file in files]
for file in files:
    from_file = f'{from_dir}/{file}.xml'
    shutil.copy(from_file, to_dir)

In [39]:
test_dict = {
    "traffic light": 3
}

In [40]:
test_dict['traffic light']

3