### データフレームの準備

In [18]:
import glob
import os
import json
import pandas as pd
import shutil
from PIL import Image

In [19]:
# Load the bounding-box annotation CSVs (one per split) into DataFrames.
base_dir = 'datasets/annotation_bbox'
test_ann_path, train_ann_path, val_ann_path = (
    f'{base_dir}/{split}-annotations-bbox.csv'
    for split in ('test', 'train', 'val')
)
# Files are read in the same order as before: test, train, val.
df_test, df_train, df_val = (
    pd.read_csv(csv_path)
    for csv_path in (test_ann_path, train_ann_path, val_ann_path)
)

In [20]:
# Lookup used to turn the raw label codes into readable label names.
label_dict = dict([
    ("/m/01g317", "person"),
    ("/m/0k4j", "car"),
    ("/m/0c9ph5", "flower"),
    ("/m/0bt9lr", "dog"),
    ("/m/0cgh4", "building"),
])

In [21]:
# Replace the raw label codes with readable names in all three splits.
for raw_code, readable_name in label_dict.items():
    df_test, df_train, df_val = (
        frame.replace(raw_code, readable_name)
        for frame in (df_test, df_train, df_val)
    )

In [22]:
# Build the label-name -> class (category) ID lookup and expose it as a
# DataFrame so it can be merged into the annotation frames.
label_dict4df = dict(building=0, car=1, person=2, flower=3, dog=4)
df_label = pd.DataFrame(
    list(label_dict4df.items()),
    columns=['LabelName', 'CategoryID'],
)
df_label

Unnamed: 0,LabelName,CategoryID
0,building,0
1,car,1
2,person,2
3,flower,3
4,dog,4


In [23]:
# Root directory of the image dataset, and the dataset splits handled below.
open_images_dir = 'datasets/open_images_baobab/open_images_baobab'
dataset_kinds = [
    'test',
    'train',
    'val',
]

In [24]:
# Map each split name to the directory that holds its images.
dataset_path = {
    kind: f'{open_images_dir}/all/{kind}'
    for kind in dataset_kinds
}

### 画像ファイルの移動

In [None]:
# Create the merged "all" directory with one sub-directory per split.
all_dir_path = f'{open_images_dir}/all'
# makedirs also creates any missing parent directories, and exist_ok=True
# makes re-running this cell harmless (no separate isdir check needed).
os.makedirs(all_dir_path, exist_ok=True)
for dataset_kind in dataset_kinds:
    kind_dir_path = f'{all_dir_path}/{dataset_kind}'
    os.makedirs(kind_dir_path, exist_ok=True)

In [None]:
# Copy every category's .jpg images into the shared all/<split> directory.
for label_name in label_dict.values():
    for dataset_kind in dataset_kinds:
        src_pattern = f'{open_images_dir}/{label_name}/{dataset_kind}/image/*.jpg'
        dest_dir = f'{open_images_dir}/all/{dataset_kind}'
        for src_file in glob.glob(src_pattern):
            shutil.copy(src_file, dest_dir)

### GoogleOpenImagesのアノテーションデータからSSD用のVOCアノテーションデータに変換

In [25]:
# Attach CategoryID to every annotation row, then order rows by ImageID
# with a fresh 0..n-1 index.
# The sort is applied twice, exactly as before, so the resulting row order
# is unchanged.
df_test, df_train, df_val = (
    pd.merge(frame, df_label, on='LabelName')
    .sort_values('ImageID')
    .sort_values('ImageID')
    .reset_index(drop=True)
    for frame in (df_test, df_train, df_val)
)

In [26]:
# Keep the per-split DataFrames in a dict keyed by split name.
df_dict = dict(test=df_test, train=df_train, val=df_val)

In [27]:
# List the files with a given extension.
def glob_file_names(dir_path, ext):
    """Return the paths matching ``{dir_path}/*.{ext}``.

    Backslashes in the returned paths are replaced with forward slashes.
    """
    pattern = f'{dir_path}/*.{ext}'
    return [match.replace('\\', '/') for match in glob.glob(pattern)]

In [28]:
# Collect the image paths of each split, then report how many there are.
files_dict = {
    split: glob_file_names(dataset_path[split], 'jpg')
    for split in dataset_kinds
}
for kind, paths in files_dict.items():
    print(f'{kind}:\t{len(paths)}')

test:	500
train:	3998
val:	500


In [29]:
# Add each image's pixel size to the DataFrame of its split. After this the
# frame holds everything needed to build the XML files.
for dataset_kind, files in files_dict.items():
    img_size_list = []
    for file in files:
        # Image.open keeps the underlying file open until it is closed;
        # the context manager releases the handle right after the size is
        # read, so thousands of images do not pile up as open files.
        with Image.open(file) as img:
            img_width, img_height = img.size
        # ImageID is the file name up to its first dot.
        image_id = file.split('/')[-1].split('.')[0]
        img_size_list.append([image_id, img_width, img_height])
    df_size = pd.DataFrame(img_size_list, columns=['ImageID', 'Width', 'Height'])
    # Default (inner) merge: only annotation rows whose ImageID has an image
    # file in this split are kept.
    merged = pd.merge(df_dict[dataset_kind], df_size, on='ImageID')
    df_dict[dataset_kind] = merged.sort_values('ImageID').reset_index(drop=True)

# Display the frame of the last processed split.
df_dict[dataset_kind]

Unnamed: 0,ImageID,Source,LabelName,Confidence,XMin,XMax,YMin,YMax,IsOccluded,IsTruncated,...,XClick2X,XClick3X,XClick4X,XClick1Y,XClick2Y,XClick3Y,XClick4Y,CategoryID,Width,Height
0,00a0b916fd5941a3,baobab,flower,1,0.724551,0.843457,0.000000,0.122734,0,0,...,-1,-1,-1,-1,-1,-1,-1,3,1024,768
1,00a0b916fd5941a3,baobab,flower,1,0.350586,0.750469,0.338138,0.870156,0,0,...,-1,-1,-1,-1,-1,-1,-1,3,1024,768
2,0217ef254d8bf5cd,baobab,car,1,0.061859,0.970717,0.411240,0.937207,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,683,1024
3,0217ef254d8bf5cd,baobab,car,1,0.103982,0.464392,0.019268,0.076611,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,683,1024
4,0217ef254d8bf5cd,baobab,car,1,0.512401,0.848272,0.051885,0.125234,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,683,1024
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2068,fff2268a1b921e8e,baobab,car,1,0.677568,0.926035,0.000000,0.169740,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,1024,578
2069,fff2268a1b921e8e,baobab,car,1,0.083965,0.889395,0.066938,0.998737,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,1024,578
2070,fff2268a1b921e8e,baobab,car,1,0.146455,0.415986,0.000000,0.197768,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,1024,578
2071,fff2268a1b921e8e,baobab,car,1,0.514014,0.587666,0.000000,0.064204,0,0,...,-1,-1,-1,-1,-1,-1,-1,1,1024,578


### XMLファイルの作成

In [30]:
from xml.etree.ElementTree import *
import xml.etree.ElementTree as ET
import xml.dom.minidom as md
from PIL import Image

In [31]:
# Get the size and channel bands of an image.
def get_image_info(path):
    """Return ``(width, height, bands)`` for the image at ``path``.

    ``bands`` is whatever ``Image.getbands()`` returns; callers take its
    length as the channel count.
    """
    # Use a context manager so the file handle is closed as soon as the
    # header information has been read, instead of leaking per call.
    with Image.open(path) as img:
        return img.width, img.height, img.getbands()

In [32]:
# Split a path into its file name and its directory.
def file_dir_split(file_path):
    """Return ``(file_name, dir_path)``, split at the last ``/``.

    A path without any ``/`` raises ``IndexError``.
    """
    parts = file_path.rsplit('/', 1)
    return parts[1], parts[0]

In [33]:
# Create a VOC annotation XML file.
def create_xml(ann_xml):
    """Write the annotation described by ``ann_xml`` as an XML file.

    The file is written next to the image: same directory and base name as
    ``ann_xml['path']``, with the extension replaced by ``.xml``.

    Args:
        ann_xml: dict with the keys ``folder``, ``path``, ``segmented``
            (strings), ``source`` (dict with ``database``), ``size`` (dict
            with ``width``, ``height``, ``depth``) and ``objects`` (list of
            dicts with ``name``, ``pose``, ``truncated``, ``difficult`` and
            a ``bndbox`` dict holding ``xmin``, ``ymin``, ``xmax``,
            ``ymax``). All leaf values are used as element text.

    Raises:
        KeyError: if one of the keys above is missing.
        OSError: if the output file cannot be written.
    """
    def add_text(parent, tag, text):
        # Append <tag>text</tag> under parent.
        node = ET.SubElement(parent, tag)
        node.text = text
        return node

    annotation = ET.Element('annotation')
    add_text(annotation, 'folder', ann_xml['folder'])
    add_text(annotation, 'path', ann_xml['path'])

    source = ET.SubElement(annotation, 'source')
    add_text(source, 'database', ann_xml['source']['database'])

    size = ET.SubElement(annotation, 'size')
    for dim in ('width', 'height', 'depth'):
        add_text(size, dim, ann_xml['size'][dim])

    add_text(annotation, 'segmented', ann_xml['segmented'])

    for ann_object in ann_xml['objects']:
        # Named obj_elem rather than "object" to avoid shadowing the builtin.
        obj_elem = ET.SubElement(annotation, 'object')
        for tag in ('name', 'pose', 'truncated', 'difficult'):
            add_text(obj_elem, tag, ann_object[tag])
        bndbox = ET.SubElement(obj_elem, 'bndbox')
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            add_text(bndbox, tag, ann_object['bndbox'][tag])

    # splitext strips only the final extension, so file names that contain
    # additional dots keep their full base name.
    out_file_path = os.path.splitext(ann_xml['path'])[0] + '.xml'
    document = md.parseString(ET.tostring(annotation, 'utf-8'))
    # The XML declaration says utf-8, so the file must really be written as
    # utf-8 rather than in the platform's default encoding.
    with open(out_file_path, 'w', encoding='utf-8') as f:
        document.writexml(f, encoding='utf-8', newl='\n', indent='', addindent='  ')

In [34]:
# Directory containing the per-split image folders (the "all" directory).
# NOTE: this rebinds base_dir, which earlier in the file pointed at the
# annotation CSV directory.
base_dir = 'datasets/open_images_baobab/open_images_baobab/all'

In [35]:
# Convert the annotations of every image into one VOC XML file per image.
for dataset_kind in dataset_kinds:
    df = df_dict[dataset_kind]
    # groupby hands over each image's rows directly instead of re-filtering
    # the whole frame once per image; sort=False keeps first-appearance order.
    for image_id, image_rows in df.groupby('ImageID', sort=False):
        ann_xml = {}
        ann_xml['folder'] = dataset_kind
        ann_xml['filename'] = f'{image_id}.jpg'
        ann_xml['path'] = f'{dataset_path[dataset_kind]}/{ann_xml["filename"]}'
        ann_xml['source'] = {
            "database": 'Unknown'
        }
        width, height, bands = get_image_info(ann_xml['path'])
        ann_xml['size'] = {
            "width": str(width),
            "height": str(height),
            "depth": str(len(bands))
        }
        ann_xml['segmented'] = '0'
        ann_xml['objects'] = []
        for _, row in image_rows.iterrows():
            # Box coordinates are multiplied by the image size to get pixels:
            # x values by the width, y values by the height.
            xmin = int(row['XMin'] * width)
            ymin = int(row['YMin'] * height)
            xmax = int(row['XMax'] * width)
            ymax = int(row['YMax'] * height)
            # A box that collapses to zero width after truncation is widened
            # by one pixel, staying inside the image.
            if xmin >= xmax:
                xmax += 1
                if xmax > width:
                    xmax = width
                    xmin -= 1
                print(dataset_kind)
                print(image_id)
                print(xmin, xmax)
            # Same treatment for a box that collapses to zero height.
            if ymin >= ymax:
                ymax += 1
                if ymax > height:
                    ymax = height
                    ymin -= 1
                print(dataset_kind)
                print(image_id)
                print(ymin, ymax)
            # Write the adjusted values, so the corrections above actually
            # reach the XML.
            bndbox = {
                "xmin": str(xmin),
                "ymin": str(ymin),
                "xmax": str(xmax),
                "ymax": str(ymax)
            }
            # Named voc_object rather than "object" to avoid shadowing the
            # builtin at module level.
            voc_object = {
                "name": row['LabelName'],
                "pose": 'Unspecified',
                "truncated": '0',
                "difficult": '0',
                "bndbox": bndbox
            }
            ann_xml['objects'].append(voc_object)
        create_xml(ann_xml)

In [36]:
# Split a path into its file name and its directory.
# (Same behaviour as the identically named function defined earlier.)
def file_dir_split(file_path):
    """Split ``file_path`` at its last ``/`` and return ``(file_name, dir_path)``.

    Raises ``IndexError`` when the path contains no ``/``.
    """
    pieces = file_path.rsplit('/', 1)
    file_name = pieces[1]
    return file_name, pieces[0]

In [40]:
# Create the dataset list files train.txt, test.txt and val.txt, each
# holding one XML base name (without extension) per line.
for dataset_kind in dataset_kinds:
    xml_dir = f'{base_dir}/{dataset_kind}'
    # glob returns matches in arbitrary order; sorting makes the list files
    # reproducible from run to run and across machines.
    xml_files = sorted(glob_file_names(xml_dir, 'xml'))
    with open(f'{base_dir}/{dataset_kind}.txt', 'w', encoding='utf-8') as f:
        for file in xml_files:
            # Strip only the final ".xml" so the entry always matches the
            # XML file's base name, even if that name contains other dots.
            file_name = os.path.splitext(os.path.basename(file))[0]
            f.write(f'{file_name}\n')