* COCO形式（json）の物体検出アノテーションファイルを、yolo形式（txt）に変換するスニペット <br>
https://qiita.com/john-rocky/items/29badbc8c1d86558978e
* YOLOXの学習をCOCOのデータセットから試してみた <br>
https://misoji-engineer.com/archives/yolox-tiny-train.html

In [3]:
# 1. Set up fiftyone, a tool for inspecting the MS-COCO dataset
!pip install fiftyone
# NOTE(review): presumably the headless OpenCV build is installed for the
# display-less Colab runtime -- confirm it is actually required here
!pip install opencv-python-headless



In [9]:
# 2. Import fiftyone and fetch the MS-COCO 2017 validation data from its dataset zoo
import fiftyone as fo
import fiftyone.zoo as foz

# Show the names of all datasets the zoo offers
zoo_dataset_names = foz.list_zoo_datasets()
print(zoo_dataset_names)

# Load the validation split of COCO-2017 as a FiftyOne dataset.
# The data is downloaded from the web first if it is not available yet.
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
)

# Optional: rename the dataset and mark it persistent so that it can be
# reused in later sessions
# dataset.name = "coco-2017-validation-example"
# dataset.persistent = True

# Optional: visualize the dataset in the App
# session = fo.launch_app(dataset)

['activitynet-100', 'activitynet-200', 'bdd100k', 'caltech101', 'caltech256', 'cifar10', 'cifar100', 'cityscapes', 'coco-2014', 'coco-2017', 'fashion-mnist', 'fiw', 'hmdb51', 'imagenet-2012', 'imagenet-sample', 'kinetics-400', 'kinetics-600', 'kinetics-700', 'kinetics-700-2020', 'kitti', 'kitti-multiview', 'lfw', 'mnist', 'open-images-v6', 'open-images-v7', 'quickstart', 'quickstart-geo', 'quickstart-groups', 'quickstart-video', 'sama-coco', 'ucf101', 'voc-2007', 'voc-2012']
Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


INFO:fiftyone.zoo.datasets:Downloading split 'validation' to '/root/fiftyone/coco-2017/validation' if necessary


Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


INFO:fiftyone.utils.coco:Found annotations at '/root/fiftyone/coco-2017/raw/instances_val2017.json'


Images already downloaded


INFO:fiftyone.utils.coco:Images already downloaded


Existing download of split 'validation' is sufficient


INFO:fiftyone.zoo.datasets:Existing download of split 'validation' is sufficient


Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


INFO:fiftyone.zoo.datasets:Loading existing dataset 'coco-2017-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [11]:
# 3. Take the first 1000 images of the MS-COCO 2017 validation data and prepare
#    training, validation and test subsets for detecting people, cars and chairs
classes = ["person", "car", "chair"]

train_dataset = dataset[:700]
val_dataset = dataset[700:800]
test_dataset = dataset[800:1000]

# Export every subset in COCO format into its own directory.
# `split=` is intentionally not passed: the exporter does not support it and
# only answers with "Ignoring unsupported parameter 'split'". Which samples are
# written is already decided by the sliced view and the export directory.
export_root = "/content/datasets/ms-coco-subset/images"
for split_name, split_view in (
    ("train", train_dataset),
    ("val", val_dataset),
    ("test", test_dataset),
):
  split_view.export(
      export_dir=f"{export_root}/{split_name}/",
      dataset_type=fo.types.COCODetectionDataset,
      classes=classes,
  )

Ignoring unsupported parameter 'split'




   2% |/----------------|  16/700 [127.4ms elapsed, 5.4s remaining, 125.6 samples/s] 



   6% |█\---------------|  43/700 [331.6ms elapsed, 5.1s remaining, 129.7 samples/s] 



   9% |█|---------------|  61/700 [433.2ms elapsed, 4.5s remaining, 140.8 samples/s] 







  14% |██---------------|  95/700 [639.8ms elapsed, 4.1s remaining, 148.5 samples/s] 



  26% |████-------------| 179/700 [1.2s elapsed, 3.6s remaining, 146.9 samples/s]    



  37% |██████/----------| 261/700 [1.5s elapsed, 2.5s remaining, 180.3 samples/s]    



  44% |███████\---------| 307/700 [1.8s elapsed, 2.2s remaining, 190.7 samples/s]    



  76% |████████████\----| 534/700 [2.6s elapsed, 669.1ms remaining, 264.7 samples/s] 



  91% |███████████████\-| 635/700 [3.0s elapsed, 252.5ms remaining, 263.1 samples/s] 



 100% |█████████████████| 700/700 [3.3s elapsed, 0s remaining, 229.7 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 700/700 [3.3s elapsed, 0s remaining, 229.7 samples/s]      


Ignoring unsupported parameter 'split'




 100% |█████████████████| 100/100 [492.3ms elapsed, 0s remaining, 203.1 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 100/100 [492.3ms elapsed, 0s remaining, 203.1 samples/s]      


Ignoring unsupported parameter 'split'




 100% |█████████████████| 200/200 [808.4ms elapsed, 0s remaining, 247.4 samples/s]      


INFO:eta.core.utils: 100% |█████████████████| 200/200 [808.4ms elapsed, 0s remaining, 247.4 samples/s]      


In [12]:
# 4. Convert the COCO-format JSON that describes the object positions in each
#    image into YOLO v8 label files, arranged in the YOLO folder layout
import os
import json
import glob
import shutil

image_folder = '/content/datasets/ms-coco-subset/images'
folders = ['test', 'val', 'train']
coco_json_file = 'labels.json'
yolo_txt_save_folder = '/content/datasets/ms-coco-subset/labels'

for folder in folders:
  output_dir = os.path.join(yolo_txt_save_folder, folder)
  print(output_dir)
  os.makedirs(output_dir, exist_ok=True)
  json_path = os.path.join(image_folder, folder, coco_json_file)
  print(json_path)
  # Close the annotation file as soon as it has been parsed
  with open(json_path, 'r', encoding='utf-8') as json_open:
    json_load = json.load(json_open)

  annotations = json_load['annotations']
  images = json_load['images']

  # Index the images once so each annotation finds its image directly
  # instead of scanning the whole image list
  images_by_id = {image['id']: image for image in images}

  # Label file path -> all label lines of that image, in annotation order.
  # Collecting first and writing once per image keeps every object; opening
  # the file with 'w' per annotation would leave only the last object of
  # each image in its label file.
  lines_by_txt_path = {}

  for annotation in annotations:
    image = images_by_id.get(annotation['image_id'])
    if image is None:
      # Annotation refers to an image that is not listed; nothing to write
      continue

    file_name = image['file_name']
    im_w = image['width']
    im_h = image['height']

    txt_path = os.path.join(output_dir, os.path.splitext(os.path.basename(file_name))[0] + '.txt')

    # The bbox is read as [x_min, y_min, width, height] in pixels and turned
    # into a box centre plus size, each divided by the image width / height
    bbox = annotation['bbox']
    cx = float(bbox[2] + bbox[0] * 2) * 0.5 / im_w
    cy = float(bbox[3] + bbox[1] * 2) * 0.5 / im_h
    w = float(bbox[2]) / im_w
    h = float(bbox[3]) / im_h
    bbox = [cx, cy, w, h]

    cls = int(annotation['category_id'])
    line = (cls, *bbox)
    print(txt_path, line)

    # One text row per object: "<class> <cx> <cy> <w> <h>"
    lines_by_txt_path.setdefault(txt_path, []).append(
        ('%g ' * len(line)).rstrip() % line + '\n')

  # Write each label file exactly once. Mode 'w' also replaces files left over
  # from an earlier run, so re-running the cell does not duplicate rows.
  for txt_path, label_lines in lines_by_txt_path.items():
    with open(txt_path, 'w') as f:
      f.writelines(label_lines)

# Flatten the exported layout: move every JPEG from <split>/data/ up into
# <split>/ itself, leaving alone any image whose destination already exists
for split_name in folders:
  split_dir = os.path.join(image_folder, split_name)
  for src_path in glob.glob(os.path.join(split_dir, 'data', '*.jpg')):
    target_path = os.path.join(split_dir, os.path.basename(src_path))
    if not os.path.exists(src_path) or os.path.exists(target_path):
      continue
    shutil.move(src_path, target_path)

# for folder in folders:
#   os.rmdir(os.path.join(image_folder, folder, 'data'))

/content/datasets/ms-coco-subset/labels/test
/content/datasets/ms-coco-subset/images/test/labels.json
/content/datasets/ms-coco-subset/labels/test/000000093437.txt (2, 0.7290703125, 0.5971191135734072, 0.106734375, 0.25329639889196676)
/content/datasets/ms-coco-subset/labels/test/000000093437.txt (2, 0.10914062499999999, 0.7042520775623268, 0.09231249999999999, 0.2768698060941828)
/content/datasets/ms-coco-subset/labels/test/000000093437.txt (2, 0.020624999999999998, 0.6645152354570637, 0.041249999999999995, 0.33977839335180055)
/content/datasets/ms-coco-subset/labels/test/000000093437.txt (0, 0.39738281249999996, 0.5044875346260387, 0.5488593749999999, 0.959556786703601)
/content/datasets/ms-coco-subset/labels/test/000000093717.txt (0, 0.52909375, 0.4423536299765808, 0.40715625, 0.6381498829039813)
/content/datasets/ms-coco-subset/labels/test/000000093717.txt (0, 0.87653125, 0.5807611241217799, 0.08990625, 0.3852224824355972)
/content/datasets/ms-coco-subset/labels/test/000000093717.t

In [14]:
# List the contents of the current working directory
!ls

datasets  drive  sample_data


In [13]:
# 5. Compress the data converted to the YOLO v8 layout and save it to
#    My Drive on Google Drive
from google.colab import drive

drive.mount('/content/drive') # Set up access to Google Drive (asks for confirmation)
# Create a gzip-compressed tar archive of the `datasets` directory (path is
# relative to the current working directory) and list each file as it is added.
# NOTE(review): assumes /content/drive/MyDrive/sudspg/ already exists -- confirm
!tar cfvz /content/drive/MyDrive/sudspg/ms-coco-subset-sudspg-dmt3.tar.gz datasets

Mounted at /content/drive
datasets/
datasets/ms-coco-subset/
datasets/ms-coco-subset/images/
datasets/ms-coco-subset/images/train/
datasets/ms-coco-subset/images/train/000000006614.jpg
datasets/ms-coco-subset/images/train/000000076547.jpg
datasets/ms-coco-subset/images/train/000000060449.jpg
datasets/ms-coco-subset/images/train/000000034257.jpg
datasets/ms-coco-subset/images/train/000000008211.jpg
datasets/ms-coco-subset/images/train/000000054593.jpg
datasets/ms-coco-subset/images/train/000000001675.jpg
datasets/ms-coco-subset/images/train/000000044699.jpg
datasets/ms-coco-subset/images/train/000000073153.jpg
datasets/ms-coco-subset/images/train/000000073702.jpg
datasets/ms-coco-subset/images/train/000000055299.jpg
datasets/ms-coco-subset/images/train/000000030828.jpg
datasets/ms-coco-subset/images/train/000000039951.jpg
datasets/ms-coco-subset/images/train/000000004495.jpg
datasets/ms-coco-subset/images/train/000000030785.jpg
datasets/ms-coco-subset/images/train/000000028993.jpg
datas