Clone the repo:

In [None]:
# Fetch the project sources; the later `%cd optimal-annotation-mix/...` cells
# expect this checkout to exist in the current working directory.
!git clone https://github.com/OneMagicKey/optimal-annotation-mix.git

## YOLO training

### PascalVOC

Install all the dependencies:

In [None]:
# Enter the yolov5 directory of the clone: the relative paths used by the cells
# below (segment/train.py, the sibling datasets folder) are resolved from here.
%cd optimal-annotation-mix/yolov5
# Install the packages listed in this directory's requirements.txt
# (-q: quiet output, -r: read the requirement list from a file).
%pip install -qr requirements.txt

Start training:

#### F = 800, W = 3000

In [None]:
# PascalVOC run for F = 800, W = 3000.
# One flag per line so the settings are easy to compare with the other runs.
!python segment/train.py \
  --img 512 \
  --batch-size 16 \
  --f-size 2 \
  --epochs 75 \
  --data voc_seg/voc-seg-fw-800-3000.yaml \
  --hyp hyp.scratch-low.yaml \
  --weights yolov5x-cls.pt \
  --cfg yolov5x-seg.yaml \
  --cache \
  --no-overlap \
  --patience 0

In [None]:
# Remove the previous training data (images and labels) of the VOC dataset.
# Every `!` command runs in its own subshell, so there is no need to `cd` back;
# -f keeps the cell re-runnable when the directories have already been removed.
!rm -rf ../datasets/VOC/images ../datasets/VOC/labels

#### F = 800, W = 0

In [None]:
# PascalVOC run for F = 800, W = 0 (no --f-size flag is passed for this run).
# One flag per line so the settings are easy to compare with the other runs.
!python segment/train.py \
  --img 512 \
  --batch-size 16 \
  --epochs 400 \
  --data voc_seg/voc-seg-fw-800.yaml \
  --hyp hyp.scratch-low.yaml \
  --weights yolov5x-cls.pt \
  --cfg yolov5x-seg.yaml \
  --cache \
  --no-overlap \
  --patience 0

In [None]:
# Remove the VOC images and labels used by the run above before the next one.
# Every `!` command runs in its own subshell, so there is no need to `cd` back;
# -f keeps the cell re-runnable when the directories have already been removed.
!rm -rf ../datasets/VOC/images ../datasets/VOC/labels

#### few-shot

In [None]:
# PascalVOC few-shot run.
# One flag per line so the settings are easy to compare with the other runs.
!python segment/train.py \
  --img 512 \
  --batch-size 16 \
  --f-size 1 \
  --epochs 500 \
  --data voc_seg/voc-seg-few-shot.yaml \
  --hyp hyp.scratch-low.yaml \
  --weights yolov5x-cls.pt \
  --cfg yolov5x-seg.yaml \
  --cache \
  --no-overlap \
  --patience 0

In [None]:
# Remove the VOC images and labels used by the few-shot run.
# Every `!` command runs in its own subshell, so there is no need to `cd` back;
# -f keeps the cell re-runnable when the directories have already been removed.
!rm -rf ../datasets/VOC/images ../datasets/VOC/labels

### Cityscapes

Install all the dependencies:

In [None]:
# NOTE(review): this relative path only resolves when the working directory is
# the one that contains the clone (e.g. a fresh kernel). If the PascalVOC
# section was run in this session the kernel is already inside yolov5 and this
# `%cd` will fail - skip the cell in that case.
%cd optimal-annotation-mix/yolov5
# Install the packages listed in this directory's requirements.txt
# (-q: quiet output, -r: read the requirement list from a file).
%pip install -qr requirements.txt

Extract _Cityscapes.zip_ to _datasets/Cityscapes_:

In [None]:
# yolov5 is the current dir
# NOTE: the shell command below changes to the parent directory first, so a
# relative archive path is resolved from the parent of yolov5, not from yolov5.
path_to_cityscapes = 'path/to/Cityscapes.zip'
# The interpolated path is quoted so that a location containing spaces reaches
# unzip as a single argument (-n: never overwrite existing files, -q: quiet).
# No `cd yolov5` is needed afterwards: a `!` command runs in its own subshell.
!cd .. && mkdir -p 'datasets/Cityscapes' && unzip -nq "$path_to_cityscapes" -d 'datasets/'

Start training:

#### F = 1475, W = 1000

In [None]:
# Cityscapes run for F = 1475, W = 1000.
# Uses the full --batch-size spelling, as in the PascalVOC cells, instead of
# relying on the abbreviated --batch being accepted by the argument parser.
!python segment/train.py \
  --img 1024 \
  --batch-size 8 \
  --f-size 4 \
  --epochs 272 \
  --data cityscapes_seg/Cityscapes-seg-fw-1475-1000.yaml \
  --hyp hyp.scratch-low.yaml \
  --weights yolov5x-cls.pt \
  --cfg yolov5x-seg.yaml \
  --cache \
  --no-overlap \
  --patience 0

In [None]:
# Remove the Cityscapes images and labels used by the run above.
# Every `!` command runs in its own subshell, so there is no need to `cd` back;
# -f keeps the cell re-runnable when the directories have already been removed.
!rm -rf ../datasets/Cityscapes/data/images ../datasets/Cityscapes/data/labels

#### F = 800, W = 0

In [None]:
# Cityscapes run for F = 800, W = 0 (no --f-size flag is passed for this run).
# Uses the full --batch-size spelling, as in the PascalVOC cells, instead of
# relying on the abbreviated --batch being accepted by the argument parser.
!python segment/train.py \
  --img 1024 \
  --batch-size 8 \
  --epochs 500 \
  --data cityscapes_seg/Cityscapes-seg-fw-800-0.yaml \
  --hyp hyp.scratch-low.yaml \
  --weights yolov5x-cls.pt \
  --cfg yolov5x-seg.yaml \
  --cache \
  --no-overlap \
  --patience 0

In [None]:
# Remove the Cityscapes images and labels used by the run above.
# Every `!` command runs in its own subshell, so there is no need to `cd` back;
# -f keeps the cell re-runnable when the directories have already been removed.
!rm -rf ../datasets/Cityscapes/data/images ../datasets/Cityscapes/data/labels

## DETR training

Install all the dependencies:

In [None]:
# NOTE(review): this relative path only resolves when the working directory is
# the one that contains the clone (e.g. a fresh kernel). After running one of
# the YOLO sections the kernel is inside yolov5, where this `%cd` will fail.
%cd optimal-annotation-mix/detr
# Install the packages listed in this directory's requirements.txt
# (-q: quiet output, -r: read the requirement list from a file).
%pip install -qr requirements.txt

### Prepare the PascalVOC dataset

In [1]:
# In Colab, open /usr/local/lib/python3.10/dist-packages/pycocotools/cocoeval.py
# and replace iouThr=.75 with iouThr=.70 in line 462 to get mAP70 instead of mAP75.
# (The exact path and line number may differ for other Python or pycocotools versions.)

Download PascalVOC dataset:

In [None]:
# Create the folders the following cells write to: one annotation folder per
# task and one image folder per split.
!mkdir -p 'dataset/annotations/segmentation' 'dataset/annotations/detection'
!mkdir -p 'dataset/train' 'dataset/val'
# Run the repository's VOC2012.sh with 'dataset' as its argument (presumably
# the download target - the next cell expects dataset/VOCdevkit/VOC2012).
!bash VOC2012.sh 'dataset'

In [None]:
from shutil import move, copy


# Root of the VOC2012 devkit inside the local 'dataset' folder.
path_to_VOC = 'dataset/VOCdevkit/VOC2012'

# Locations inside the devkit that the helpers and shell commands below refer to.
ann_dir = '/'.join((path_to_VOC, 'Annotations'))
path_seg_val = '/'.join((path_to_VOC, 'ImageSets/Segmentation/val.txt'))
path_mask = '/'.join((path_to_VOC, 'SegmentationObject'))

def copy_data(ids_path, path_from, split):
    """Copy the JPEG images of the given ids into ``dataset/{split}``.

    Args:
        ids_path: Either the path (``str`` or ``os.PathLike``) of a text file
            holding whitespace-separated image ids, or an iterable of ids.
        path_from: Directory that contains the ``{id}.jpg`` source images.
        split: Name of the destination sub-folder of ``dataset``
            (the notebook uses ``'train'`` and ``'val'``).

    Raises:
        FileNotFoundError: If the ids file or one of the source images is missing.
    """
    import os  # local import so the helper does not depend on another cell

    # Path-like objects are treated as an ids file too; previously only ``str``
    # was, so a ``pathlib.Path`` fell through to the "iterable of ids" branch.
    if isinstance(ids_path, (str, os.PathLike)):
        with open(ids_path, 'r') as f:
            ids = f.read().split()
    else:
        ids = ids_path

    # Make sure the destination exists so the helper also works when the
    # directory-creation cell has not been run.
    os.makedirs(f'dataset/{split}', exist_ok=True)

    for i in ids:
        img_path_from = f'{path_from}/{i}.jpg'
        img_path_to = f'dataset/{split}/{i}.jpg'
        copy(img_path_from, img_path_to)


# def copy_detection():
#     for split in ['train', 'val']:
#         copy_data(f'{path_to_VOC}/ImageSets/Main/{split}.txt', f'{path_to_VOC}/JPEGImages', split)

def copy_detection_trainval():
    """Fill the train and val image folders for the detection task.

    Train receives every id listed in ``ImageSets/Main/trainval.txt`` that is
    not listed in ``ImageSets/Segmentation/val.txt`` (in sorted order); val
    receives the ids of the segmentation val list.
    """
    images_dir = f'{path_to_VOC}/JPEGImages'
    seg_val_list = f'{path_to_VOC}/ImageSets/Segmentation/val.txt'

    with open(f'{path_to_VOC}/ImageSets/Main/trainval.txt', 'r') as trainval_file:
        candidate_ids = trainval_file.read().split()

    with open(seg_val_list, 'r') as val_file:
        held_out_ids = set(val_file.read().split())

    # Keep the segmentation val images out of the training set.
    train_ids = sorted(
        image_id for image_id in candidate_ids if image_id not in held_out_ids
    )
    copy_data(train_ids, images_dir, 'train')

    # The val split is given as the ids file itself; copy_data reads it.
    copy_data(seg_val_list, images_dir, 'val')

def copy_segmentation():
    """Copy the images of the segmentation train and val lists into the matching split folders."""
    images_dir = f'{path_to_VOC}/JPEGImages'
    split_lists_dir = f'{path_to_VOC}/ImageSets/Segmentation'
    for split_name in ('train', 'val'):
        copy_data(f'{split_lists_dir}/{split_name}.txt', images_dir, split_name)

Create validation annotations:

In [None]:
# Populate dataset/train and dataset/val with the images selected by the
# copy helpers from the previous cell.
copy_detection_trainval()
copy_segmentation()

# create val.json for detection
# (ids are taken from the segmentation val list, i.e. $path_seg_val)
!python voc2coco.py --ann_dir $ann_dir \
         --ann_ids $path_seg_val \
         --labels voc_labels.txt \
         --ext 'xml' \
         --extract_num_from_imgid \
         --output dataset/annotations/detection/val.json

# create val.json for segmentation
# (same ids and options as above, plus --masks_path pointing at $path_mask)
!python voc2coco.py --ann_dir $ann_dir \
         --ann_ids $path_seg_val \
         --labels voc_labels.txt \
         --ext 'xml' \
         --extract_num_from_imgid \
         --masks_path $path_mask \
         --output dataset/annotations/segmentation/val.json

DETR is trained in two stages: first the detection model is trained, then the mask head is fine-tuned.

As with YOLO, we created annotation files and placed them in _detr/data_. These files are named as follows: *train\_{task}\_{**F** size}\_{**W** size}.txt*. To train DETR with **F** = 400, **W** = 10331, use _train_det_400_10331.txt_ in the detection step and _train_seg_400.txt_ in the segmentation step.

### Detection model training

Create a training annotation for the **detection** task.

Use 10331 detection + 400 segmentation images for training:

In [None]:
# Ids file for the detection stage; a plain string literal, since nothing is
# interpolated into it.
path_train_det = 'data/train_det_400_10331.txt'

# Convert the VOC XML annotations of those ids into the detection train.json.
!python voc2coco.py --ann_dir $ann_dir \
         --ann_ids $path_train_det \
         --labels voc_labels.txt \
         --ext 'xml' \
         --extract_num_from_imgid \
         --output dataset/annotations/detection/train.json

In [None]:
# Settings for the detection stage. outDirDet is read again by the
# segmentation training cell to locate the checkpoint written here.
dataset_file = "voc"
dataDir='dataset'
outDirDet = 'outputs/detection/train_det_400_10331'

# Train the detection model; the Python variables above are interpolated into
# the command through the `$name` syntax.
!python main.py \
  --dataset_file $dataset_file \
  --batch_size 10 \
  --coco_path $dataDir \
  --output_dir $outDirDet \
  --epochs 300 \
  --lr_drop 250 \
  --eos_coef 0.03 \
  --num_queries 100

### Segmentation model training

Create a training annotation for the **segmentation** task.

Use the same 400 segmentation images for training the mask head

In [None]:
# Ids file for the segmentation stage; a plain string literal, since nothing
# is interpolated into it.
path_train_seg = 'data/train_seg_400.txt'

# Convert the VOC XML annotations of those ids into the segmentation
# train.json, passing the masks folder via --masks_path.
!python voc2coco.py --ann_dir $ann_dir \
         --ann_ids $path_train_seg \
         --labels voc_labels.txt \
         --ext 'xml' \
         --extract_num_from_imgid \
         --output dataset/annotations/segmentation/train.json \
         --masks_path $path_mask

In [None]:
# Settings for the segmentation stage.
# NOTE: this cell needs outDirDet, which is defined in the detection training
# cell - run that cell first (or define outDirDet by hand) in a fresh kernel.
dataset_file = "voc"
dataDir='dataset'
outDirSeg = 'outputs/segmentation/train_seg_400_10331'

# Path to the detection model checkpoint from the previous step
frozen_weights = f'{outDirDet}/checkpoint.pth'

# Second stage: start from the detection checkpoint given by --frozen_weights
# and train with --masks enabled.
!python main.py \
  --dataset_file $dataset_file \
  --batch_size 3 \
  --frozen_weights $frozen_weights \
  --coco_path $dataDir \
  --output_dir $outDirSeg \
  --epochs 150 \
  --lr_drop 120 \
  --eos_coef 0.03 \
  --num_queries 100 \
  --masks