In [1]:
from pathlib import Path

from fastai.vision import *
from models import Darknet
from train import hyp
from utils import torch_utils
from utils.utils import compute_loss, build_targets

# Train Ultralytics yolov3 with FastAI
It's lovely that Ultralytics ported yolov3 to Python, 
and it's nice that they made a Jupyter notebook, 
but in that notebook they just invoke their Python script with bang syntax. 
It's basically glorified bash.

They also implemented their own:
* Scripts to download data
* Progress bar
* Learning rate finder
* Data augmentation
* Optimizer

All of these things are already present in FastAI, are well tested, and have community support, 
so let's use them! In this notebook we use:
* The Ultralytics model
* The loss function

Everything else is done through FastAI

### Load Pascal VOC data
Pascal VOC is the dataset pjreddie and alexeyab train on, so for apples-to-apples, let's use that.

FastAI has the URLs baked in, downloads automatically if not present, 
stores the data in a standard location and untars it for us.

In [2]:
# Background reading for the object-detection data handling used below:
# https://github.com/cedrickchee/knowledge/blob/master/courses/fast.ai/deep-learning-part-2/2018-edition/lesson-8-object-detection.md
# Fetch the three archives in one pass; each name ends up bound to the value
# untar_data returns for its URL.
coco, voc2007, voc2012 = (
    untar_data(url)
    for url in (URLs.COCO_TINY, URLs.PASCAL_2007, URLs.PASCAL_2012)
)

### Load COCO (deprecated)
FastAI (and the rest of the ML world) likes COCO better, so to initially make this notebook work,
we used that... now we're trying to get away from it. Eventually this block will be removed.

In [3]:
# Deprecated COCO path: read the training annotations and index them by image.
images, lbl_bbox = get_annotations(coco/'train.json')
img2bbox = {name: label for name, label in zip(images, lbl_bbox)}


def get_y_func(o):
    """Look up the COCO label entry for item ``o`` by its ``name`` attribute."""
    return img2bbox[o.name]

### Read the labels
There are 5 sets in total. We will follow pjreddie's lead and use 2007/test for test,
and everything else for training. Pascal gives us JSON labels, so let's use those,
but smoosh them all together into a normalized list of objects.

In [4]:
# https://pjreddie.com/darknet/yolo/#train-voc
files = [
    voc2007 / 'train.json', 
    voc2007 / 'valid.json',
    voc2007 / 'test.json',
    voc2012 / 'train.json', 
    voc2012 / 'valid.json'
    ]
jsons = [(it, json.load(it.open())) for it in files]
images = [{**img, 'file': fn} for (fn, json) in jsons for img in json["images"]]
images = { i["id"] : i for i in images }
annotations = [item for (fn, json) in jsons for item in json["annotations"]]
f"{len(images)}; {len(annotations)}"

'21503; 62199'

### Attach annotations to images
VOC gives us separate lists of images vs annotations, so let's normalize.

In [5]:
# Give every image a fresh, empty list first. This keeps the cell idempotent
# (re-running it no longer appends each annotation a second time) and
# guarantees the 'annotations' key exists even for images that have none.
for image in images.values():
    image['annotations'] = []

# Attach each annotation to the image it refers to.
for anno in annotations:
    images[anno['image_id']]['annotations'].append(anno)


### Find the one category we care about
There are 20 categories in VOC, but we are only interested in people. Let's get the label ID for them.

In [6]:
# The category table is read from the first label file that was loaded.
categories = jsons[0][1]["categories"]
person_ids = [cat["id"] for cat in categories if cat["name"] == "person"]
person_cat = person_ids[0]
person_cat

15

### Group by person vs not person
We'll need both positive and negative training samples, so we'll split the data up based on whether each image has a person in it.

In [7]:
def has_person(img):
    """Return True when at least one annotation of ``img`` is a person.

    Args:
        img: Image record (dict). Its optional ``'annotations'`` entry is a
            list of dicts carrying a ``'category_id'``.

    Returns:
        bool: True if any annotation's category id equals ``person_cat``.
        An image without an ``'annotations'`` key counts as having no person
        instead of raising ``KeyError``.
    """
    return any(a['category_id'] == person_cat for a in img.get('annotations', []))
# Single pass over all images: route each one to the positive or the negative
# bucket depending on whether it contains a person.
positive_samp, negative_samp = [], []
for img in images.values():
    bucket = positive_samp if has_person(img) else negative_samp
    bucket.append(img)
f"{len(positive_samp)}; {len(negative_samp)}"

'8566; 12937'

Based on what we've read about ML, it seems like the positive and negative sample counts should be roughly equal, so let's truncate.


In [8]:
# Cap the negatives at the number of positives. The slice keeps the leading
# entries, so the selection follows list order rather than being random.
n_positive = len(positive_samp)
negative_samp = negative_samp[:n_positive]
len(negative_samp)

8566

In [9]:
# Final sample list: every positive image followed by the retained negatives.
samples = [*positive_samp, *negative_samp]

### Load data into FastAI
FastAI wants the data loaded with its own classes, so let's feed our normalized data into it.

In [10]:
def get_folder(f):
    """Map a label file to the image sub-folder its pictures live in.

    Only the file *name* is inspected. Matching against the whole path would
    misclassify ``test.json`` whenever a parent directory happens to contain
    "train" or "valid" (for example a home directory called ``trainer``).

    Args:
        f: Path (or string) of a label file such as ``.../train.json``.

    Returns:
        ``'train'`` for train/valid label files, ``'test'`` otherwise.
    """
    file_name = Path(str(f)).name
    if 'train' in file_name or 'valid' in file_name:
        return 'train'
    return 'test'


def make_path(p):
    """Build the image path for sample ``p``.

    Args:
        p: Image record with ``'file'`` (path of the label file it came from)
            and ``'file_name'`` (image file name).

    Returns:
        ``<label file directory>/<train|test>/<file_name>``.
    """
    label_file = p['file']
    return label_file.parent / get_folder(label_file) / p['file_name']
# One image path per sample, in the same order as `samples`.
posix_paths = list(map(make_path, samples))

In [11]:
# Wrap the image paths in fastai's object-detection item list; splitting and
# labelling are applied to this list further down.
lst = ObjectItemList(posix_paths)

### Train / test split
pjreddie uses 2007 test as the validation for some reason. Let's do that:

In [15]:
def split_func(sample):
    """Return True when ``sample`` belongs to the validation set (VOC 2007 test).

    Only the label file's own name and its immediate directory name are
    inspected, so an unrelated path component containing "test" or "2007"
    (for example a home directory called ``tester``) cannot flip the result.

    Args:
        sample: Image record whose ``'file'`` entry is the path of the label
            file it was read from.

    Returns:
        bool: True for samples read from the 2007 ``test`` label file.
    """
    label_file = sample['file']
    return '2007' in label_file.parent.name and 'test' in label_file.name
# Image paths of every sample that split_func assigns to validation.
valid = {make_path(candidate) for candidate in samples if split_func(candidate)}
len(valid)

4952

In [17]:
def get_y_func(o):
    """Return the detection label for image path ``o`` as ``[bboxes, class_names]``.

    Replaces the debugging stub that printed every path and returned ``None``,
    which left ``label_from_func`` without any labels.

    All annotations of the image are returned, not only the person ones.
    NOTE(review): confirm that is the intended label set for this experiment.

    Args:
        o: Image path, equal to ``make_path(sample)`` for one entry of ``samples``.

    Returns:
        list: ``[bboxes, class_names]`` where each bbox is
        ``[top, left, bottom, right]`` and ``class_names`` holds the matching
        category names.

    Raises:
        KeyError: if ``o`` does not correspond to any entry of ``samples``.
    """
    cache = get_y_func.__dict__
    if 'by_path' not in cache:
        # Built lazily on the first call; re-running this cell redefines the
        # function and therefore starts with an empty cache again.
        cache['by_path'] = {make_path(s): s for s in samples}
        cache['cat_names'] = {c['id']: c['name'] for c in jsons[0][1]['categories']}

    annos = cache['by_path'][o].get('annotations', [])
    bboxes = []
    for anno in annos:
        # Assumes COCO-style [x, y, width, height] boxes -- TODO confirm against the label files.
        x, y, w, h = anno['bbox']
        bboxes.append([y, x, y + h, x + w])
    class_names = [cache['cat_names'][anno['category_id']] for anno in annos]
    return [bboxes, class_names]

In [18]:
def _in_validation_set(item):
    """True when the image path was selected for the validation split."""
    return item in valid


# Step 1: split into train / validation by membership in `valid`.
_split_lists = lst.split_by_valid_func(_in_validation_set)
# Step 2: label every item by calling get_y_func on it.
_labelled = _split_lists.label_from_func(get_y_func)
# Step 3: standard augmentation; tfm_y=True applies the transforms to the labels as well.
# NOTE(review): no `size=` is passed here -- confirm the images can be batched as they are.
_augmented = _labelled.transform(get_transforms(), tfm_y=True)
# Step 4: build the DataBunch with batch size 16; bb_pad_collate collates the
# samples of a mini-batch.
data = _augmented.databunch(bs=16, collate_fn=bb_pad_collate)

KeyboardInterrupt: 

In [None]:
# Visual sanity check: draw a 2-row grid of validation images with their labels.
data.show_batch(rows=2, ds_type=DatasetType.Valid, figsize=(6,6))


In [None]:
# Model configuration.
arc = 'default'
cfg = 'cfg/yolov3-tiny-anchors.cfg'

# Resolve the 'cpu' request through the project's device helper.
# NOTE(review): batch_size=64 here differs from the DataBunch's bs=16 -- confirm which is intended.
device = torch_utils.select_device('cpu', apex=False, batch_size=64)

# Build the network from the cfg file, then move it to the selected device.
model = Darknet(cfg, arc=arc)
model = model.to(device)

In [None]:
def loss_func(predicted, boxes, classes):
    """Compute the YOLOv3 loss for one fastai object-detection batch.

    The padded fastai batch is flattened into one target row per real
    detection, ``[image index, class, x_center, y_center, width, height]``,
    and handed to ``build_targets`` / ``compute_loss``.

    Args:
        predicted: Model output, passed through to ``compute_loss`` unchanged.
        boxes: Padded box tensor indexed ``[image, detection, 4]``. Assumed to
            use fastai's ``[top, left, bottom, right]`` layout scaled to
            ``[-1, 1]`` -- TODO confirm against the DataBunch actually used.
        classes: Padded class-id tensor indexed ``[image, detection]``. Id 0 is
            treated as padding and skipped.

    Returns:
        Whatever ``compute_loss`` returns for this batch.
    """
    keep = classes != 0                   # mask of real (non-padding) detections
    picked = boxes[keep].float()          # one row per kept detection, image-major order
    top, left, bottom, right = (picked[:, i] for i in range(4))

    # new_zeros keeps the targets on the same device as the batch and yields a
    # well-formed (0, 6) tensor when the batch contains no detections at all.
    targets = picked.new_zeros((picked.shape[0], 6))
    targets[:, 0] = keep.nonzero()[:, 0]  # index of the image each row belongs to
    # NOTE(review): class ids are forwarded as-is (0 is never emitted); confirm
    # this numbering matches the model's class count.
    targets[:, 1] = classes[keep]
    # Corner boxes in [-1, 1]  ->  centre / size in [0, 1].
    targets[:, 2] = (left + right) / 4 + 0.5
    targets[:, 3] = (top + bottom) / 4 + 0.5
    targets[:, 4] = (right - left) / 2
    targets[:, 5] = (bottom - top) / 2

    # NOTE(review): depending on the version of the project's utils,
    # compute_loss may call build_targets itself and may return a
    # (loss, items) tuple -- verify against the installed code.
    targets = build_targets(model, targets)
    loss = compute_loss(predicted, targets, model)
    return loss

In [None]:
# Tie the DataBunch, the Darknet model and the custom loss together in a fastai Learner.
learner = Learner(data, model, loss_func=loss_func)

In [None]:
# Attributes set on the model before training; loss_func passes `model` to
# build_targets / compute_loss.
model.arc = 'default'
# NOTE(review): 7 is a magic number -- confirm it matches the class count of
# the dataset that is actually loaded into `data`.
model.nc = 7  # num classes
model.hyp = hyp  # hyper-parameters imported from train.py
# Train for a single epoch.
learner.fit(1)