In [2]:
import csv
import math
import json
import os
import cv2

from PIL import Image, ImageDraw, ImageFont
from matplotlib import patches, patheffects
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.layers import Conv2D, Reshape, Flatten, Dropout, Dense, Concatenate
from tensorflow.keras.layers import BatchNormalization, Activation, GlobalAveragePooling2D, UpSampling2D
from tensorflow.keras.utils import Sequence
from tensorflow.keras.backend import epsilon
from tensorflow.keras.optimizers import Adam

# `display` and `HTML` are part of the public IPython.display API; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import display, HTML

# Inject a CSS rule that forces elements with class "container" to full width,
# so the notebook page uses the whole browser window.
display(HTML("<style>.container { width:100% !important; }</style>"))

  from ._conv import register_converters as _register_converters


### Preprocessing ###
Build the training and validation sets from the Pascal VOC 2007 dataset, keeping only the largest-area annotation for each image.

In [3]:
path = os.getcwd()
dataset_path = os.path.join(path, "dataset/Annotations")
image_path = os.path.join(path, "dataset/JPEGImages")

train_path = os.path.join(dataset_path, "pascal_train2007.json")
val_path = os.path.join(dataset_path, "pascal_val2007.json")


def _load_largest_annotation_dataset(json_path):
    """Load an annotation JSON file and pair each image with its largest annotation.

    Args:
        json_path: path to a JSON file containing "images" and "annotations" lists.

    Returns:
        A tuple ``(raw, dataset)`` where ``raw`` is the parsed JSON and ``dataset``
        is a list of ``{"image": <image dict>, "annotation": <annotation dict>}``
        in the order of ``raw["images"]``. The annotation is the one with the
        largest "area" for that image.
    """
    with open(json_path) as f:
        raw = json.load(f)

    # Single pass over the annotations instead of rescanning the whole list for
    # every image.
    largest_by_image = {}
    for anno in raw["annotations"]:
        best = largest_by_image.get(anno["image_id"])
        # Strict ">" keeps the first annotation on equal areas, which is what the
        # previous stable descending sort followed by [0] selected.
        if best is None or anno["area"] > best["area"]:
            largest_by_image[anno["image_id"]] = anno

    # An image without any annotation has no bounding box to learn from and
    # previously raised IndexError; such images are skipped.
    dataset = [
        {"image": image, "annotation": largest_by_image[image["id"]]}
        for image in raw["images"]
        if image["id"] in largest_by_image
    ]

    skipped = len(raw["images"]) - len(dataset)
    if skipped:
        print("Skipped {} image(s) without annotations in {}".format(skipped, json_path))
    return raw, dataset


# building up training dataset
train_data, train_dataset = _load_largest_annotation_dataset(train_path)

# building up validation dataset
val_data, val_dataset = _load_largest_annotation_dataset(val_path)

# redistribute list: pool both sets, then re-split 80% / 20% in the existing order
combine_list = train_dataset + val_dataset
split_index = int(len(combine_list) * 0.8)
train_dataset = combine_list[:split_index]
val_dataset = combine_list[split_index:]

# converter to convert id to label name
id_to_name = {c['id']: c['name'] for c in train_data['categories']}

print(train_data.keys())
print()
print("images: {}\n".format(train_data['images'][0]))
print("annotations: {}\n".format(train_data['annotations'][0]))
print("categories: {}\n".format(train_data['categories'][0]))

dict_keys(['images', 'type', 'categories', 'annotations'])

images: {'width': 500, 'id': 12, 'file_name': '000012.jpg', 'height': 333}

annotations: {'area': 34104, 'iscrowd': 0, 'bbox': [155, 96, 196, 174], 'category_id': 7, 'id': 1, 'segmentation': [[155, 96, 155, 270, 351, 270, 351, 96]], 'ignore': 0, 'image_id': 12}

categories: {'id': 1, 'name': 'aeroplane', 'supercategory': 'none'}



In [4]:
# Report how many samples are in the combined list and in each split.
size_report = [
    "Combine dataset: {}".format(len(combine_list)),
    "Train dataset: {}".format(len(train_dataset)),
    "Validate dataset: {}".format(len(val_dataset)),
]
print("\n".join(size_report))

Combine dataset: 5011
Train dataset: 4008
Validate dataset: 1003


In [5]:
def bb_hw(a, img_height=None):
    """Re-express an ``[x, y, width, height]`` box with its y measured from the opposite edge.

    The x, width and height values are returned unchanged; y becomes
    ``img_height - y - height``.

    Args:
        a: box as a sequence ``[x, y, width, height]``.
        img_height: height of the image the box belongs to. When omitted, the
            notebook-level variable ``height`` is used, which is what this
            function read before it accepted the argument.

    Returns:
        A new list ``[x, img_height - y - height, width, height]``.
    """
    if img_height is None:
        # Backward-compatible fallback to the notebook global.
        img_height = height
    x, y, box_w, box_h = a[0], a[1], a[2], a[3]
    return [x, img_height - y - box_h, box_w, box_h]

def draw_outline(o, lw):
    """Set the path effects of `o` to a black stroke of width `lw` followed by the normal draw.

    Args:
        o: object exposing ``set_path_effects`` (e.g. a matplotlib patch or text).
        lw: line width passed to the stroke effect.
    """
    black_stroke = patheffects.Stroke(linewidth=lw, foreground='black')
    o.set_path_effects([black_stroke, patheffects.Normal()])
    
def draw_rect(ax, b):
    """Add an unfilled white rectangle for box `b` to `ax` and outline it in black.

    Args:
        ax: axes-like object exposing ``add_patch``.
        b: box whose first two items are the rectangle's xy anchor and whose last
           two items are passed on as its width and height. The box is used as
           given; no ``bb_hw`` conversion is applied here.
    """
    anchor = b[:2]
    size = b[-2:]
    rectangle = patches.Rectangle(anchor, *size, fill=False, edgecolor='white', lw=2)
    added = ax.add_patch(rectangle)
    draw_outline(added, 4)
    
def draw_text(ax, xy, txt, sz=14):
    """Write bold white text `txt` at position `xy` on `ax` and outline it in black.

    Args:
        ax: axes-like object exposing ``text``.
        xy: position, unpacked into the leading positional arguments of ``ax.text``.
        txt: string to draw.
        sz: font size (default 14).
    """
    label_style = dict(verticalalignment='top', color='white', fontsize=sz, weight='bold')
    label = ax.text(*xy, txt, **label_style)
    draw_outline(label, 1)
    
def show_img(im, figsize=(10, 10), ax=None):
    """Show image `im` on an axes with the y-axis inverted and return that axes.

    Args:
        im: image data accepted by ``ax.imshow``.
        figsize: figure size used only when a new figure has to be created.
        ax: existing axes to draw on; a new figure/axes pair is created when None.

    Returns:
        The axes the image was drawn on.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=figsize)
    ax.imshow(im)
    # Invert the axes that was actually drawn on. The previous plt.gca() call
    # affected whichever axes was current, which is not necessarily `ax` when the
    # caller supplies one (e.g. one panel of a subplot grid).
    ax.invert_yaxis()
    return ax

def load_image(path):
    """Read the image at `path`, flipped vertically and with its channel order reversed.

    Args:
        path: location of the image file to read.

    Returns:
        The array from ``cv2.imread`` with the first (row) axis reversed and the
        last (channel) axis reversed.

    Raises:
        FileNotFoundError: if ``cv2.imread`` returns None, i.e. the file could not
            be read.
    """
    # Read the file named by the argument; the previous code ignored `path` and
    # used the notebook-level `img_path` instead.
    img = cv2.imread(path)  # read as color image
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(path))
    img = img[::-1, :, :]  # reverse the height axis in (height, width, channel)
    img = img[..., ::-1]   # reverse the channel order
    return img

Iterate through train_dataset: press any key to show the next image, or press 'q' to quit.

In [7]:
# Show each training image with its annotated box; any key advances, 'q' quits.
for idx, data in enumerate(train_dataset):
    img_path = os.path.join(image_path, data["image"]['file_name'])
    height = data["image"]["height"]
    width = data["image"]["width"]

    bbox = data['annotation']['bbox']
    # bbox = bb_hw(bbox)
    print(bbox)

    img = cv2.imread(img_path, 1)
    if img is None:
        # cv2.imread returns None for an unreadable file; skip it instead of
        # failing inside cv2.rectangle.
        print("Could not read image: {}".format(img_path))
        continue

    # bbox is [x, y, width, height]; cv2.rectangle wants two opposite corners.
    box_x, box_y, box_w, box_h = bbox
    cv2.rectangle(img, (box_x, box_y), (box_x + box_w, box_y + box_h), (0,255,0), 2)
    cv2.imshow("image", img)

    # The window is closed after every key press, whether or not we stop.
    key = cv2.waitKey(0) & 0xFF
    cv2.destroyAllWindows()
    if key == ord('q'):
        break

[155, 96, 196, 174]
[89, 77, 314, 259]
[2, 1, 241, 461]
[89, 124, 248, 88]
[103, 77, 272, 106]
[8, 106, 491, 157]


### Image Augmentation

In [11]:
import imgaug as ia
from imgaug import augmenters as iaa
import warnings
# Ignore every FutureWarning raised from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)

def _sometimes(augmenter):
    """Wrap `augmenter` in ``iaa.Sometimes`` with probability 0.5.

    e.g. _sometimes(GaussianBlur(0.3)) would blur roughly every second image.
    """
    return iaa.Sometimes(0.5, augmenter)


def _build_augmentation_sequence():
    """Assemble the augmentation pipeline used by `aug_image`.

    Two flips are always part of the sequence; on top of that, 0 to 3 of the
    "optional" augmenters are applied per image. Augmenters are instantiated in
    the same order in which they are listed in the pipeline. Superpixels, Emboss
    and PerspectiveTransform are not part of the pipeline.
    """
    flip_horizontal = iaa.Fliplr(0.5)  # horizontally flip 50% of all images
    flip_vertical = iaa.Flipud(0.2)  # vertically flip 20% of all images

    rotate = _sometimes(iaa.Affine(rotate=(-45, 45)))
    blur = iaa.OneOf([
        iaa.GaussianBlur((0, 3.0)),  # sigma between 0 and 3.0
        iaa.AverageBlur(k=(2, 7)),  # local means, kernel sizes between 2 and 7
        iaa.MedianBlur(k=(3, 11)),  # local medians, kernel sizes between 3 and 11
    ])
    sharpen = iaa.Sharpen(alpha=(0, 1.0), lightness=(0.75, 1.5))
    # search either for all edges or for directed edges,
    # blend the result with the original image using a blobby mask
    edge_blend = iaa.SimplexNoiseAlpha(iaa.OneOf([
        iaa.EdgeDetect(alpha=(0.5, 1.0)),
        iaa.DirectedEdgeDetect(alpha=(0.5, 1.0), direction=(0.0, 1.0)),
    ]))
    gaussian_noise = iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5)
    pixel_dropout = iaa.OneOf([
        iaa.Dropout((0.01, 0.1), per_channel=0.5),  # randomly remove up to 10% of the pixels
        iaa.CoarseDropout((0.03, 0.15), size_percent=(0.02, 0.05), per_channel=0.2),
    ])
    invert = iaa.Invert(0.05, per_channel=True)  # invert color channels
    brightness_shift = iaa.Add((-10, 10), per_channel=0.5)  # add -10 to 10 to pixel values
    hue_saturation = iaa.AddToHueAndSaturation((-20, 20))
    # either change the brightness of the whole image (sometimes
    # per channel) or change the brightness of subareas
    brightness_scale = iaa.OneOf([
        iaa.Multiply((0.5, 1.5), per_channel=0.5),
        iaa.FrequencyNoiseAlpha(
            exponent=(-4, 0),
            first=iaa.Multiply((0.5, 1.5), per_channel=True),
            second=iaa.ContrastNormalization((0.5, 2.0))
        )
    ])
    contrast = iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5)  # improve or worsen the contrast
    grayscale = iaa.Grayscale(alpha=(0.0, 1.0))
    elastic = _sometimes(iaa.ElasticTransformation(alpha=(0.5, 3.5), sigma=0.25))  # move pixels locally around
    piecewise = _sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.04)))  # move parts of the image around

    # execute 0 to 3 of the optional augmenters per image;
    # executing all of them would often be way too strong
    optional = iaa.SomeOf(
        (0, 3),
        [
            rotate,
            blur,
            sharpen,
            edge_blend,
            gaussian_noise,
            pixel_dropout,
            invert,
            brightness_shift,
            hue_saturation,
            brightness_scale,
            contrast,
            grayscale,
            elastic,
            piecewise,
        ],
        random_order=True
    )
    return iaa.Sequential([flip_horizontal, flip_vertical, optional], random_order=True)


def aug_image(img, bbox):
    """Randomly augment an image together with its bounding box.

    Args:
        img: image array; its ``shape`` is used as the bounding-box canvas.
        bbox: box in Pascal layout ``[x1, y1, width, height]``.

    Returns:
        ``(image_aug, bbox_list)``: the augmented image as a numpy array and a
        list with one ``[x1, y1, x2, y2]`` entry per input box. Note the output
        layout is corner-based, unlike the input.
    """
    seq = _build_augmentation_sequence()

    # Pascal boxes are x1, y1, width, height; imgaug wants x1, y1, x2, y2.
    left, top, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]
    box = ia.BoundingBox(x1=left, y1=top, x2=left+box_w, y2=top+box_h)
    bbs = ia.BoundingBoxesOnImage([box], shape=img.shape)

    # Fix the augmentation sequence so the image and the box are augmented by the
    # same deterministic augmenter.
    seq_det = seq.to_deterministic()

    image_aug = np.array(seq_det.augment_images([img])[0])
    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]

    augmented = (bbs_aug.bounding_boxes[i] for i in range(len(bbs.bounding_boxes)))
    bbox_list = [[b.x1, b.y1, b.x2, b.y2] for b in augmented]
    return image_aug, bbox_list

In [12]:
IMAGE_SIZE = 300

# Show each training image resized to IMAGE_SIZE x IMAGE_SIZE and randomly
# augmented, with its augmented box; any key advances, 'q' quits.
for idx, data in enumerate(train_dataset):
    img_path = os.path.join(image_path, data["image"]['file_name'])

    bbox = data['annotation']['bbox']
    img = cv2.imread(img_path, 1)
    width = data["image"]["width"]
    height = data["image"]["height"]

    if img is None:
        # cv2.imread returns None for an unreadable file; skip it instead of
        # failing inside cv2.resize.
        print("Could not read image: {}".format(img_path))
        continue

    # rescale image, and the [x, y, width, height] box by the same factors
    img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))
    x_scale = IMAGE_SIZE / width
    y_scale = IMAGE_SIZE / height
    box_x, box_y, box_w, box_h = bbox
    bbox = [int(box_x * x_scale), int(box_y * y_scale), int(box_w * x_scale), int(box_h * y_scale)]

    # augment image and bbox; the returned box is [x1, y1, x2, y2]
    img, bbox_list = aug_image(img, bbox)
    # Augmented coordinates may be non-integer (e.g. after a rotation), while
    # cv2.rectangle expects integer pixel coordinates.
    bbox = [int(round(float(coord))) for coord in bbox_list[0]]

    cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0,255,0), 2)
    cv2.imshow("image", img)

    # The window is closed after every key press, whether or not we stop.
    key = cv2.waitKey(0) & 0xFF
    cv2.destroyAllWindows()
    if key == ord('q'):
        break