## Setup

In [12]:
# imports

import tensorflow as tf
from tensorflow import keras
from keras.layers import (
    Flatten,
    Dense,
)
from keras.applications.vgg16 import VGG16
from PIL import Image
from PIL import ImageDraw, ImageFont
import numpy as np

In [None]:
# check cpu and gpu
# Count the physical devices TensorFlow can see for each device type and
# print one availability line per type.

num_cpus, num_gpus = (
    len(tf.config.list_physical_devices(device_type))
    for device_type in ("CPU", "GPU")
)

print("CPU available. " if num_cpus > 0 else "No CPU available. ")
print("GPU available. " if num_gpus > 0 else "No GPU available. ")

## Data preparation

In [13]:
# create a dictionary of images and their lists of bounding boxes
#
# Every non-blank row of data/data.csv must hold at least five comma-separated
# fields: an image path followed by four integers. The four integers are
# stored as one box, in file order; intersection_over_union in this notebook
# reads such a box as [x, y, width, height].
#
# Result: images_and_box_lists maps image path -> list of boxes, with keys in
# order of first appearance in the CSV.

images_and_box_lists = {}

# the context manager closes the file even when a row fails to parse
with open("data/data.csv") as csv_file:
    for line in csv_file:
        if not line.strip():
            # tolerate blank lines instead of failing on a missing field
            continue

        fields = line.rstrip("\n").split(",")

        # explicit indexing keeps the IndexError on rows with too few fields
        box = [int(fields[1]), int(fields[2]), int(fields[3]), int(fields[4])]

        # grouping by path needs no hard-coded first image, and rows of the
        # same image are merged even when they are not contiguous
        images_and_box_lists.setdefault(fields[0], []).append(box)

In [14]:
def intersection_over_union(box1, box2):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        box1: Indexable ``[x, y, width, height]``, where ``(x, y)`` is the
            corner with the smallest coordinates.
        box2: Second box, same layout as ``box1``.

    Returns:
        float: Intersection area divided by union area. ``0.0`` when the
        boxes do not overlap, and also ``0.0`` when the union has no
        positive area (e.g. both boxes are degenerate), where the plain
        formula would divide by zero.
    """
    box1_x1, box1_y1 = box1[0], box1[1]
    box1_w, box1_h = box1[2], box1[3]
    box2_x1, box2_y1 = box2[0], box2[1]
    box2_w, box2_h = box2[2], box2[3]

    # far corners of both boxes
    box1_x2, box1_y2 = box1_x1 + box1_w, box1_y1 + box1_h
    box2_x2, box2_y2 = box2_x1 + box2_w, box2_y1 + box2_h

    # the overlap rectangle; a negative extent means no overlap on that axis
    overlap_w = max(0, min(box1_x2, box2_x2) - max(box1_x1, box2_x1))
    overlap_h = max(0, min(box1_y2, box2_y2) - max(box1_y1, box2_y1))
    intersection_area = overlap_w * overlap_h

    union_area = box1_w * box1_h + box2_w * box2_h - intersection_area
    if union_area <= 0:
        # nothing to normalise by: define the IoU of empty boxes as 0
        return 0.0

    return intersection_area / float(union_area)

In [15]:
# split images and make labels list (0:55)
#
# Every full image is cut into a grid of w x h windows. Each window becomes
# one sample in `images`, gets a 0/1 entry in `labels`, and its best overlap
# with any bounding box of that image is recorded in `ious`. The three lists
# stay index-aligned.

images = []
labels = []
ious = []

w = 320
h = 360

for key, balloon_boxes in images_and_box_lists.items():
    # the context manager releases the image file once all windows are cut
    with Image.open("data/" + key) as full_image:
        # loop through windows; the ranges step over a 2880 x 1800 area,
        # i.e. 9 columns x 5 rows = 45 windows per image
        for window_y1 in range(0, 1800, h):
            for window_x1 in range(0, 2880, w):
                window_x2 = window_x1 + w
                window_y2 = window_y1 + h

                cropped_image = full_image.crop(
                    (window_x1, window_y1, window_x2, window_y2)
                )

                # get max iou for all balloon boxes in list
                window_box = [window_x1, window_y1, w, h]
                max_iou = max(
                    (
                        intersection_over_union(window_box, balloon_box)
                        for balloon_box in balloon_boxes
                    ),
                    default=0.0,
                )
                ious.append(max_iou)

                # a window counts as positive once its best overlap exceeds 0.2
                label = 1 if max_iou > 0.2 else 0

                # PIL sizes are (width, height): 32 x 36 pixels becomes a
                # (36, 32, channels) array, scaled to the range [0, 1]
                processed_image = np.asarray(cropped_image.resize((32, 36)))
                processed_image = processed_image.astype("float32") / 255
                images.append(processed_image)
                labels.append(label)

In [None]:
# check data
#
# Redraw the window grid on every labelled image, outline the windows whose
# label is 1 in red, print their IoU on top, and save the annotated copy
# under labeled_images/ using the same relative path as the source image.

font = ImageFont.load_default(100)

index = 0
# Walk the same dict, in the same order, that produced `labels` and `ious`,
# so `index` stays aligned even when the CSV does not list every image file.
for key in images_and_box_lists:
    with Image.open("data/" + key) as image:
        # draw box
        draw = ImageDraw.Draw(image)

        for y in range(0, 1800, h):
            for x in range(0, 2880, w):
                # reuse w/h so the drawn grid always matches the grid that
                # was used when the windows were cut
                window_corners = ((x, y), (x + w, y + h))
                draw.rectangle(window_corners, outline="black", width=3)

                if labels[index] == 1:
                    draw.rectangle(window_corners, outline="red", width=6)
                    draw.text(
                        (x + 70, y + 180),
                        str(round(ious[index], 2)),
                        font=font,
                        fill="black",
                        stroke_fill="white",
                        stroke_width=3,
                    )

                index += 1

        image.save("labeled_images/" + key)

## Different network models

In [None]:
# vgg16
#
# ImageNet-pretrained VGG16 without its top classifier, frozen, followed by a
# small dense head that ends in a single sigmoid unit.

vgg = VGG16(
    weights="imagenet",
    include_top=False,
    input_tensor=keras.layers.Input(shape=(36, 32, 3)),
)

# keep the pretrained convolutional weights fixed; only the head is trained
vgg.trainable = False

# `head` is a dedicated name for the tensor threaded through the new layers,
# so the sliding-window coordinate variables are not overwritten by a tensor
head = Flatten()(vgg.output)
for units in (128, 64, 32):
    head = Dense(units, activation="relu")(head)
head = Dense(1, activation="sigmoid")(head)

model = keras.Model(inputs=vgg.input, outputs=head)

## Training and evaluation

In [None]:
# split data
# The first 80 % of the samples (in their existing order) go to training and
# the remainder to testing. `train_boxes` / `test_boxes` hold the 0/1 window
# labels.

image_cut = int(len(images) * 0.8)
label_cut = int(len(labels) * 0.8)

all_images = np.asarray(images)
all_labels = np.asarray(labels)

train_images, test_images = all_images[:image_cut], all_images[image_cut:]
train_boxes, test_boxes = all_labels[:label_cut], all_labels[label_cut:]

In [None]:
# train
# NOTE(review): the model ends in a single sigmoid unit but is trained with
# mean squared error — confirm this is intended rather than binary
# cross-entropy.
# NOTE(review): validation_data is the test split, so the validation loss is
# not independent of the test score.

mse_loss = keras.losses.MeanSquaredError(
    reduction="sum_over_batch_size", name="mse"
)
adam = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss=mse_loss, optimizer=adam)

fit_options = {
    "epochs": 20,
    "validation_data": (test_images, test_boxes),
    "verbose": 1,
    "shuffle": True,
}
model.fit(train_images, train_boxes, **fit_options)

In [None]:
# test
# Score the trained model on the held-out split.

model.evaluate(x=test_images, y=test_boxes, verbose=1)

In [None]:
# save
# Write the trained model to disk under models/.

model.save(filepath="models/vgg_good_20_20.keras")