## Setup

In [1]:
# references

# referenced from https://towardsdatascience.com/implementing-a-fully-convolutional-network-fcn-in-tensorflow-2-3c46fb61de3b
# also https://pyimagesearch.com/2020/10/05/object-detection-bounding-box-regression-with-keras-tensorflow-and-deep-learning/
# used https://www.robots.ox.ac.uk/~vgg/software/via/via_demo.html to annotate images

In [2]:
# imports

import tensorflow as tf
from tensorflow import keras
from keras.layers import (
    Conv2D,
    MaxPooling2D,
    Dropout,
    BatchNormalization,
    Flatten,
    Dense,
)
from keras.applications.vgg16 import VGG16
from PIL import Image
from PIL import ImageDraw
import numpy as np



In [4]:
# check gpu

# Count the physical devices TensorFlow reports for each device type.
num_cpus = len(tf.config.list_physical_devices("CPU"))
num_gpus = len(tf.config.list_physical_devices("GPU"))

# Report availability per device type; a count of zero gets the "No " prefix.
for device_label, device_count in (("CPU", num_cpus), ("GPU", num_gpus)):
    prefix = "" if device_count > 0 else "No "
    print(prefix + device_label + " available. ")

CPU available. 
No GPU available. 


## Data preparation

In [16]:
# box preparation

# Divisors used to normalise the annotation columns. The model cells use
# 2880 as the image width and 1800 as the image height.
FULL_IMAGE_WIDTH = 2880
FULL_IMAGE_HEIGHT = 1800

# Read the annotation file inside a context manager so the handle is closed
# as soon as the text is loaded, and drop blank lines so an empty file or a
# stray empty row cannot produce a record with missing columns.
with open("data/data.csv") as annotation_file:
    rows = [
        line
        for line in annotation_file.read().strip().split("\n")
        if line.strip()
    ]

images = []  # starts as file names; later cells expect pixel arrays here
boxes = []
image_names = []
for row in rows:
    # Column 0 is the image file name, columns 1-4 are the box values.
    # The drawing cell treats them as x, y, width, height.
    row = row.split(",")
    images.append(row[0])
    image_names.append(row[0])
    boxes.append(
        [
            int(row[1]) / FULL_IMAGE_WIDTH,
            int(row[2]) / FULL_IMAGE_HEIGHT,
            int(row[3]) / FULL_IMAGE_WIDTH,
            int(row[4]) / FULL_IMAGE_HEIGHT,
        ]
    )
boxes = np.array(boxes)

In [17]:
# image preparation

image_resolution = 20  # integer factor by which width and height are divided

# Build the pixel arrays from image_names instead of overwriting the entries
# of `images` in place. The original file names stay available, so this cell
# can be re-run without re-running the box-preparation cell first.
prepared_images = []
for image_name in image_names:
    # The context manager closes the source file once the resized copy exists.
    with Image.open("data/" + image_name) as source_image:
        # Force three channels: the model input shapes end in 3, and a file
        # with an alpha channel or a palette would otherwise not fit.
        resized_image = source_image.convert("RGB").resize(
            (
                source_image.width // image_resolution,
                source_image.height // image_resolution,
            )
        )
    # Scale 8-bit channel values into [0, 1] as float32.
    prepared_images.append(np.asarray(resized_image).astype("float32") / 255)
images = np.array(prepared_images)

In [20]:
# reset image copy arrays

# Take an independent copy of the prepared images so later cells can work on
# it without touching `images`.
images_copy = np.copy(images)

In [21]:
# draw processed train images with boxes

for i in range(len(images_copy)):
    # convert to PIL
    # Scale into a new array rather than using `img *= 255`, which writes back
    # into images_copy[i] and would multiply the pixels again on a re-run.
    # Round before the uint8 cast so float error cannot drop a value by one.
    img = Image.fromarray(np.uint8(np.rint(images_copy[i] * 255)))

    # draw box
    # The box values are fractions of the image size, so scale them by the
    # size of the image being drawn on instead of a hard-coded 2880//20 and
    # 1800//20. The overlay then stays correct if image_resolution changes.
    draw = ImageDraw.Draw(img)
    left = int(boxes[i][0] * img.width)
    top = int(boxes[i][1] * img.height)
    p1 = (left, top)
    # Values 2 and 3 are used as width and height offsets from the top-left corner.
    p2 = (
        left + int(boxes[i][2] * img.width),
        top + int(boxes[i][3] * img.height),
    )
    draw.rectangle((p1, p2), outline="red")

    # [:-4] drops the last four characters of the name (e.g. ".png").
    img.save("boxed_images/" + image_names[i][:-4] + "uwu" + str(i) + ".png")

## Different network models

In [6]:
# my network

# input layer: (height, width, channels) of the downscaled images
# NOTE: `input` shadows the Python built-in of the same name inside this notebook.
input = keras.layers.Input(shape=(1800 // image_resolution, 2880 // image_resolution, 3))

# processing layers
# Four stages. Each stage stacks the given number of 3x3 ReLU convolutions,
# then applies 2x2 max pooling, 20% dropout and batch normalisation.
x = input
for conv_count in (2, 1, 2, 2):
    for _ in range(conv_count):
        x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)  # keep the max of each 2x2 window
    x = Dropout(rate=0.2)(x)  # randomly zero 20% of activations while training
    x = BatchNormalization()(x)  # normalise activations before the next stage

# output layers
x = Flatten()(x)  # collapse the feature maps into one vector per image
for units in (128, 64, 32):
    x = Dense(units, activation="relu")(x)
# Four sigmoid outputs, one per normalised box value.
output = Dense(4, activation="sigmoid")(x)

# actually create the model
model = keras.Model(inputs=input, outputs=output)

In [33]:
# transfer learning with vgg16

# ImageNet-pretrained VGG16 without its classifier head, fed with the
# downscaled image shape (height, width, channels).
# NOTE(review): the images are scaled to [0, 1] in the image-preparation cell;
# confirm whether keras.applications.vgg16.preprocess_input should be applied
# instead when using the ImageNet weights.
vgg_input = keras.layers.Input(
    shape=(1800 // image_resolution, 2880 // image_resolution, 3)
)
vgg = VGG16(weights="imagenet", include_top=False, input_tensor=vgg_input)

# Freeze the pretrained base so only the new dense head is trainable.
vgg.trainable = False

# Regression head: flatten the VGG features, narrow them through three ReLU
# layers, and finish with four sigmoid outputs (one per box value).
x = Flatten()(vgg.output)
for units in (128, 64, 32):
    x = Dense(units, activation="relu")(x)
x = Dense(4, activation="sigmoid")(x)
model = keras.Model(inputs=vgg.input, outputs=x)

model.summary()

## Network building

In [41]:
# split data

# Positional 80/20 split: the first 80% of the samples are used for training
# and the remainder is held out. No shuffling happens here, so the held-out
# set is whatever comes last in the annotation file.
image_cut = int(len(images) * 0.8)
box_cut = int(len(boxes) * 0.8)

train_images, test_images = images[:image_cut], images[image_cut:]
train_boxes, test_boxes = boxes[:box_cut], boxes[box_cut:]

In [42]:
# train

# Mean squared error between predicted and annotated box values, minimised
# with Adam at a learning rate of 1e-4.
mse_loss = keras.losses.MeanSquaredError(reduction="sum_over_batch_size", name="mse")
adam_optimizer = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss=mse_loss, optimizer=adam_optimizer)

# The held-out split is passed as validation data, so val_loss is reported
# after every epoch; training samples are reshuffled each epoch.
model.fit(
    x=train_images,
    y=train_boxes,
    epochs=20,
    validation_data=(test_images, test_boxes),
    verbose=1,
    shuffle=True,
)

Epoch 1/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 3s/step - loss: 0.0713 - val_loss: 0.0385
Epoch 2/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 3s/step - loss: 0.0316 - val_loss: 0.0338
Epoch 3/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 3s/step - loss: 0.0292 - val_loss: 0.0332
Epoch 4/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m126s[0m 3s/step - loss: 0.0281 - val_loss: 0.0333
Epoch 5/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 3s/step - loss: 0.0296 - val_loss: 0.0332
Epoch 6/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 3s/step - loss: 0.0276 - val_loss: 0.0332
Epoch 7/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 2s/step - loss: 0.0266 - val_loss: 0.0329
Epoch 8/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 2s/step - loss: 0.0271 - val_loss: 0.0328
Epoch 9/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x26b99753a90>

In [43]:
# test

# Report the loss on the held-out split; the returned value is the cell output.
model.evaluate(x=test_images, y=test_boxes, verbose=1)

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 2s/step - loss: 0.0323


0.03264449164271355

In [44]:
# save

# Write the trained model to disk in the native .keras format.
saved_model_path = "models/vgg_good_20_20.keras"
model.save(saved_model_path)