In [9]:
# references

# referenced from https://towardsdatascience.com/implementing-a-fully-convolutional-network-fcn-in-tensorflow-2-3c46fb61de3b
# also https://pyimagesearch.com/2020/10/05/object-detection-bounding-box-regression-with-keras-tensorflow-and-deep-learning/
# used https://www.robots.ox.ac.uk/~vgg/software/via/via_demo.html to annotate images

In [2]:
# imports

import tensorflow as tf
from tensorflow import keras
from keras.layers import (
    Conv2D,
    MaxPooling2D,
    Dropout,
    BatchNormalization,
    Flatten,
    Dense,
)
from keras.applications.vgg16 import VGG16
from PIL import Image
from PIL import ImageDraw
import numpy as np



In [4]:
# check gpu

# Print the physical devices TensorFlow can see, CPU first and GPU second.
for device_kind in ("CPU", "GPU"):
    print(tf.config.list_physical_devices(device_kind))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]
[]


In [3]:
# box preparation

# Divisors that turn the integer box columns into fractions of the frame.
# NOTE(review): this assumes every annotated image is 2880x1800 pixels -- confirm
# against the dataset.
SOURCE_WIDTH = 2880
SOURCE_HEIGHT = 1800


def _load_annotations(csv_path):
    """Read an annotation CSV and return its raw rows, image names and scaled boxes.

    Each non-blank row is split on commas. Column 0 is kept as the image file name;
    columns 1-4 are parsed as integers and divided by SOURCE_WIDTH, SOURCE_HEIGHT,
    SOURCE_WIDTH and SOURCE_HEIGHT respectively.

    Args:
        csv_path: path of the CSV file to read.

    Returns:
        A ``(rows, names, boxes)`` tuple: the raw row strings, the list of file
        names, and a NumPy array with one 4-value box per parsed row.

    Raises:
        IndexError: if a non-blank row has fewer than five columns.
        ValueError: if one of columns 1-4 is not an integer.
    """
    # `with` closes the file handle; the bare open(...).read() used before never did.
    with open(csv_path) as csv_file:
        rows = csv_file.read().strip().split("\n")

    names = []
    boxes = []
    for row in rows:
        # An empty file yields a single "" row; skip blanks instead of crashing on them.
        if not row.strip():
            continue
        fields = row.split(",")
        names.append(fields[0])
        boxes.append(
            [
                int(fields[1]) / SOURCE_WIDTH,
                int(fields[2]) / SOURCE_HEIGHT,
                int(fields[3]) / SOURCE_WIDTH,
                int(fields[4]) / SOURCE_HEIGHT,
            ]
        )
    return rows, names, np.array(boxes)


train_rows, train_image_names, train_boxes = _load_annotations("data/train/train.csv")
# Separate list: a later cell replaces these names with the loaded pixel arrays.
train_images = list(train_image_names)

test_rows, test_image_names, test_boxes = _load_annotations("data/test/test.csv")
test_images = list(test_image_names)

In [5]:
# image preparation

# Both source dimensions are integer-divided by this factor before training.
image_resolution = 20


def _load_images(directory, names):
    """Load, downscale and normalise the named images found under ``directory``.

    Each file is opened with PIL, converted to RGB, resized to
    ``(width // image_resolution, height // image_resolution)`` and converted to a
    float32 array divided by 255.

    Args:
        directory: path prefix (including the trailing slash) prepended to each name.
        names: iterable of image file names.

    Returns:
        A NumPy array built from one image array per entry of ``names``.
    """
    images = []
    for name in names:
        # The context manager closes the underlying file; convert/resize produce a
        # new in-memory image, so it stays usable after the block exits.
        with Image.open(directory + name) as image:
            resized = image.convert("RGB").resize(
                (image.width // image_resolution, image.height // image_resolution)
            )
        images.append(np.asarray(resized).astype("float32") / 255)
    return np.array(images)


# Paths are built from the *_image_names lists rather than from train_images /
# test_images, so re-running this cell after those names have been rebound to
# arrays still works.
train_images = _load_images("data/train/", train_image_names)
test_images = _load_images("data/test/", test_image_names)

In [6]:
# reset image copy arrays

# Take independent copies of both image arrays for later cells to work on.
train_images_copy, test_images_copy = (
    images.copy() for images in (train_images, test_images)
)

In [8]:
# Draw each training box on its downscaled image and save the result for inspection.
for i, (pixels, box) in enumerate(zip(train_images_copy, train_boxes)):
    # convert to PIL
    # Scale into a new array instead of `*=` on a view of train_images_copy: the
    # in-place form rescaled the stored copy, so re-running the cell drew on
    # already-scaled data.
    img = Image.fromarray(np.uint8(pixels * 255))

    # draw box
    # Box values are fractions of a 2880x1800 frame; map them onto the downscaled
    # image. Columns 2 and 3 are added to p1, i.e. treated as offsets from it.
    draw = ImageDraw.Draw(img)
    p1 = (
        int(box[0] * 2880 // image_resolution),
        int(box[1] * 1800 // image_resolution),
    )
    p2 = (
        int(box[2] * 2880 // image_resolution) + p1[0],
        int(box[3] * 1800 // image_resolution) + p1[1],
    )
    draw.rectangle((p1, p2), outline="black")

    # NOTE(review): the suffix is the last 6 characters of the file name, while the
    # test cell keeps the last 5 -- confirm which is intended.
    img.save("boxed_images/train/" + str(i) + train_image_names[i][-6:])

In [9]:
# Draw each test box on its downscaled image and save the result for inspection.
for i, (pixels, box) in enumerate(zip(test_images_copy, test_boxes)):
    # Scale into a new array instead of `*=` on a view of test_images_copy: the
    # in-place form rescaled the stored copy, so re-running the cell drew on
    # already-scaled data.
    img = Image.fromarray(np.uint8(pixels * 255))

    # Box values are fractions of a 2880x1800 frame; map them onto the downscaled
    # image. Columns 2 and 3 are added to p1, i.e. treated as offsets from it.
    draw = ImageDraw.Draw(img)
    p1 = (
        int(box[0] * 2880 // image_resolution),
        int(box[1] * 1800 // image_resolution),
    )
    p2 = (
        int(box[2] * 2880 // image_resolution) + p1[0],
        int(box[3] * 1800 // image_resolution) + p1[1],
    )
    draw.rectangle((p1, p2), outline="black")

    # NOTE(review): the suffix is the last 5 characters of the file name, while the
    # train cell keeps the last 6 -- confirm which is intended.
    img.save("boxed_images/test/" + str(i) + test_image_names[i][-5:])

In [11]:
# my network

# input layer: height x width x RGB, both dimensions divided by image_resolution
# NOTE: the name `input` shadows the Python builtin; kept so the notebook-level name
# stays the same.
input = keras.layers.Input(shape=(1800 // image_resolution, 2880 // image_resolution, 3))

# processing layers
# Three stages with 2, 1 and 2 convolutions. Every stage ends with 2x2 max-pooling
# (drops fine detail) and dropout (discourages reliance on particular pixels); every
# stage after the first starts with batch normalisation (recentring and rescaling).
x = input
for stage_index, conv_count in enumerate((2, 1, 2)):
    if stage_index > 0:
        x = BatchNormalization()(x)
    for conv_index in range(conv_count):
        x = Conv2D(filters=64, kernel_size=(3, 3), activation="relu")(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = Dropout(rate=0.2)(x)

# output layers
x = BatchNormalization()(x)
x = Flatten()(x)  # collapse the feature maps into a 1-dimensional vector
for units in (128, 64, 32):
    x = Dense(units, activation="relu")(x)
output = Dense(4, activation="sigmoid")(x)  # four box values, each squashed into (0, 1)

# actually create the model
model = keras.Model(inputs=input, outputs=output)
model.summary()

In [103]:
# transfer learning with vgg16

# ImageNet-pretrained VGG16 without its classifier head, fed the same input shape as
# the downscaled images.
vgg_input = keras.layers.Input(shape=(1800 // image_resolution, 2880 // image_resolution, 3))
vgg = VGG16(weights="imagenet", include_top=False, input_tensor=vgg_input)

# freeze the convolutional base so that only the new dense head is trained
vgg.trainable = False

# regression head: flatten, three shrinking ReLU layers, then four sigmoid outputs
x = Flatten()(vgg.output)
for units in (128, 64, 32):
    x = Dense(units, activation="relu")(x)
x = Dense(4, activation="sigmoid")(x)
model = keras.Model(inputs=vgg.input, outputs=x)

model.summary()

In [104]:
# train

# mean squared error between predicted and annotated box values, optimised with Adam
box_loss = keras.losses.MeanSquaredError(reduction="sum_over_batch_size", name="mse")
adam = keras.optimizers.Adam(learning_rate=1e-4)
model.compile(loss=box_loss, optimizer=adam)

# the test split is passed as validation data
fit_options = dict(
    epochs=20,
    validation_data=(test_images, test_boxes),
    verbose=1,
    shuffle=True,
)
model.fit(train_images, train_boxes, **fit_options)

Epoch 1/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 2s/step - loss: 0.1950 - val_loss: 0.1545
Epoch 2/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - loss: 0.1539 - val_loss: 0.1744
Epoch 3/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - loss: 0.1480 - val_loss: 0.1437
Epoch 4/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 2s/step - loss: 0.1346 - val_loss: 0.1377
Epoch 5/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 2s/step - loss: 0.1407 - val_loss: 0.1409
Epoch 6/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 2s/step - loss: 0.1400 - val_loss: 0.1406
Epoch 7/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 2s/step - loss: 0.1252 - val_loss: 0.1372
Epoch 8/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 2s/step - loss: 0.1311 - val_loss: 0.1404
Epoch 9/20
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x2320da0b8d0>

In [112]:
# test

# report the loss of the current model on the test images and boxes
model.evaluate(x=test_images, y=test_boxes, verbose=1)

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - loss: 0.0663


0.06860119104385376

In [113]:
# save

# write the current model to disk in the .keras format
model_path = "models/vgg_flippedfixed_20_20.keras"
model.save(model_path)