## Crosswalk Model
Below we train a CNN model to detect crosswalks

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import PIL
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
import pathlib
import pandas as pd
from PIL import Image 
from PIL.ImageDraw import Draw

## Data Import ##
Let's import our data

In [5]:
train_cvs = './data/train/_annotations.csv'
train_images = './data/train/'

test_cvs = './data/test/_annotations.csv'
test_images = './data/test/'

valid_cvs = './data/valid/_annotations.csv'
valid_images = './data/valid/'

classes = ['crosswalk']

def load_data(csv_file, image_dir):
    training_image_records = pd.read_csv(csv_file)
    train_image_path = os.path.join(os.getcwd(), image_dir)

    train_images = []
    train_targets = []
    train_labels = []

    for index, row in training_image_records.iterrows():
        
        (filename, width, height, class_name, xmin, ymin, xmax, ymax) = row
        
        train_image_fullpath = os.path.join(image_dir, filename)
        train_img = keras.preprocessing.image.load_img(train_image_fullpath, target_size=(height, width))
        train_img_arr = keras.preprocessing.image.img_to_array(train_img)
        
        train_images.append(train_img_arr)
        train_targets.append((xmin, ymin, xmax, ymax))
        train_labels.append(classes.index(class_name))

    return np.array(train_labels), np.array(train_targets), np.array(train_images)

train_labels, train_targets, train_images = load_data(train_cvs, train_images)
test_labels, test_targets, test_images = load_data(test_cvs, test_images)
valid_labels, valid_targets, valid_images = load_data(valid_cvs, valid_images)

## Our Model ##
We create a CNN model which has the ability to show bounding boxes

In [6]:
height, width = 640, 640
input_shape = (height, width, 3)
input_layer = tf.keras.layers.Input(input_shape)

#create the base layers
base_layers = layers.experimental.preprocessing.Rescaling(1./255, name='bl_1')(input_layer)
base_layers = layers.Conv2D(16, 3, padding='same', activation='relu', name='bl_2')(base_layers)
base_layers = layers.MaxPooling2D(name='bl_3')(base_layers)
base_layers = layers.Conv2D(32, 3, padding='same', activation='relu', name='bl_4')(base_layers)
base_layers = layers.MaxPooling2D(name='bl_5')(base_layers)
base_layers = layers.Conv2D(64, 3, padding='same', activation='relu', name='bl_6')(base_layers)
base_layers = layers.MaxPooling2D(name='bl_7')(base_layers)
base_layers = layers.Flatten(name='bl_8')(base_layers)

#create the classifier branch
classifier_branch = layers.Dense(128, activation='relu', name='cl_1')(base_layers)
classifier_branch = layers.Dense(len(classes), name='cl_head')(classifier_branch) 

#create the localiser branch
locator_branch = layers.Dense(128, activation='relu', name='bb_1')(base_layers)
locator_branch = layers.Dense(64, activation='relu', name='bb_2')(locator_branch)
locator_branch = layers.Dense(32, activation='relu', name='bb_3')(locator_branch)
locator_branch = layers.Dense(4, activation='sigmoid', name='bb_head')(locator_branch)

model = tf.keras.Model(input_layer, outputs=[classifier_branch, locator_branch])
losses = {"cl_head": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), "bb_head": tf.keras.losses.MSE}
model.compile(loss=losses, optimizer='Adam', metrics=['accuracy'])

In [7]:
trainTargets = {
    "cl_head": train_labels,
    "bb_head": train_targets
}
validTargets = {
    "cl_head": valid_labels,
    "bb_head": valid_targets
}

training_epochs = 10

history = model.fit(
            train_images, trainTargets,
            validation_data=(valid_images, validTargets),
            batch_size=4,
            epochs=training_epochs,
            shuffle=True,
            verbose=1
        )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [8]:
history.history

{'loss': [164698.796875,
  164677.859375,
  164677.859375,
  164677.859375,
  164677.875,
  164677.859375,
  164677.859375,
  164677.875,
  164677.859375,
  164677.90625],
 'cl_head_loss': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'bb_head_loss': [164698.796875,
  164677.859375,
  164677.859375,
  164677.859375,
  164677.875,
  164677.859375,
  164677.859375,
  164677.875,
  164677.859375,
  164677.90625],
 'cl_head_accuracy': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
 'bb_head_accuracy': [0.138364776968956,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 'val_loss': [171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875],
 'val_cl_head_loss': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'val_bb_head_loss': [171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  171771.546875,
  1717

In [9]:
testTargets = {
    "cl_head": test_labels,
    "bb_head": test_targets
}

results = model.evaluate(test_images, testTargets, batch_size=128)
print("test loss, test acc:", results)

test loss, test acc: [152123.5, 0.0, 152123.5, 1.0, 0.0]


In [16]:
# make bounding box predictions on the input image
preds = model.predict(test_images)[0]
print(preds)
(startX, startY, endX, endY) = preds
# load the input image (in OpenCV format), resize it such that it
# fits on our screen, and grab its dimensions
image = cv2.imread(imagePath)
image = imutils.resize(image, width=600)
(h, w) = image.shape[:2]
# scale the predicted bounding box coordinates based on the image
# dimensions
startX = int(startX * w)
startY = int(startY * h)
endX = int(endX * w)
endY = int(endY * h)
# draw the predicted bounding box on the image
cv2.rectangle(image, (startX, startY), (endX, endY),
    (0, 255, 0), 2)
# show the output image
cv2.imshow("Output", image)
cv2.waitKey(0)

[[-0.10527883]
 [-0.09087132]
 [-0.09087132]
 [-0.10897115]
 [-0.09434853]
 [-0.04892787]
 [-0.11188803]
 [-0.11188803]
 [-0.1216284 ]
 [-0.0946286 ]
 [-0.0946286 ]
 [-0.10151491]
 [-0.06746612]
 [-0.06746612]
 [-0.11542323]
 [-0.10449751]
 [-0.11054926]
 [-0.13268134]
 [-0.12694566]
 [-0.02925179]
 [-0.07820338]
 [-0.10469791]
 [-0.11056319]
 [-0.09298025]
 [-0.09298035]
 [-0.04069618]
 [-0.04069618]]


ValueError: too many values to unpack (expected 4)