In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
import imageio as iio
import os
import xml.etree.ElementTree as ET
import cv2
import numpy as np
from PIL import Image
from numba import cuda  # https://stackoverflow.com/a/52354865/6476994
from sklearn.metrics import classification_report
from keras.utils.np_utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [2]:
# allows all images to be displayed at once (else only displays the last call to plt.imshow())
# https://stackoverflow.com/a/41210974
def displayImage(image, caption = None, colour = None) -> None:
    plt.figure()
    if(colour != None):
        plt.imshow(image, cmap=colour)
    else:
        plt.imshow(image)
        
    if(caption != None):
        # display caption below picture (https://stackoverflow.com/a/51486361)
        plt.figtext(0.5, 0.01, caption, wrap=True, horizontalalignment='center', fontsize=12)

# ZFNet

Source: https://towardsdatascience.com/zfnet-an-explanation-of-paper-with-code-f1bd6752121d

In [3]:
# free up GPU
cuda.select_device(0)
cuda.close()

In [4]:
# mnist = tf.keras.datasets.mnist
# (training_images, training_labels), (test_images, test_labels) = mnist.load_data()

In [5]:
# get the all original output filenames
# https://stackoverflow.com/a/3207973
all_image_filenames = next(os.walk('data/BB01'),
                     (None, None, []))[2]  # [] if no file
all_image_filenames.sort()

# split files into two groups
# training_images_filenames = all_image_filenames[:len(all_image_filenames)//2]
# test_images_filenames = all_image_filenames[len(all_image_filenames)//2:]
training_images_filenames = all_image_filenames[0:200]
test_images_filenames = all_image_filenames[201:285]

training_images = []
for train_img_fn in training_images_filenames:
    im = Image.open('data/BB01/{}'.format(train_img_fn))
    training_images.append(np.asarray(im))
# convert list of numpy arrays to numpy array of numpy arrays
# https://stackoverflow.com/a/27516930/6476994
training_images = np.stack(training_images, axis=0)
    
test_images = []
for test_img_fn in test_images_filenames:
    im = Image.open('data/BB01/{}'.format(test_img_fn))
    test_images.append(np.asarray(im))
test_images = np.stack(test_images, axis=0)

In [6]:
# labels
label_encoder = LabelEncoder()

all_annotations_filenames = next(os.walk('annotations/BB01'),
                     (None, None, []))[2]  # [] if no file
all_annotations_filenames.sort()

# split files into two groups
# training_annotations_filenames = all_annotations_filenames[:len(all_annotations_filenames)//2]
# test_annotations_filenames = all_annotations_filenames[len(all_annotations_filenames)//2:]
training_annotations_filenames = all_annotations_filenames[0:200]
test_annotations_filenames = all_annotations_filenames[201:285]

training_labels = []
for train_ann_filename in training_annotations_filenames:
    # https://www.geeksforgeeks.org/reading-and-writing-xml-files-in-python/#:~:text=To%20read%20an%20XML%20file,xml%20file%20using%20getroot().
    tree = ET.parse('annotations/BB01/{}'.format(train_ann_filename))
    root = tree.getroot()
    try:
        training_labels.append(root[6][0].text)
    except:
        training_labels.append('none')
# integer-encode labels so they can be one-hot-encoded
# https://stackoverflow.com/a/56227965/6476994
training_labels = np.array(training_labels)
training_labels = label_encoder.fit_transform(training_labels)


test_labels = []
for test_ann_filename in test_annotations_filenames:
    tree = ET.parse('annotations/BB01/{}'.format(test_ann_filename))
    root = tree.getroot()
    try:
        test_labels.append(root[6][0].text)
    except:
        test_labels.append('none')
# integer-encode labels so they can be one-hot-encoded
test_labels = np.array(test_labels)
test_labels = label_encoder.fit_transform(test_labels)

## Train the model

In [7]:
# training_images = training_images[:1000]
# training_labels = training_labels[:1000]
# test_images = test_images[:100]
# test_labels = test_labels[:100]

training_images = tf.image.resize(training_images, [224, 224]).numpy()
test_images = tf.image.resize(test_images, [224, 224]).numpy()

# print('training_images before lamda: {}/{}'.format(training_images.shape, training_images.ndim))

# training_images = tf.map_fn(lambda i: tf.stack([i]*3, axis=-1), training_images).numpy()
# test_images = tf.map_fn(lambda i: tf.stack([i]*3, axis=-1), test_images).numpy()

# print('training_images after lamda: {}/{}'.format(training_images.shape, training_images.ndim))

training_images = tf.image.resize(training_images, [224, 224]).numpy()
test_images = tf.image.resize(test_images, [224, 224]).numpy()

# print('training_images shape: {}'.format(training_images.shape))
# print('training_labels shape: {}'.format(training_labels.shape))
# print('test_images shape: {}'.format(test_images.shape))
# print('test_labels shape: {}'.format(test_labels.shape))

training_images = training_images.reshape(200, 224, 224, 3)
training_images = training_images / 255.0
test_images = test_images.reshape(84, 224, 224, 3)
test_images = test_images / 255.0

training_labels = tf.keras.utils.to_categorical(training_labels, num_classes=10)
test_labels = tf.keras.utils.to_categorical(test_labels, num_classes=10)

# num_len_train = int(0.8 * len(training_images))
num_len_train = 84

ttraining_images = training_images[:num_len_train]
ttraining_labels = training_labels[:num_len_train]

valid_images = training_images[num_len_train:]
valid_labels = training_labels[num_len_train:]

training_images = ttraining_images
training_labels = ttraining_labels

model = tf.keras.models.Sequential([
                                    
		tf.keras.layers.Conv2D(96, (7, 7), strides=(2, 2), activation='relu',
			input_shape=(224, 224, 3)),
		tf.keras.layers.MaxPooling2D(3, strides=2),
    tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),

		tf.keras.layers.Conv2D(256, (5, 5), strides=(2, 2), activation='relu'),
		tf.keras.layers.MaxPooling2D(3, strides=2),
    tf.keras.layers.Lambda(lambda x: tf.image.per_image_standardization(x)),

		tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),

		tf.keras.layers.Conv2D(384, (3, 3), activation='relu'),

		tf.keras.layers.Conv2D(256, (3, 3), activation='relu'),

		tf.keras.layers.MaxPooling2D(3, strides=2),

    tf.keras.layers.Flatten(),

		tf.keras.layers.Dense(4096),

		tf.keras.layers.Dense(4096),

		tf.keras.layers.Dense(10, activation='softmax')
	])


model.compile(optimizer=tf.keras.optimizers.SGD(lr=0.01, momentum=0.9), \
              loss='categorical_crossentropy', \
              metrics=['accuracy', tf.keras.metrics.TopKCategoricalAccuracy(5)])

reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', \
                                            		factor=0.1, patience=1, \
																								min_lr=0.00001)
# print('training_images length: {}'.format(len(training_images)))
# print('training_labels length: {}'.format(len(training_labels)))

model.fit(training_images, training_labels, batch_size=128, \
          validation_data=(valid_images, valid_labels), \
					epochs=90, callbacks=[reduce_lr])

2022-05-17 15:20:44.659948: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 15:20:44.685917: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 15:20:44.686100: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-05-17 15:20:44.686784: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags

Epoch 1/90


  super(SGD, self).__init__(name, **kwargs)
2022-05-17 15:20:47.177021: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8400
2022-05-17 15:20:47.617506: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory


Epoch 2/90
Epoch 3/90
Epoch 4/90
Epoch 5/90
Epoch 6/90
Epoch 7/90
Epoch 8/90
Epoch 9/90
Epoch 10/90
Epoch 11/90
Epoch 12/90
Epoch 13/90
Epoch 14/90
Epoch 15/90
Epoch 16/90
Epoch 17/90
Epoch 18/90
Epoch 19/90
Epoch 20/90
Epoch 21/90
Epoch 22/90
Epoch 23/90
Epoch 24/90
Epoch 25/90
Epoch 26/90
Epoch 27/90
Epoch 28/90
Epoch 29/90
Epoch 30/90
Epoch 31/90
Epoch 32/90
Epoch 33/90
Epoch 34/90
Epoch 35/90
Epoch 36/90
Epoch 37/90
Epoch 38/90
Epoch 39/90
Epoch 40/90
Epoch 41/90
Epoch 42/90
Epoch 43/90
Epoch 44/90
Epoch 45/90
Epoch 46/90
Epoch 47/90
Epoch 48/90
Epoch 49/90
Epoch 50/90
Epoch 51/90
Epoch 52/90
Epoch 53/90
Epoch 54/90
Epoch 55/90
Epoch 56/90
Epoch 57/90
Epoch 58/90
Epoch 59/90
Epoch 60/90
Epoch 61/90
Epoch 62/90
Epoch 63/90
Epoch 64/90
Epoch 65/90
Epoch 66/90
Epoch 67/90
Epoch 68/90
Epoch 69/90
Epoch 70/90
Epoch 71/90
Epoch 72/90
Epoch 73/90
Epoch 74/90
Epoch 75/90
Epoch 76/90
Epoch 77/90
Epoch 78/90
Epoch 79/90
Epoch 80/90
Epoch 81/90
Epoch 82/90
Epoch 83/90
Epoch 84/90
Epoch 85/90


<keras.callbacks.History at 0x7f90501869a0>

## Evaluate the trained model

In [8]:
# print('test_images length: {}'.format(len(test_images)))
# print('test_labels length: {}'.format(len(test_labels)))

model.evaluate(test_images,test_labels)



[9.34968090057373, 0.0476190485060215, 0.1190476194024086]

In [9]:
predictions = (model.predict(test_images) > 0.5).astype("int32")

In [10]:
print(classification_report(test_labels, predictions))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       0.00      0.00      0.00        74
           2       0.12      0.50      0.20         8
           3       0.00      0.00      0.00         0
           4       0.00      0.00      0.00         0
           5       0.00      0.00      0.00         0
           6       0.00      0.00      0.00         0
           7       0.00      0.00      0.00         0
           8       0.00      0.00      0.00         0
           9       0.00      0.00      0.00         0

   micro avg       0.05      0.05      0.05        84
   macro avg       0.01      0.05      0.02        84
weighted avg       0.01      0.05      0.02        84
 samples avg       0.05      0.05      0.05        84



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Free up the GPU's memory

In [11]:
cuda.select_device(0)
cuda.close()