In [2]:
%pip install --user tensorflow opencv-python matplotlib imutils scikit-learn keras ssd pandas

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import os
import cv2
import json

import numpy as np
import pandas as pd
import tensorflow as tf
import keras
import keras.layers
from matplotlib import pyplot as plt
from PIL import Image  
import pickle
import os
import numpy as np
import cv2
import glob

In [6]:
# Define the path to the dataset
data_path = "data/WIDERFace"
AUTOTUNE = tf.data.AUTOTUNE
batch_size = 32

# Define the paths to the train, test, and val sets
train_path = os.path.join(data_path, "WIDER_train/images")
test_path = os.path.join(data_path, "WIDER_test/images")
val_path = os.path.join(data_path, "WIDER_val/images")


train_labels_path = ("data/WIDERFace/wider_face_split/wider_face_train_bbx_gt.txt")
test_labels_path = ("data/WIDERFace/wider_face_split/wider_face_test_filelist.txt")
val_labels_path = ("data/WIDERFace/wider_face_split/wider_face_val_bbx_gt.txt")


In [62]:
import os

def load_wider_annotations(annotations_file):
    """
    Load annotations for each image in the WIDER Face dataset from a text file.
    
    Args:
    - annotations_file: str, path to the annotations file.
    
    Returns:
    - A dictionary where the keys are image file names and the values are lists of bounding boxes for that image.
    """
    annotations = {}
    
    with open(annotations_file, 'r') as f:
        lines = f.readlines()
        
    for line in lines:
        line = line.strip()
        
        # Split the line into components
        components = line.split(' ')
        image_path = components[0]
        bbox = list(map(int, components[1:]))
        
        # Convert the image path to a file name
        image_file = os.path.basename(image_path)
        
        # Add the bbox to the list of bboxes for this image
        if image_file in annotations:
            annotations[image_path].append(bbox)
        else:
            annotations[image_path] = [bbox]
    
    return annotations

In [63]:
train_annotations = load_wider_annotations(train_labels_path)
test_annotations = load_wider_annotations(test_labels_path)
val_annotations = load_wider_annotations(val_labels_path)

print("Number of training images: {}".format(len(train_annotations)))
print("Number of test images: {}".format(len(test_annotations)))
print("Number of validation images: {}".format(len(val_annotations)))

print(list(train_annotations.keys())[22])
print(list(train_annotations.values())[22])
print(list(train_annotations.items())[22])

Number of training images: 12880
Number of test images: 16097
Number of validation images: 3226
data\WIDERFace\WIDER_train\images/0--Parade/0_Parade_marchingband_1_483.jpg
[[9, 485, 419, 48, 65, 1]]
('data\\WIDERFace\\WIDER_train\\images/0--Parade/0_Parade_marchingband_1_483.jpg', [[9, 485, 419, 48, 65, 1]])


In [9]:
ANCHOR_SIZE = 4

def iou(boxA, boxB):
  #evaluate the intersection points 
  xA = np.maximum(boxA[0], boxB[0])
  yA = np.maximum(boxA[1], boxB[1])
  xB = np.minimum(boxA[2], boxB[2])
  yB = np.minimum(boxA[3], boxB[3])

  # compute the area of intersection rectangle
  interArea = np.maximum(0, xB - xA + 1) * np.maximum(0, yB - yA + 1)

  # compute the area of both the prediction and ground-truth
  # rectangles
  boxAArea = (boxA[2] - boxA[0] + 1) * (boxA[3] - boxA[1] + 1)
  boxBArea = (boxB[2] - boxB[0] + 1) * (boxB[3] - boxB[1] + 1)

  #compute the union 
  unionArea = (boxAArea + boxBArea - interArea)

  # return the intersection over union value
  return interArea / unionArea

#for a given box we predict the corrosponding bounding box 
def get_anchor(box):

  max_iou = 0.0 
  
  matching_anchor  = [0, 0, 0, 0]
  matching_index   = (0, 0)
  i = 0 
  j = 0 
  
  w , h = (1/ANCHOR_SIZE, 1/ANCHOR_SIZE)
  
  for x in np.linspace(0, 1, ANCHOR_SIZE +1)[:-1]:
    j = 0 
    for y in np.linspace(0, 1, ANCHOR_SIZE +1)[:-1]:
      xmin = x 
      ymin = y
      
      xmax = (x + w) 
      ymax = (y + h) 
      
      anchor_box = [xmin, ymin, xmax, ymax]
      curr_iou = iou(box, anchor_box)
      
      #choose the location with the highest overlap 
      if curr_iou > max_iou:
        matching_anchor = anchor_box
        max_iou = curr_iou
        matching_index = (i, j)
      j += 1
    i+= 1
  return matching_anchor, matching_index

In [3]:
def create_volume(boxes):
  output = np.zeros((ANCHOR_SIZE, ANCHOR_SIZE, 5))
  for box in boxes:
    if max(box) == 0:
      continue
    _, (i, j) = get_anchor(box)
    output[i,j, :] = [1] + box
  return output

In [29]:
#read all the files for annotation 
annot_files = glob.glob('data/WIDERFace/wider_face_split/wider_face_train_bbx_gt.txt')
data = {}
for file in annot_files:  
  with open(file, 'r') as f:
    rows = f.readlines()
    
  j = len(rows)
  i = 0   
  while(i < j):
    #get the file name
    file_name = rows[i].replace('\n', '')+'.jpg'
    
    #get the number of boxes 
    num_boxes = int(rows[i+1])
    boxes = []
    
    img = Image.open(file_name)
    w, h = img.size
    #get all the bounding boxes
    for k in range(1, num_boxes+1):
      box = rows[i+1+k]
      box = box.split(' ')[0:5]
      box = [float(x) for x in box]
      
      #convert ellipse to a box 
      xmin = int(box[3]- box[1])
      ymin = int(box[4]- box[0])
      xmax = int(xmin + box[1]*2)
      ymax = int(ymin + box[0]*2)
      boxes.append([xmin/w, ymin/h, xmax/w, ymax/h])
    #conver the boxes to a volume of fixed size 
    data[file_name] = create_volume(boxes)
    i = i + num_boxes+2

ValueError: invalid literal for int() with base 10: 'data\\WIDERFace\\WIDER_train\\images/0--Parade/0_Parade_Parade_0_904.jpg 1 361 98 263 339 1\n'

In [10]:
IMG_SIZE = 240
BATCH_SIZE = 32

In [11]:
import tensorflow as tf
from keras.layers import Conv2D, MaxPooling2D, Dense, Input 
from keras.layers import Flatten, Dropout, BatchNormalization, Concatenate, Reshape, GlobalAveragePooling2D, Reshape
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
import cv2
import matplotlib.pyplot as plt 
import os 
import numpy as np
from PIL import Image
from random import shuffle
import random

In [13]:
tf.executing_eagerly()

True

In [64]:
import tensorflow as tf
import numpy as np
import os

GRID_SIZE = 10
NUM_BOXES_PER_CELL = 2


def parse_image(filename, label):
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [240, 240])
    label = tf.cast(label, tf.float32)
    return image, label


def parse_label(filename, annotations):
    image_annotations = annotations[tf.strings.basename(filename).numpy().decode('utf-8')]

    # Create an empty grid of cells, each of which will predict up to `NUM_BOXES_PER_CELL` bounding boxes
    grid = np.zeros((GRID_SIZE, GRID_SIZE, NUM_BOXES_PER_CELL, 5))

    # Convert the annotations for each bounding box into the appropriate format
    for annotation in image_annotations:
        # Extract the bounding box coordinates and class label
        class_label, x_min, y_min, w, h = annotation

        # Calculate the center coordinates of the bounding box
        x_center = x_min + w/2
        y_center = y_min + h/2

        # Map the center coordinates to the appropriate cell in the grid
        cell_x = int(x_center * GRID_SIZE)
        cell_y = int(y_center * GRID_SIZE)

        # Find the first empty bounding box slot in the cell and store the box information there
        for i in range(NUM_BOXES_PER_CELL):
            if grid[cell_y, cell_x, i, 0] == 0:
                grid[cell_y, cell_x, i] = [class_label, x_min, y_min, w, h]
                break

    return grid


def create_dataset(annotations_path, batch_size=32, shuffle=True):
    annotations = load_wider_annotations(annotations_path)
    image_paths = tf.constant([os.path.join(filename) for filename in annotations.keys()])
    labels = tf.constant([annotations[filename] for filename in annotations.keys()])

    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(annotations))
    dataset = dataset.map(lambda filename, label: (parse_image(filename, label), parse_label(filename, annotations)), num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset


In [65]:
train_dataset = create_dataset(train_labels_path, shuffle=True)
val_dataset = create_dataset(val_labels_path, shuffle=False)
test_dataset = create_dataset(test_labels_path, shuffle=False)

# Retrieve the first element from the dataset
example = next(iter(train_dataset))

# Print the contents of the element
print('Image shape:', example[0].shape)
print('Label:', example[1])

len(train_dataset), len(val_dataset), len(test_dataset)

AttributeError: in user code:

    File "C:\Users\Harry Parker\AppData\Local\Temp\ipykernel_33992\3421359932.py", line 54, in None  *
        lambda filename, label: (parse_image(filename, label), parse_label(filename, annotations))
    File "C:\Users\Harry Parker\AppData\Local\Temp\ipykernel_33992\3421359932.py", line 19, in parse_label  *
        image_annotations = annotations[tf.strings.basename(filename).numpy().decode('utf-8')]

    AttributeError: module 'tensorflow._api.v2.strings' has no attribute 'basename'


In [45]:
def plot_annot(img, boxes):  

  img = img.numpy()
  boxes = boxes.numpy()
  boxes = tf.reshape(boxes, [ANCHOR_SIZE, ANCHOR_SIZE, 5])

  
  for i in range(0, ANCHOR_SIZE):
    for j in range(0, ANCHOR_SIZE):
      box = boxes[i, j, 1:] * IMG_SIZE
      label = boxes[i, j, 0]
      
      if np.max(box) > 0:
        img = cv2.rectangle(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (1, 0, 0), 1)

  plt.axis('off')
  plt.imshow(img)
  plt.show()

In [46]:
for x, y in train_dataset:
  
  plot_annot(x[0], y[0])
  break

InvalidArgumentError: {{function_node __wrapped__Reshape_device_/job:localhost/replica:0/task:0/device:CPU:0}} Input to reshape is a tensor with 6 values, but the requested shape has 80 [Op:Reshape]

In [22]:
def conv_block(fs, x, activation = 'relu'):
  conv  = Conv2D(fs, (3, 3), padding = 'same', activation = activation)(x)
  bnrm  = BatchNormalization()(conv)
  drop  = Dropout(0.5)(bnrm)
  return drop

def residual_block(fs, x):
  y = conv_block(fs, x)
  y = conv_block(fs, y)
  y = conv_block(fs, y)
  return Concatenate(axis = -1)([x, y])

In [23]:
inp = Input(shape = (IMG_SIZE, IMG_SIZE, 3))

block1 = residual_block(16, inp)
pool1  = MaxPooling2D(pool_size = (2, 2))(block1)
block2 = residual_block(32, pool1)
pool2  = MaxPooling2D(pool_size = (2, 2))(block2)
block3 = residual_block(64, pool2)
pool3  = MaxPooling2D(pool_size = (2, 2))(block3)
block4 = residual_block(128, pool3)
pool4  = MaxPooling2D(pool_size = (2, 2))(block4)
block5 = residual_block(256, pool4)
pool5  = MaxPooling2D(pool_size = (2, 2))(block5)
out  = Conv2D(5, (3, 3), padding = 'same', activation = 'sigmoid')(pool5)

#create a model with one input and two outputs 
model = tf.keras.models.Model(inputs = inp, outputs = out)

In [24]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 240, 240, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 240, 240, 16  448         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 240, 240, 16  64         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [25]:
def loss(pred, y):
  
  #extract the boxes that have values (i.e discard boxes that are zeros)
  mask = y[...,0]
  boxA    = tf.boolean_mask(y, mask)
  boxB    = tf.boolean_mask(pred, mask)
  
  prediction_error = tf.keras.losses.binary_crossentropy(y[...,0], pred[...,0])

  detection_error = tf.losses.absolute_difference(boxA[...,1:], boxB[...,1:]) 
  
  
  return tf.reduce_mean(prediction_error) + 10*detection_error
           
def grad(model, x, y):
  #record the gradient
  with tf.GradientTape() as tape:
    pred = model(x)
    value = loss(pred, y)
  #return the gradient of the loss function with respect to the model variables 
  return tape.gradient(value, model.trainable_variables)

batches_per_epoch = len(train_dataset)
lr_decay = (1./0.75 -1)/batches_per_epoch
opt = tf.keras.optimizers.Adam(learning_rate=0.0001, beta_1=0.9, beta_2=0.999)

In [26]:
epochs = 20

#initialize the history to record the metrics 
train_loss_history = tf.metrics.Mean('train_loss')

test_loss_history = tf.metrics.Mean('test_loss')

best_loss = 1.0 

In [27]:
for i in range(1, epochs + 1):
  
  for x, y in train_dataset:
    pred = model(x)
    grads = grad(model, x, y)

    #update the paramters of the model 
    opt.apply_gradients(zip(grads, model.trainable_variables), global_step = tf.train.get_or_create_global_step())

    #record the metrics of the current batch 
    loss_value = loss(pred, y)
    
    #calcualte the metrics of the current batch
    train_loss_history(loss_value)
    
  #loop over the test dataset 
  for x, y in test_dataset:
    pred = model(x)
    
    #calcualte the metrics of the current batch 
    loss_value = loss(pred, y)
    
    #record the values of the metrics 
    test_loss_history(loss_value)
        
  #print out the results 
  print("epoch: [{0:d}/{1:d}], Train: [loss: {2:0.4f}], Test: [loss: {3:0.4f}]".
       format(i, epochs, train_loss_history.result(), 
              test_loss_history.result()))
  
  current_loss = test_loss_history.result().numpy()
  
  #save the best model 
  if current_loss  < best_loss:
    best_loss = current_loss
    print('saving best model with loss ', current_loss)
    model.save('keras.h5')
    
  #clear the history after each epoch 
  train_loss_history.init_variables()
  test_loss_history.init_variables()


ValueError: Input 0 of layer "model" is incompatible with the layer: expected shape=(None, 240, 240, 3), found shape=(240, 240, 3)

In [None]:
from keras.models import load_model
best_model = load_model('keras.h5')

In [28]:
#visualize the predicted bounding box
def plot_pred(img_id):
  font = cv2.FONT_HERSHEY_SIMPLEX

  raw = cv2.imread(img_id)[:,:,::-1]
  
  h, w = (512, 512)
  
  img = cv2.resize(raw, (IMG_SIZE, IMG_SIZE)).astype('float32')
  img = np.expand_dims(img, 0)/255. 
  
  boxes = best_model(img).numpy()[0]

  raw = cv2.resize(raw, (w, h))
    
  for i in range(0, ANCHOR_SIZE):
    for j in range(0, ANCHOR_SIZE):
      box = boxes[i, j, 1:] * w
      lbl = round(boxes[i, j, 0], 2)
      if lbl > 0.5:
        color = [random.randint(0, 255) for _ in range(0, 3)]
        raw = cv2.rectangle(raw, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 3) 
        raw = cv2.rectangle(raw, (int(box[0]), int(box[1])-30), (int(box[0])+70, int(box[1])), color, cv2.FILLED)
        raw = cv2.putText(raw, str(lbl), (int(box[0]), int(box[1])), font, 1, (255, 255, 255), 2)
        

  plt.axis('off')
  plt.imshow(raw)
  plt.show()

In [46]:
img_id = np.random.choice(test_dataset.take(1).as_numpy_iterator()[0][0].shape[0])
plot_pred(img_id)

TypeError: '_NumpyIterator' object is not subscriptable

In [47]:
plot_pred('test.jpg')

TypeError: 'NoneType' object is not subscriptable