In [5]:
import os
import pandas as pd

def load_wider_face_annotations(data_dir):
    """
    Load WIDER face dataset annotations from text files into a pandas DataFrame.
    Args:
        data_dir: str, path to the directory containing the dataset files.
    Returns:
        annotations: pandas DataFrame containing the annotations for the WIDER face dataset.
    """
    # Define the paths to the annotation files
    train_file = os.path.join(data_dir, 'wider_face_train_bbx_gt.txt')
    val_file = os.path.join(data_dir, 'wider_face_val_bbx_gt.txt')
    test_file = os.path.join(data_dir, 'wider_face_test_filelist.txt')

    # Load the training annotations
    train_annotations = pd.read_csv(train_file, sep='\t', header=None,
                                    names=['filename', 'num_faces', 'x1', 'y1', 'width', 'height', 'blur', 'expression', 'illumination', 'invalid', 'occlusion', 'pose'])

    # Drop the columns we don't need
    train_annotations = train_annotations.drop(['num_faces', 'blur', 'expression', 'illumination', 'invalid', 'occlusion', 'pose'], axis=1)

    # Add the full path to the image filename
    train_annotations['filename'] = train_annotations['filename'].apply(lambda x: os.path.join(data_dir, 'WIDER_train/images', x))

    # Take the first 1400 images
    train_annotations = train_annotations[:1400]

    # Load the validation annotations
    val_annotations = pd.read_csv(val_file, sep='\t', header=None,
                                  names=['filename', 'num_faces', 'x1', 'y1', 'width', 'height', 'blur', 'expression', 'illumination', 'invalid', 'occlusion', 'pose'])

    # Drop the columns we don't need
    val_annotations = val_annotations.drop(['num_faces', 'blur', 'expression', 'illumination', 'invalid', 'occlusion', 'pose'], axis=1)

    # Add the full path to the image filename
    val_annotations['filename'] = val_annotations['filename'].apply(lambda x: os.path.join(data_dir, 'WIDER_val/images', x))

    # Take the first 200 images
    val_annotations = val_annotations[:200]

    # Load the test annotations
    test_annotations = pd.read_csv(test_file, sep=' ', header=None, names=['filename'])

    # Add the full path to the image filename
    test_annotations['filename'] = test_annotations['filename'].apply(lambda x: os.path.join(data_dir, 'WIDER_test/images', x + '.jpg'))

    # Take the first 400 images
    test_annotations = test_annotations[:400]

    # Combine the training, validation, and test annotations into a single DataFrame
    annotations = pd.concat([train_annotations, val_annotations, test_annotations], ignore_index=True)

    return annotations


In [6]:
data_dir = 'data/WIDERFace/wider_face_split'
annotations = load_wider_face_annotations(data_dir)

In [13]:
import os
from PIL import Image
import numpy as np
import pandas as pd

def load_custom_datasets(annotations, train_dir, test_dir, validation_dir, train_size=1400, test_size=400, val_size=200):
    """
    Load custom train, test, and validation datasets from the WIDER Face dataset annotations and images.
    Args:
        annotations: str, the directory containing the WIDER Face dataset annotations.
        image_dir: str, the directory containing the WIDER Face dataset images.
        train_size: int, the number of images to use for training.
        test_size: int, the number of images to use for testing.
        val_size: int, the number of images to use for validation.
    Returns:
        train_dataset: tuple, (list of numpy arrays, list of numpy arrays), the train dataset images and labels.
        test_dataset: tuple, (list of numpy arrays, list of numpy arrays), the test dataset images and labels.
        val_dataset: tuple, (list of numpy arrays, list of numpy arrays), the validation dataset images and labels.
    """
 # Load the first train_size images for the train dataset.
    train_annotation_path = os.path.join(annotations, 'wider_face_train_bbx_gt.txt')
    with open(train_annotation_path) as f:
        train_lines = f.readlines()
    train_lines = train_lines[:train_size * 2] # Multiply by 2 since each image has 2 lines of annotations
    train_images = []
    train_labels = []
    for i in range(0, len(train_lines), 2):
        filepath = os.path.join(train_dir, train_lines[i].strip())
        image = np.array(Image.open(filepath))
        num_boxes = int(train_lines[i + 1])
        boxes = []
        for j in range(num_boxes):
            box_coords = np.array(train_lines[i + 2 + j].strip().split(), dtype=np.float32)
            boxes.append(box_coords)
        train_images.append(image)
        train_labels.append(np.array(boxes))

    # Load the first test_size images for the test dataset.
    test_annotation_path = os.path.join(annotations, 'wider_face_test_filelist.txt')
    with open(test_annotation_path) as f:
        test_lines = f.readlines()
    test_lines = test_lines[:test_size * 2] # Multiply by 2 since each image has 2 lines of annotations
    test_images = []
    test_labels = []
    for i in range(0, len(test_lines), 2):
        filepath = os.path.join(test_dir, test_lines[i].strip())
        image = np.array(Image.open(filepath))
        test_images.append(image)
        test_labels.append(np.array([]))

    # Load the first val_size images for the validation dataset.
    val_annotation_path = os.path.join(annotations, 'wider_face_val_bbx_gt.txt')
    with open(val_annotation_path) as f:
        val_lines = f.readlines()
    val_lines = val_lines[:val_size * 2] # Multiply by 2 since each image has 2 lines of annotations
    val_images = []
    val_labels = []
    for i in range(0, len(val_lines), 2):
        filepath = os.path.join(validation_dir, val_lines[i].strip())
        image = np.array(Image.open(filepath))
        num_boxes = int(val_lines[i + 1])
        boxes = []
        for j in range(num_boxes):
            box_coords = np.array(val_lines[i + 2 + j].strip().split(), dtype=np.float32)
            boxes.append(box_coords)
        val_images.append(image)
        val_labels.append(np.array(boxes))

    train_dataset = (train_images, train_labels)
    test_dataset = (test_images, test_labels)
    val_dataset = (val_images, val_labels)

    return train_dataset, test_dataset, val_dataset

In [14]:
train_dir = 'data/WIDERFace/WIDER_train'
test_dir = 'data/WIDERFace/WIDER_test'
validation_dir = 'data/WIDERFace/WIDER_val'
annotation_dir = 'data/WIDERFace/wider_face_split'
train_dataset, test_dataset, val_dataset = load_custom_datasets(annotation_dir, train_dir, test_dir, validation_dir)


FileNotFoundError: [Errno 2] No such file or directory: 'data/WIDERFace/WIDER_train\\449 330 122 149 0 0 0 0 0 0'

In [20]:
import keras
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from keras.callbacks import *
from keras import Input

In [21]:
input_shape = (300, 300, 3)
num_classes = 1
num_anchors = 4
num_feature_maps = 6
aspect_ratios = [[1, 2, 0.5], [1, 2, 3, 0.5, 0.3333], [1, 2, 3, 0.5, 0.3333], [1, 2, 3, 0.5, 0.3333], [1, 2, 0.5], [1, 2, 0.5]]
scales = [0.1, 0.2, 0.375, 0.55, 0.725, 0.9]

inputs = Input(shape=input_shape)
x = inputs

for i in range(num_feature_maps):
    x = Conv2D(256, (1, 1), padding='same', name='conv{}'.format(i+1))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = ZeroPadding2D()(x)
    x = Conv2D(512, (3, 3), strides=(2, 2), name='conv{}_2'.format(i+1))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)

classification_layers = []
localization_layers = []

for i in range(num_feature_maps):
    x = Conv2D(num_anchors * num_classes, (3, 3), padding='same', name='conv{}_class'.format(i+1))(x)
    x = Reshape((-1, num_classes))(x)
    classification_layers.append(x)

for i in range(num_feature_maps):
    x = Conv2D(num_anchors * 4, (3, 3), padding='same', name='conv{}_box'.format(i+1))(x)
    x = Reshape((-1, 4))(x)
    localization_layers.append(x)

classification_output = Concatenate(axis=1, name='classification')(classification_layers)
localization_output = Concatenate(axis=1, name='localization')(localization_layers)

model = Model(inputs=inputs, outputs=[classification_output, localization_output])


ValueError: Input 0 of layer "conv2_class" is incompatible with the layer: expected min_ndim=4, found ndim=3. Full shape received: (None, 100, 1)

In [None]:
import tensorflow as tf
from keras.losses import binary_crossentropy
from keras.backend import epsilon

def smooth_l1(y_true, y_pred):
    absolute_loss = tf.abs(y_true - y_pred)
    square_loss = 0.5 * (y_true - y_pred)**2
    loss = tf.where(tf.less(absolute_loss, 1.0), square_loss, absolute_loss - 0.5)
    return tf.reduce_mean(loss)

def total_loss(y_true, y_pred):
    classification_loss = binary_crossentropy(y_true[:,:,:1], y_pred[:,:,:1])
    localization_loss = smooth_l1(y_true[:,:,1:], y_pred[:,:,1:])
    regularization_loss = tf.reduce_sum(tf.abs(y_pred))
    return classification_loss + localization_loss + 0.001 * regularization_loss

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss=total_loss, metrics=[smooth_l1])


In [22]:
import os
import cv2
import numpy as np

# Define the path to the WIDER Face dataset
dataset_path = 'data/WIDERFace'

# Load the annotations
train_file = os.path.join(dataset_path, 'wider_face_split', 'wider_face_train_bbx_gt.txt')
val_file = os.path.join(dataset_path, 'wider_face_split', 'wider_face_val_bbx_gt.txt')

def load_annotations(file_path):
    with open(file_path) as f:
        annotations = f.readlines()
    annotations = [line.strip().split() for line in annotations]
    annotations = [(os.path.join(dataset_path, 'WIDER_train', line[0]), int(line[1]), [list(map(int, box.split())) for box in line[2:]]) for line in annotations]
    return annotations

train_annotations = load_annotations(train_file)
val_annotations = load_annotations(val_file)

# Define the classes
classes = ['face']

# Define the input size
input_shape = (300, 300)

# Define the preprocessing function
def preprocess_data(image_path, annotations):
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, input_shape)
    image = image.astype(np.float32) / 255.0
    boxes = []
    labels = []
    for annotation in annotations:
        box = np.array(annotation[:4])
        box = np.clip(box, 0, input_shape[0])
        box[2:] = box[2:] - box[:2]
        boxes.append(box)
        labels.append(classes.index('face'))
    return image, np.array(boxes), np.array(labels)


IndexError: list index out of range

In [None]:
# Import necessary libraries
import tensorflow as tf
import numpy as np
import os

# Define the backbone network (ResNet50)
backbone = tf.keras.applications.ResNet50(include_top=False, input_shape=(300, 300, 3))

# Define the SSD model
num_classes = 1
input_shape = (300, 300, 3)
ssd_model = tf.keras.models.Sequential(name='ssd')
ssd_model.add(backbone)
ssd_model.add(tf.keras.layers.Conv2D(1024, kernel_size=(3, 3), padding='same', activation='relu'))
ssd_model.add(tf.keras.layers.Conv2D(1024, kernel_size=(1, 1), padding='same', activation='relu'))
ssd_model.add(tf.keras.layers.Conv2D(4*num_classes, kernel_size=(1, 1), padding='same', activation=None))
ssd_model.add(tf.keras.layers.Reshape((-1, 4)))
ssd_model.add(tf.keras.layers.Activation('sigmoid'))

# Define the loss function and optimizer
ssd_loss = tf.keras.losses.Huber()
ssd_optimizer = tf.keras.optimizers.Adam(lr=0.0001)

# Compile the model
ssd_model.compile(optimizer=ssd_optimizer, loss=ssd_loss)

# Prepare the WIDER Face dataset
train_dataset = tf.data.Dataset.from_tensor_slices(train_data)
train_dataset = train_dataset.map(preprocess_data)
train_dataset = train_dataset.shuffle(buffer_size=1000)
train_dataset = train_dataset.batch(batch_size=32)

val_dataset = tf.data.Dataset.from_tensor_slices(val_data)
val_dataset = val_dataset.map(preprocess_data)
val_dataset = val_dataset.batch(batch_size=32)

# Train the model
epochs = 50
steps_per_epoch = len(train_data) // batch_size
validation_steps = len(val_data) // batch_size

ssd_model.fit(train_dataset, epochs=epochs, steps_per_epoch=steps_per_epoch, validation_data=val_dataset, validation_steps=validation_steps)

# Evaluate the model on the validation set
val_loss = ssd_model.evaluate(val_dataset)

# Compute the average precision (AP) for different IoU thresholds
iou_thresholds = [0.5, 0.7, 0.9]
average_precisions = ssd_utils.compute_average_precisions(val_dataset, ssd_model, iou_thresholds=iou_thresholds)

# Use the trained model to detect faces in new images
test_image = cv2.imread('test_image.jpg')
test_image = cv2.resize(test_image, input_shape[:2])
test_image = np.expand_dims(test_image, axis=0)
detections = ssd_model.predict(test_image)

# Post-process the output bounding box predictions to remove duplicates and non-maximum suppression
boxes, scores = ssd_utils.decode_detections(detections, confidence_thresh=0.5, iou_threshold=0.45, top_k=200)
boxes, scores = ssd_utils.non_maximum_suppression(boxes, scores, max_output_size=50, iou_threshold=0.45)
