Step 1: Preprocess the FDDB Dataset

Extract the images and annotations from the dataset and preprocess them as necessary. This may involve resizing the images, converting them to grayscale, and normalizing the pixel values.

In [74]:
%pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="Rx8IIyW6gaJjJza6uCmr")
project = rf.workspace("fddb").project("face-detection-40nq0")
dataset = project.version(1).download("voc")

ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awsebcli 3.20.3 requires PyYAML<5.5,>=5.3.1, but you have pyyaml 6.0 which is incompatible.

[notice] A new release of pip is available: 23.0 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting roboflow
  Downloading roboflow-1.0.5-py3-none-any.whl (56 kB)
     ---------------------------------------- 0.0/56.2 kB ? eta -:--:--
     ---------------------------------------- 56.2/56.2 kB 3.1 MB/s eta 0:00:00
Collecting cycler==0.10.0
  Downloading cycler-0.10.0-py2.py3-none-any.whl (6.5 kB)
Collecting idna==2.10
  Downloading idna-2.10-py2.py3-none-any.whl (58 kB)
     ---------------------------------------- 0.0/58.8 kB ? eta -:--:--
     ---------------------------------------- 58.8/58.8 kB ? eta 0:00:00
Collecting pyparsing==2.4.7
  Downloading pyparsing-2.4.7-py2.py3-none-any.whl (67 kB)
     ---------------------------------------- 0.0/67.8 kB ? eta -:--:--
     ---------------------------------------- 67.8/67.8 kB 3.6 MB/s eta 0:00:00
Collecting python-dotenv
  Downloading python_dotenv-1.0.0-py3-none-any.whl (19 kB)
Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status '

Extracting Dataset Version Zip to Face-detection-1 in voc:: 100%|██████████| 11552/11552 [00:23<00:00, 482.14it/s]


In [65]:
import os
import cv2
import numpy as np

from keras.utils import img_to_array

def convert_data(fddb_path, output_path):
    # Create output directory if it doesn't exist
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    # Loop over all the fold files in the FDDB dataset
    for fold_idx in range(1, 11):
        fold_path = os.path.join(fddb_path, 'FDDB-folds', f'FDDB-fold-{fold_idx:02d}.txt')
        with open(fold_path, 'r') as f:
            lines = f.readlines()
            for i in range(0, len(lines), 2):
                # Read the image and convert to RGB
                img_path = os.path.join(fddb_path, lines[i].strip() + '.jpg')
                img = cv2.imread(img_path)

                # Get the face bounding box coordinates
                num_faces = int(lines[i+1])
                face_coords = []
                for j in range(num_faces):
                    face_coord = [int(coord) for coord in lines[i+2+j].split()]
                    face_coords.append(face_coord)

                # Crop and resize the faces and save them to disk
                for face_idx, face_coord in enumerate(face_coords):
                    x, y, w, h = face_coord
                    face = img[y:y+h, x:x+w]
                    face = cv2.resize(face, (224, 224))
                    face = img_to_array(face)
                    output_file = os.path.join(output_path, f'{fold_idx}_{i//2}_{face_idx}.npy')
                    np.save(output_file, face)


In [66]:
fddb_path = 'data/FDDB'
output_path = 'data/FDDB/preprocessed/data'

convert_data(fddb_path, output_path)


ValueError: invalid literal for int() with base 10: '2002/08/26/big/img_265\n'

Step 2: Prepare the Training Data

Use the annotations in the dataset to create labeled training data.
Generate positive and negative samples by extracting image patches that contain faces or do not contain faces, respectively.
Split the training data into a training set and a validation set.

In [26]:
import cv2
import numpy as np
from sklearn.model_selection import train_test_split


# Load images and preprocess them
image_size = (224, 224) # or any other fixed size
train_images = []
train_annotations = []
val_images = []
val_annotations = []

for image_path in annotations.keys():
    image = cv2.imread(image_path)
    image = cv2.resize(image, image_size)
    image = image.astype(np.float32) / 255.0 # normalize pixel values to [0, 1]
    boxes = annotations[image_path]
    
    if np.random.random() < 0.8: # 80% of the images for training
        train_images.append(image)
        train_annotations.append(boxes)
    else: # 20% of the images for validation
        val_images.append(image)
        val_annotations.append(boxes)

# Convert the lists to numpy arrays
train_images = np.array(train_images)
train_annotations = np.array(train_annotations)
val_images = np.array(val_images)
val_annotations = np.array(val_annotations)

# Generate training and validation samples
train_samples = []
train_labels = []
val_samples = []
val_labels = []

for i in range(len(train_images)):
    sample = train_images[i]
    boxes = train_annotations[i]
    label = np.zeros((4,))
    for box in boxes:
        label += np.array([
            box[0] / image_size[0],
            box[1] / image_size[1],
            box[2] / image_size[0],
            box[3] / image_size[1],
        ])
    label /= len(boxes)
    train_samples.append(sample)
    train_labels.append(label)

for i in range(len(val_images)):
    sample = val_images[i]
    boxes = val_annotations[i]
    label = np.zeros((4,))
    for box in boxes:
        label += np.array([
            box[0] / image_size[0],
            box[1] / image_size[1],
            box[2] / image_size[0],
            box[3] / image_size[1],
        ])
    label /= len(boxes)
    val_samples.append(sample)
    val_labels.append(label)

train_samples = np.array(train_samples)
train_labels = np.array(train_labels)
val_samples = np.array(val_samples)
val_labels = np.array(val_labels)

print('train_samples.shape:', train_samples.shape)


train_samples.shape: (0,)


In [73]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

def load_image(image_path):
    img = cv2.imread(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def load_annotations(annotations_path):
    with open(annotations_path, 'r') as f:
        lines = f.readlines()
    image_paths = []
    bboxes = []
    for i, line in enumerate(lines):
        if i % 2 == 0:
            image_paths.append(line.strip())
        else:
            num_faces = int(line.strip())
            face_bboxes = []
            for j in range(num_faces):
                face_bbox = [float(x) for x in lines[i+j+1].strip().split()]
                print(face_bbox)
                # convert from ellipse to bounding box
                center_x, center_y, major_axis, minor_axis, angle = face_bbox[0:5]
                face_bbox = [center_x - 0.5 * major_axis,
                             center_y - 0.5 * minor_axis,
                             center_x + 0.5 * major_axis,
                             center_y + 0.5 * minor_axis]
                face_bboxes.append(face_bbox)
            bboxes.append(face_bboxes)
    return image_paths, bboxes

def preprocess_data(image_paths, bboxes, target_size):
    X = []
    y = []
    for i, image_path in enumerate(image_paths):
        img = load_image(image_path)
        for bbox in bboxes[i]:
            xmin, ymin, xmax, ymax = bbox
            x_scale = target_size[0] / img.shape[1]
            y_scale = target_size[1] / img.shape[0]
            xmin = int(xmin * x_scale)
            ymin = int(ymin * y_scale)
            xmax = int(xmax * x_scale)
            ymax = int(ymax * y_scale)
            # crop and resize the face region
            face = img[ymin:ymax, xmin:xmax, :]
            face = cv2.resize(face, target_size)
            X.append(face)
            y.append([xmin, ymin, xmax, ymax])
    X = np.array(X)
    y = np.array(y)
    return X, y

def create_train_val_split(X, y, test_size):
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=42)
    return X_train, X_val, y_train, y_val

# Example usage:

annotations_path = 'data\FDDB\FDDB-folds\original\FDDB-fold-01-ellipseList.txt'
image_paths, bboxes = load_annotations(annotations_path)
X, y = preprocess_data(image_paths, bboxes, target_size=(224, 224))
X_train, X_val, y_train, y_val = create_train_val_split(X, y, test_size=0.2)


[123.5833, 85.5495, 1.265839, 269.6934, 161.7812, 1.0]


ValueError: invalid literal for int() with base 10: '2002/08/26/big/img_265'

Step 3: Define the Model Architecture

Choose a suitable model architecture for face detection. You can use pre-trained models such as YOLO, RetinaNet, or SSD or build your own custom model.
Define the model architecture using a deep learning framework such as TensorFlow or Keras.

In [21]:
from keras.models import Model
from keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, GlobalAveragePooling2D
from keras.applications.resnet import ResNet50

def build_model():
    # Define the input shape
    input_shape = (None, None, 3)
    input_layer = Input(shape=input_shape)
    
    # Define the backbone network (ResNet50 in this example)
    backbone = ResNet50(weights='imagenet', include_top=False)
    backbone_output = backbone(input_layer)
    
    # Define the detection head
    detection_head = Conv2D(64, (3, 3), activation='relu', padding='same')(backbone_output)
    detection_head = MaxPooling2D((2, 2))(detection_head)
    detection_head = Conv2D(128, (3, 3), activation='relu', padding='same')(detection_head)
    detection_head = MaxPooling2D((2, 2))(detection_head)
    detection_head = GlobalAveragePooling2D()(detection_head)
    detection_head = Dense(256, activation='relu')(detection_head)
    detection_head = Dense(4, activation='sigmoid')(detection_head)
    
    # Define the model
    model = Model(inputs=input_layer, outputs=detection_head)
    
    return model


Step 4: Train the Model

Train the model on the training data using a suitable optimizer and loss function.
Monitor the training process by plotting the loss and accuracy during each epoch.
Evaluate the model on the validation set to ensure that it is not overfitting.

In [22]:
model = build_model()
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None, None, 3)]   0         
                                                                 
 resnet50 (Functional)       (None, None, None, 2048)  23587712  
                                                                 
 conv2d (Conv2D)             (None, None, None, 64)    1179712   
                                                                 
 max_pooling2d (MaxPooling2D  (None, None, None, 64)   0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 128)   73856     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, None, None, 128)  0         
 2D)                                                         

In [23]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

batch_size = 32
epochs = 10

early_stopping = EarlyStopping(patience=2)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)

# Generate training data

print("Training...")

print(train_samples.shape)
print(train_labels.shape)
print(val_samples.shape)
print(val_labels.shape)

history = model.fit(train_samples, train_labels, epochs=10, batch_size=32, validation_data=(val_samples, val_labels))




Training...
(0,)
(0,)
(0,)
(0,)
Epoch 1/10


ValueError: Unexpected result of `train_function` (Empty logs). Please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [None]:
# Plot the training and validation loss and accuracy
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 4))

# Plot the loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='train')
plt.plot(history)
plt.plot(history.history['val_loss'], label='validation')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Loss')

# Plot the accuracy
plt.subplot(1, 2, 2)
plt.plot(history.history['acc'], label='train')
plt.plot(history.history['val_acc'], label='validation')
plt.legend()
plt.xlabel('Epoch')
plt.ylabel('Accuracy')

plt.show()


# Load the model
from keras.models import load_model

model = load_model('best_model.h5')


# Generate test data
test_samples = []
test_labels = []
for annotation in annotations:
    filename, x1, y1, x2, y2 = annotation
    image = cv2.imread(filename)
    if image is None:
        continue
    size = np.random.randint(12, min(image.shape[:2]))
    x = np.random.randint(0, image.shape[1]-size)
    y = np.random.randint(0, image.shape[0]-size)
    sample = image[y:y+size, x:x+size]
    sample = cv2.resize(sample, (64, 64))
    test_samples.append(sample)
    test_labels.append([1, x1-x, y1-y, x2-x, y2-y])
test_samples = np.array(test_samples)
test_labels = np.array(test_labels)


# Evaluate the model
model.evaluate(test_samples, test_labels)


# Make predictions
predictions = model.predict(test_samples)


# Visualize the predictions
for i in range(10):
    sample = test_samples[i]
    prediction = predictions[i]
    x1 = int(prediction[1])
    y1 = int(prediction[2])
    x2 = int(prediction[3])
    y2 = int(prediction[4])
    cv2.rectangle(sample, (x1, y1), (x2, y2), (0, 255, 0), 2)
    plt.imshow(sample)
    plt.show()
