In [None]:
!pip install tensorflow opencv-python matplotlib 

In [None]:
import tensorflow as tf
import json

In [None]:
### OPTIONAL - Reduce the risk of OOM errors by letting GPU memory grow on demand
# Has no effect when no GPU is visible to TensorFlow (the loop body never runs)
gpus = tf.config.experimental.list_physical_devices('GPU')
for physical_gpu in gpus:
    tf.config.experimental.set_memory_growth(device=physical_gpu, enable=True)

In [None]:
import tensorflow as tf
import os
import numpy as np
import cv2
from matplotlib import pyplot as plt

# # Building image loading function
def load_image(x):
    """Read a JPEG file from disk and decode it into a 3-channel image tensor.

    Args:
        x: Path of the JPEG file (string or scalar string tensor).

    Returns:
        The decoded image tensor with three colour channels.
    """
    byte_img = tf.io.read_file(x)
    # Force three channels: a grayscale JPEG would otherwise decode to a single
    # channel and no longer match the (resize, resize, 3) model input. It also
    # makes the channel dimension statically known inside tf.data pipelines.
    img = tf.io.decode_jpeg(byte_img, channels=3)
    return img

# # Building label loading function
def load_labels(label_path):
    """Parse one JSON annotation file into a (class, bbox) pair.

    Args:
        label_path: Object whose ``.numpy()`` returns the path of the JSON
            file (a string tensor when called through ``tf.py_function``).

    Returns:
        A tuple ``([class], bbox)`` built from the file's ``'class'`` and
        ``'bbox'`` entries; the class is wrapped in a one-element list.
    """
    json_path = label_path.numpy()
    with open(json_path, mode='r', encoding='utf-8') as handle:
        annotation = json.load(handle)

    class_id = annotation['class']
    bbox = annotation['bbox']
    return [class_id], bbox

In [None]:
# Load Augmented Images to Tensorflow Dataset

# Directory layout of the augmented dataset: AUG_DATA/<split>/<IMAGES or LABELS>/
AUG_DATA = "aug_data"
# Split sub-folders
TRAIN = "train"
VAL = "val"
TEST = "test"
# Per-split sub-folders holding the *.jpg images and the *.json annotations
IMAGES = "images"
LABELS = "labels"

# Side length, in pixels, of the square images fed to the network
resize = 120

def get_images(folder):
    """Build a dataset of preprocessed images for one split.

    Args:
        folder: Split sub-folder name under AUG_DATA (e.g. TRAIN, TEST, VAL).

    Returns:
        A tf.data.Dataset yielding, in file-listing order, each JPEG of the
        split resized to (resize, resize) and scaled by 1/255.
    """
    pattern = os.path.join(AUG_DATA, folder, IMAGES, '*.jpg')

    def _preprocess(path):
        decoded = load_image(path)
        # A small fixed input size keeps the model cheap to train and run
        shrunk = tf.image.resize(decoded, (resize, resize))
        # Scale pixel values from [0, 255] down to [0, 1]
        return shrunk / 255

    # shuffle=False keeps the listing order deterministic so images stay
    # aligned with the label files listed the same way
    return tf.data.Dataset.list_files(pattern, shuffle=False).map(_preprocess)

# One preprocessed image dataset per split, built in train / test / val order
train_images, test_images, val_images = (
    get_images(split) for split in (TRAIN, TEST, VAL)
)

In [None]:
### OPTIONAL - Inspect the first preprocessed training image
# Only shape and dtype are printed; dumping the full pixel array would flood the output
first_train_image = next(train_images.as_numpy_iterator())
print(first_train_image.shape, first_train_image.dtype)

### OPTIONAL - Check that the label listing picks up the expected JSON files
# Stored under its own name so this check never rebinds `train_labels`, which
# must hold the parsed-label dataset when images and labels are zipped together.
train_label_files = tf.data.Dataset.list_files(os.path.join(AUG_DATA, TRAIN, LABELS, '*.json'), shuffle=False)
next(train_label_files.as_numpy_iterator())

In [None]:
# Load Labels to TensorFlow Dataset

def get_labels(folder):
    """Build a dataset of parsed annotations for one split.

    Args:
        folder: Split sub-folder name under AUG_DATA (e.g. TRAIN, TEST, VAL).

    Returns:
        A tf.data.Dataset yielding, in file-listing order, the output of
        load_labels for each JSON file: the class as tf.uint8 and the
        bounding box as tf.float16.
    """
    pattern = os.path.join(AUG_DATA, folder, LABELS, '*.json')

    def _parse(path):
        # load_labels is plain Python (json + open), so it has to be wrapped
        # in py_function to run inside the tf.data pipeline
        return tf.py_function(load_labels, [path], [tf.uint8, tf.float16])

    return tf.data.Dataset.list_files(pattern, shuffle=False).map(_parse)

# One parsed-label dataset per split, built in train / test / val order
train_labels, test_labels, val_labels = (
    get_labels(split) for split in (TRAIN, TEST, VAL)
)


In [None]:
### OPTIONAL - Check dataset sizes
# Each split should report the same number of images and labels, because the
# two datasets are paired element by element when they are zipped.
len(train_images), len(train_labels), len(test_images), len(test_labels), len(val_images), len(val_labels)

In [None]:
# Combine image and label samples into the final (image, label) datasets

def _build_split(images, labels, shuffle_buffer, batch_size=8, prefetch_size=4):
    """Pair images with labels, then shuffle, batch and prefetch the result."""
    paired = tf.data.Dataset.zip((images, labels))
    return paired.shuffle(shuffle_buffer).batch(batch_size).prefetch(prefetch_size)

# The shuffle buffer should be at least as large as the split for a full shuffle.
# NOTE(review): the earlier note quoted 11100 images / a 12100 buffer, but 1300
# is the value actually used - confirm against the real dataset size.
train = _build_split(train_images, train_labels, shuffle_buffer=1300)
test = _build_split(test_images, test_labels, shuffle_buffer=300)
val = _build_split(val_images, val_labels, shuffle_buffer=300)

In [None]:
### OPTIONAL - Check the shape and labels of one training batch
# Draw a single batch so the printed shape and the displayed labels belong to
# the same samples (each new iterator over the shuffled dataset yields a different batch)
batch_images, batch_labels = next(train.as_numpy_iterator())
print(batch_images.shape)
batch_labels

In [None]:
### OPTIONAL - View images and annotations
data_samples = train.as_numpy_iterator()
res = data_samples.next()

# Size the grid from the batch itself: a batch can hold fewer than 8 samples
# (batching does not drop the remainder)
n_samples = len(res[0])

# squeeze=False keeps `ax` two-dimensional even when the batch holds one image
fig, ax = plt.subplots(ncols=n_samples, figsize=(20,20), squeeze=False)
for idx in range(n_samples):
    sample_image = res[0][idx]
    sample_coords = res[1][1][idx]
    sample_image = cv2.UMat(sample_image) # Wrap in a UMat so OpenCV can draw on it
    # First two values are one corner, last two the opposite corner, both
    # normalised to [0, 1]; scale them to pixel coordinates before drawing
    cv2.rectangle(sample_image,
                  tuple(np.multiply(sample_coords[:2], [resize,resize]).astype(int)),
                  tuple(np.multiply(sample_coords[2:], [resize,resize]).astype(int)),
                        (255,0,0), 2)
    sample_image_display = np.asarray(sample_image.get()) # Back to a NumPy array for imshow
    ax[0][idx].imshow(sample_image_display)

In [None]:
# Build deep learning
# Import Layers and Base Network
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from tensorflow.keras.applications import VGG16

In [None]:
# Download / instantiate VGG16 without its dense classification head (include_top=False).
# This standalone instance is only inspected with vgg.summary(); build_model() creates its own.
vgg = VGG16(include_top=False)

In [None]:
# OPTIONAL - Print the layer-by-layer structure of the VGG16 backbone
vgg.summary()

In [None]:
# Build instance of Network
def build_model():
    """Assemble the two-headed face-tracking network on top of VGG16.

    The input is a (resize, resize, 3) image. The VGG16 feature maps
    (include_top=False) feed two independent heads, each made of a global
    max-pool, a 2048-unit ReLU layer and a sigmoid output layer.

    Returns:
        A Keras Model whose outputs are [class_score, bbox]: one sigmoid unit
        for the classification head and four sigmoid units for the box head.
    """
    image_input = Input(shape=(resize, resize, 3))

    backbone_features = VGG16(include_top=False)(image_input)

    # Classification head: a single sigmoid score
    class_pooled = GlobalMaxPooling2D()(backbone_features)
    class_hidden = Dense(2048, activation='relu')(class_pooled)
    class_output = Dense(1, activation='sigmoid')(class_hidden)

    # Bounding-box head: four sigmoid outputs, i.e. coordinates in [0, 1]
    bbox_pooled = GlobalMaxPooling2D()(backbone_features)
    bbox_hidden = Dense(2048, activation='relu')(bbox_pooled)
    bbox_output = Dense(4, activation='sigmoid')(bbox_hidden)

    return Model(inputs=image_input, outputs=[class_output, bbox_output])

In [None]:
# Instantiate the two-headed network (classification score + bounding box)
facetracker = build_model()

In [None]:
# OPTIONAL - Print the structure of the assembled network
facetracker.summary()

In [None]:
# Define the optimizer and its learning-rate schedule
# With staircase=True the rate drops in discrete steps: it is multiplied by
# decay_rate once every decay_steps optimizer steps.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=0.0001,
    decay_steps=1000, # Decay interval in optimizer steps; chosen not to be too aggressive
    decay_rate=0.75,
    staircase=True)

# Adam driven by the decaying schedule above
opt = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

In [None]:
# Create Localization Loss and Classification Loss function
def localization_loss(y_true, yhat):
    """Sum-of-squares box loss over a batch.

    Both arguments are indexed as (batch, 4) arrays whose columns are treated
    as [x1, y1, x2, y2]. The loss adds the squared error of the first corner
    (columns 0-1) to the squared error of the box width and height.

    Args:
        y_true: Ground-truth boxes.
        yhat: Predicted boxes.

    Returns:
        A scalar: corner error plus size error, summed over the batch.
    """
    # Squared error of the first corner (x1, y1)
    corner_true, corner_pred = y_true[:, :2], yhat[:, :2]
    delta_coord = tf.reduce_sum(tf.square(corner_true - corner_pred))

    # Box extents derived from the two corners
    true_height = y_true[:, 3] - y_true[:, 1]
    true_width = y_true[:, 2] - y_true[:, 0]
    pred_height = yhat[:, 3] - yhat[:, 1]
    pred_width = yhat[:, 2] - yhat[:, 0]

    # Squared error of width and height
    width_error = tf.square(true_width - pred_width)
    height_error = tf.square(true_height - pred_height)
    delta_size = tf.reduce_sum(width_error + height_error)

    return delta_coord + delta_size

# Classification head uses binary cross-entropy; the box head uses the custom localization loss
classloss = tf.keras.losses.BinaryCrossentropy()
regressloss = localization_loss

In [None]:
# Train Neural Network Class
class FaceTracker(Model):
    """Keras Model wrapper that trains a two-headed detector with a custom step.

    The wrapped network must return ``(classes, coords)`` for a batch of
    images. Each batch is expected as ``(X, y)`` where ``y[0]`` holds the
    class labels and ``y[1]`` the bounding boxes.
    """

    def __init__(self, eyetracker,  **kwargs):
        """Store the pre-built network.

        Args:
            eyetracker: Network mapping an image batch to ``(classes, coords)``.
            **kwargs: Forwarded to ``Model.__init__``.
        """
        super().__init__(**kwargs)
        self.model = eyetracker

    def compile(self, opt, classloss, localizationloss, **kwargs):
        """Attach the optimizer and the two loss functions.

        Args:
            opt: Optimizer used to apply gradients in ``train_step``.
            classloss: Loss called as ``classloss(y_true_class, y_pred_class)``.
            localizationloss: Loss called as ``localizationloss(y_true_box, y_pred_box)``.
            **kwargs: Forwarded to ``Model.compile``.
        """
        super().compile(**kwargs)
        self.closs = classloss
        self.lloss = localizationloss
        self.opt = opt

    def _compute_losses(self, X, y, training):
        """Run the network on ``X`` and return the three losses as a dict."""
        classes, coords = self.model(X, training=training)

        batch_classloss = self.closs(y[0], classes)
        # Boxes are cast to float32 so they match the dtype of the predictions
        batch_localizationloss = self.lloss(tf.cast(y[1], tf.float32), coords)
        # Box regression dominates; the classification loss is weighted by 0.5
        total_loss = batch_localizationloss+0.5*batch_classloss

        return {"total_loss":total_loss, "class_loss":batch_classloss, "regress_loss":batch_localizationloss}

    def train_step(self, batch, **kwargs):
        """Run one optimisation step on ``batch`` and return the loss dict."""
        X, y = batch

        with tf.GradientTape() as tape:
            losses = self._compute_losses(X, y, training=True)

        # Differentiate outside the tape context so the gradient computation
        # itself is not recorded
        grad = tape.gradient(losses["total_loss"], self.model.trainable_variables)

        # Use the optimizer handed to compile(), not a notebook-level global
        self.opt.apply_gradients(zip(grad, self.model.trainable_variables))

        return losses

    def test_step(self, batch, **kwargs):
        """Evaluate ``batch`` without updating weights and return the loss dict."""
        X, y = batch
        return self._compute_losses(X, y, training=False)

    def call(self, X, **kwargs):
        """Forward ``X`` (and any keyword arguments) to the wrapped network."""
        return self.model(X, **kwargs)

In [None]:
# Wrap the network in the custom training class, then attach the optimizer and both losses
model = FaceTracker(facetracker)
model.compile(opt, classloss, regressloss)

In [None]:
# Start training
logdir='logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir) # Writes training logs to `logdir` for TensorBoard
# `hist.history` keeps the per-epoch losses that are plotted afterwards
hist = model.fit(train, epochs=30, validation_data=val, callbacks=[tensorboard_callback]) # The epoch count (30) can be tuned

In [None]:
# Plot training vs. validation curves for total, classification and regression loss
fig, ax = plt.subplots(ncols=3, figsize=(20,5))

# (history key, panel title, legend label) for each of the three panels
loss_panels = (
    ('total_loss', 'Loss', 'loss'),
    ('class_loss', 'Classification Loss', 'class loss'),
    ('regress_loss', 'Regression Loss', 'regress loss'),
)

for panel, (metric, panel_title, legend_label) in zip(ax, loss_panels):
    # Training curve in teal, validation curve (key prefixed with `val_`) in orange
    panel.plot(hist.history[metric], color='teal', label=legend_label)
    panel.plot(hist.history[f'val_{metric}'], color='orange', label=f'val {legend_label}')
    panel.title.set_text(panel_title)
    panel.legend()

plt.show()

In [None]:
# Make predictions on one test batch and collect the misclassified samples
test_data = test.as_numpy_iterator()
test_sample = test_data.next()

yhat = facetracker.predict(test_sample[0])
threshold = 0.5 # Can be updated
# Scores above the threshold count as the positive class
predicted_labels = np.array(yhat[0] > threshold, dtype=np.uint8)

# Compare flat vectors: predictions have shape (n, 1), so comparing them with
# the flattened (n,) labels directly would broadcast to an (n, n) matrix and
# produce bogus, duplicated indices.
wrong_predictions_indices = np.where(predicted_labels.flatten() != test_sample[1][0].flatten())[0]
wrong_images = test_sample[0][wrong_predictions_indices]
true_labels = test_sample[1][0][wrong_predictions_indices]

In [None]:
# OPTIONAL - Check whether any images were misclassified
# wrong_images.size

In [None]:
# Show the misclassified images of the test batch with their true and predicted labels
n_shown = min(len(wrong_images), len(predicted_labels))

if n_shown == 0:
    # Nothing to plot; a grid with zero columns would raise an error
    print('No misclassified images in this batch.')
else:
    # Two rows; round the column count up so an odd number of images still fits
    n_cols = (n_shown + 1) // 2
    fig, axes = plt.subplots(nrows=2, ncols=n_cols, figsize=(15, 5), squeeze=False)

    for i, ax in enumerate(axes.flatten()):
        if i < n_shown:
            # Display the image
            ax.imshow(wrong_images[i])

            # wrong_images[i] is sample wrong_predictions_indices[i] of the
            # batch, so its prediction has to be looked up at that index
            predicted = predicted_labels[wrong_predictions_indices[i]]

            # Set the title with both true and predicted labels
            ax.set_title(f'True Label: {true_labels[i]}\nPredicted Label: {predicted}', color='red' if true_labels[i] != predicted else 'black')

        # Hide the axes, including any unused spare slot
        ax.axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
# Plot the predicted boxes on the images of the test batch
# Size the grid from the batch itself: a batch can hold fewer than 8 samples
n_samples = len(test_sample[0])

# squeeze=False keeps `ax` two-dimensional even when the batch holds one image
fig, ax = plt.subplots(ncols=n_samples, figsize=(20,20), squeeze=False)
for idx in range(n_samples):
    sample_image = test_sample[0][idx]
    sample_coords = yhat[1][idx]
    sample_image = cv2.UMat(sample_image) # Wrap in a UMat so OpenCV can draw on it
    # Only draw a box when the classification score clears the same threshold
    # that was used to derive predicted_labels
    if yhat[0][idx] > threshold:
        # Predicted coordinates are in [0, 1]; scale them by the image size
        # (`resize`) instead of a hard-coded 120 so both stay in sync
        cv2.rectangle(sample_image,
                      tuple(np.multiply(sample_coords[:2], [resize,resize]).astype(int)),
                      tuple(np.multiply(sample_coords[2:], [resize,resize]).astype(int)),
                            (255,0,0), 2)
    sample_image_display = np.asarray(sample_image.get()) # Back to a NumPy array for imshow
    ax[0][idx].imshow(sample_image_display)

In [None]:
# Save the underlying network (`facetracker`, not the FaceTracker training wrapper) to a .keras file
facetracker.save('face_detection_v1.keras')

In [None]:
# Load the saved network back from disk
# Rebinding `facetracker` means the cells below use the reloaded model
from tensorflow.keras.models import load_model
facetracker = load_model('face_detection_v1.keras')

In [None]:
# Real-time detection on the default webcam using the trained model (press 'q' to quit)

def _to_point(xy):
    """Convert a 2-element coordinate array into a tuple of plain ints for OpenCV."""
    return tuple(int(v) for v in xy)

cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        grabbed, frame = cap.read()
        if not grabbed:
            # No frame delivered (camera unplugged, stream ended): stop
            # instead of trying to slice a missing frame
            break
        frame = frame[50:500, 50:500,:]
        # The crop is shorter than 450 px when the camera frame is, so scale
        # the predictions by the real crop size rather than a fixed 450
        frame_height, frame_width = frame.shape[:2]
        scale = [frame_width, frame_height]

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # 120 is the input size the network was built with
        resized = tf.image.resize(rgb, (120,120))

        # verbose=0 avoids printing a progress bar for every single frame
        yhat = facetracker.predict(np.expand_dims(resized/255,0), verbose=0)
        sample_coords = yhat[1][0]

        if yhat[0] > 0.5:
            top_left = np.multiply(sample_coords[:2], scale).astype(int)
            bottom_right = np.multiply(sample_coords[2:], scale).astype(int)

            # Controls the main rectangle
            cv2.rectangle(frame, _to_point(top_left), _to_point(bottom_right),
                          (255,0,0), 2)

            # Controls the label rectangle (filled, sitting on top of the box)
            cv2.rectangle(frame,
                          _to_point(np.add(top_left, [0,-30])),
                          _to_point(np.add(top_left, [80,0])),
                          (255,0,0), -1)

            # Controls the text rendered
            cv2.putText(frame, 'face', _to_point(np.add(top_left, [0,-5])),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255,255,255), 2, cv2.LINE_AA)

        cv2.imshow('Face Detection', frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, even if a frame raised
    cap.release()
    cv2.destroyAllWindows()