# Install dependencies

In [4]:
%pip install --user labelme tensorflow tensorflow-gpu opencv-python matplotlib albumentations split-folders imutils scikit-learn keras pandas

Collecting pandas
  Downloading pandas-1.5.3-cp39-cp39-win_amd64.whl (10.9 MB)
     ---------------------------------------- 10.9/10.9 MB 8.7 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Using cached pytz-2022.7.1-py2.py3-none-any.whl (499 kB)
Installing collected packages: pytz, pandas
Successfully installed pandas-1.5.3 pytz-2022.7.1
Note: you may need to restart the kernel to use updated packages.


You should consider upgrading via the 'c:\Users\Harry Parker\AppData\Local\Programs\Python\Python39\python.exe -m pip install --upgrade pip' command.


## Import libraries

In [6]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from collections import OrderedDict
import pandas as pd
import cv2
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms, utils
from PIL import Image

%load_ext autoreload
%autoreload 2



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
from models import Generator
from models import Discriminator
from dataset import arrange_data
from dataset import WIDER
from tools import show_grid

In [7]:
# Avoid OOM errors by setting GPU Memory Consumption Growth
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus: 
    tf.config.experimental.set_memory_growth(gpu, True)

tf.config.list_physical_devices('GPU')

[]

# Load dataset

In [8]:
# Transforms for low resolution images and high resolution images
def transform_hl_pair(hr_height, hr_width):

    lr_transforms = [transforms.Resize((hr_height // 4, hr_width // 4), Image.BICUBIC),
                     transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]

    hr_transforms = [transforms.Resize((hr_height, hr_width), Image.BICUBIC),
                     transforms.ToTensor(),
                     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]

    return transforms.Compose(lr_transforms), transforms.Compose(hr_transforms)


def arrange_data(path):

    with open(path, 'r') as f:
        data = f.readlines()

    data = [x.strip() for x in data]
    flags = []
    for (i, x) in enumerate(data):
        if (x.endswith('.jpg')):
            flags.append(i)
        else:
            data[i] = [int(loc) for loc in x.split(' ')[:4]]

    path = np.array(data)[flags].tolist()
    bbxs = [x[2:] for x in np.split(data, flags[1:])]
    return path, bbxs


def iou(a, b):
    sizea = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    sizeb = (b[2] - b[0]) * (b[3] - b[1])
    tl = np.maximum(a[:, :2], b[:2])
    br = np.minimum(a[:, 2:], b[2:])
    wh = np.maximum(br - tl, 0)
    size = wh[:, 0] * wh[:, 1]
    return size / (sizea + sizeb - size)


class WIDER(Dataset):

    def __init__(self, base, path, bbxs, high_resolution=(128, 128)):
        self.base = base
        self.path = path
        self.bbxs = bbxs
        self.lr, self.hr = transform_hl_pair(*high_resolution)

    def __len__(self):
        return len(self.path)

    def __getitem__(self, idx):
        img = Image.open(join(self.base, self.path[idx]))
        bbxs = np.vstack(self.bbxs[idx])
        # random select one face
        idx = np.random.randint(0, len(bbxs), 1)
        bbxs[:, 2:] += bbxs[:, 0:2]

        bbx = bbxs[idx, :].squeeze()
        true = img.crop(bbx)
        # random crop a fix-sized background patch
        x, y = np.random.randint(0, min(img.size) - 128, 2)
        bg = [x, y, x + 128, y + 128]
        if np.all(iou(bbxs, bg) < 0.5):
            false = img.crop(bg)
        else:
            false = Image.fromarray(np.random.randint(0, 256, size=(128, 128, 3)).astype('uint8'))
            print("use random noise.")
        return {"lr_face": self.lr(true), "lr_background": self.lr(false),
                "hr_face": self.hr(true), "hr_background": self.hr(false)}


if __name__ == '__main__':
    train_path = "./WIDER/WIDER_train/images/"
    path, bbxs = arrange_data()
    wider = WIDER(train_path, path, bbxs)
    result = wider[22]

NameError: name 'Dataset' is not defined

In [None]:
from torch import nn
import torch
from torchvision import utils
import numpy as np
import math
from matplotlib import pyplot as plt

# Configure loss
mse = nn.MSELoss(reduction='sum')
bce = nn.BCELoss(reduction='none')


def G_loss(g1, g2, pixel_label, d, adv_cls_label, trade_off):
    pixel_loss = mse(g1, pixel_label) + mse(g2, pixel_label)
    adv_cls_loss = torch.sum(bce(d, adv_cls_label) * trade_off)
    return pixel_loss + adv_cls_loss

def D_loss(d, adv_cls_label):
    return torch.sum(bce(d, adv_cls_label))


def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        torch.nn.init.normal_(m.weight.data, .0, .1)
        torch.nn.init.constant_(m.bias.data, .0)

def show_grid(x, figsize=(10, 20), nrow=8):
    nums, _, length, width = x.size()
    img_grid = utils.make_grid(x, normalize=True, scale_each=True, nrow=nrow)
    r = math.ceil(nums / nrow)
    plt.figure(figsize=figsize)
    plt.imshow(np.transpose(img_grid.detach().cpu().numpy(), (1, 2, 0)))
    plt.gca().xaxis.set_ticks_position("top")
    plt.xticks(np.arange(0, (nrow+1) * (width+2), width+2), np.arange(nrow))
    plt.yticks(np.arange(0, (r+1) * (length+2), (length+2)), np.arange(r))

In [None]:
train_path = "./WIDER/WIDER_train/images/"
val_path = "./WIDER/WIDER_val/images/"
anno_path = "./WIDER/wider_face_split/wider_face_val_bbx_gt.txt"

path, bbxs = arrange_data(anno_path)
wider = WIDER(val_path, path, bbxs)
dataloader = DataLoader(wider, batch_size=16, shuffle=True, num_workers=8)
batch = next(iter(dataloader))

# View images

In [None]:
image_generator = images.batch(4).as_numpy_iterator()

plot_images = image_generator.next()

fig, axes = plt.subplots(ncols=4, figsize=(20, 20))
for i, image in enumerate(plot_images):
    axes[i].imshow(image)
plt.show()




# Combine images and labels

In [None]:
len(train_images), len(test_images), len(val_images), len(train_labels), len(val_labels)

train = tf.data.Dataset.zip((train_images, train_labels))
train = train.shuffle(5000)
train = train.batch(64)
train = train.prefetch(8)

##test = tf.data.Dataset.zip((test_images, test_labels))
#test = test.shuffle(1500)
#test = test.batch(64)
#test = test.prefetch(8)

val = tf.data.Dataset.zip((val_images, val_labels))
val = val.shuffle(1000)
val = val.batch(64)
val = val.prefetch(8)

train.as_numpy_iterator().next()[1]

data_samples = train.as_numpy_iterator().next()

res = data_samples.next()

fig, axes = plt.subplots(ncols=4, figsize=(20, 20))
for i in range(4):
    samples_image = res[0][i]
    sample_coords = res[1][1][i]

    cv2.rectangle(samples_image, 
                  tuple(np.multiply(sample_coords[:2], [120, 120]).astype(int)),
                  tuple(np.multiply(sample_coords[2:], [120, 120]).astype(int)),
                  (255, 0, 0), 2)

axes[i].imshow(samples_image)

    

# Functional API

In [None]:
from keras.models import Model
from keras.layers import Input, Conv2D, Dense, GlobalMaxPooling2D
from keras.applications import VGG16

vgg = VGG16(include_top=False)

vgg.summary()

In [None]:
#building classifer 
def build_model():
    model = Sequential()
    model.add(Conv2D(8*baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),
                     input_shape=(IMAGE_H, IMAGE_W, 3),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(Conv2D(8*baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.1))

    model.add(Conv2D(4 * baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(Conv2D(4 * baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.1))  

    model.add(Conv2D(2 * baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(Conv2D(2 * baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.1))  
    
    model.add(Conv2D(2 * baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(MaxPooling2D(pool_size=(4, 4)))
    model.add(Dropout(0.1))  
   
    model.add(Conv2D(baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(Conv2D(baseMapNum, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay),activation=LeakyReLU(alpha=0.1)))
    model.add(BatchNormalization())

    model.add(Conv2D(BOX * (4 + 1), (1,1), strides=(1,1)))
    model.add(Reshape((GRID_H, GRID_W,BOX, 4 + 1)))

    return model

faceDetectionModel = build_model()
faceDetectionModel.summary()

In [None]:
X, y = train.as_numpy_iterator().next()
X.shape

classes, coords = faceDetectionModel.predict(X)

classes, coords


# Losses

In [None]:
faceDetectionModel.compile(loss=custom_loss, optimizer=tf.keras.optimizers.Adam())

In [None]:
def custom_loss(y_true, y_pred):
    mask_shape = tf.shape(y_true)[:4]
    print("Mask",np.shape(mask_shape))

    cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
    print("cell_x",np.shape(cell_x))
    cell_y = cell_x
    print("cell_y",np.shape(cell_y))

    cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])
    print("cell_grid",np.shape(cell_grid))

    coord_mask = tf.zeros(mask_shape)
    conf_mask  = tf.zeros(mask_shape)
    print("coord_mask",np.shape(coord_mask))
    
    seen = tf.Variable(0.)
    total_recall = tf.Variable(0.)
    
    """
    Adjust prediction
    """
    pred_box_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
    pred_box_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])    
    pred_box_conf = tf.sigmoid(y_pred[..., 4])
    """
    Adjust ground truth
    """
    true_box_xy = y_true[..., 0:2] 
    true_box_wh = y_true[..., 2:4] 
    true_box_conf = y_true[..., 4]
    """
    Determine the masks
    """
    ### coordinate mask: simply the position of the ground truth boxes (the predictors)
    coord_mask = tf.expand_dims(y_true[..., 4], axis=-1) * COORD_SCALE
    """
    Warm-up training
    """
    no_boxes_mask = tf.to_float(coord_mask < COORD_SCALE/2.)
    
    seen = tf.assign_add(seen, 1.)
    true_box_xy, true_box_wh, coord_mask = tf.cond(tf.less(seen, WARM_UP_BATCHES),
                   lambda: [true_box_xy + (0.5) * no_boxes_mask, 
                   true_box_wh + tf.ones_like(true_box_wh) * np.reshape(ANCHORS, [1,1,1,BOX,2])* no_boxes_mask, tf.ones_like(coord_mask)],
                   lambda: [true_box_xy, true_box_wh, coord_mask])
    
 
    """
    Finalize the loss
    """
    nb_coord_box = tf.reduce_sum(tf.to_float(coord_mask > 0.0))
    # nb_conf_box  = tf.reduce_sum(tf.to_float(conf_mask  > 0.0))

    loss_xy    = tf.reduce_sum(tf.square(true_box_xy-pred_box_xy)     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_wh    = tf.reduce_sum(tf.square(tf.sqrt(true_box_wh)-tf.sqrt(pred_box_wh))     * coord_mask) / (nb_coord_box + 1e-6) / 2.
    loss_conf  = tf.reduce_sum(tf.square(true_box_conf-pred_box_conf))
    loss = loss_xy + loss_wh + loss_conf
    
    return loss

# Old losses

In [None]:
batches_per_epoch = len(train_images)
lr_decay = (1./0.75-1)/batches_per_epoch

optimiser = tf.keras.optimizers.Adam(learning_rate=0.0001, decay=lr_decay)

def localisation_loss(y_true, yhat):
    delta_coord = tf.reduce_sum(tf.square(y_true[:,:2] - yhat[:,:2]))

    h_true = y_true[:,3] - y_true[:,1]
    w_true = y_true[:,2] - y_true[:,0]

    h_pred = yhat[:,3] - yhat[:,1]
    w_pred = yhat[:,2] - yhat[:,0]

    delta_size = tf.reduce_sum(tf.square(tf.sqrth_true - w_true) + tf.square(h_pred - w_pred))

    return delta_coord + delta_size

classloss = tf.keras.losses.BinaryCrossentropy()
regloss = localisation_loss

localisation_loss(y[1], coords)

classloss(y[0], classes)

regloss(y[1], coords)

# Train CNN

In [None]:
class FaceDetection(Model):
      def __init__(self, facecctv, **kwargs):
        super().__init__(**kwargs)
        self.model = facecctv

      def compile(self, optimiser, classloss, localisationloss, **kwargs):
          super().compile(**kwargs)
          self.classloss = classloss
          self.localisationloss = localisationloss
          self.optimiser = optimiser
      
      def train_step(self, batch, **kwargs):
          
            X, y = batch

            with tf.GradientTape() as tape:
                  classes, coords = self.model(X, training=True)

                  batch_class_loss = self.classloss(y[0], classes)

                  batch_localisation_loss = self.localisationloss(tf.cast(y[1], tf.float32), coords)

                  total_loss = 0.5*batch_class_loss + batch_localisation_loss

                  gradients = tape.gradient(total_loss, self.model.trainable_variables)

            optimiser.apply_gradients(zip(gradients, self.model.trainable_variables))

            return {"class_loss": batch_class_loss, "localisation_loss": batch_localisation_loss, "total_loss": total_loss}
      
      def test_step(self, batch, **kwargs):
            X, y = batch

            classes, coords = self.model(X, training=False)

            batch_class_loss = self.classloss(y[0], classes)

            batch_localisation_loss = self.localisationloss(tf.cast(y[1], tf.float32), coords)

            total_loss = 0.5*batch_class_loss + batch_localisation_loss

            return {"class_loss": batch_class_loss, "localisation_loss": batch_localisation_loss, "total_loss": total_loss}
          
      def call(self, X, **kwargs):
            return self.model(X, **kwargs)
      
model = FaceDetection(faceDetectionModel)
model.compile(optimiser, classloss, regloss)

logdir = 'logs'
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logdir, histogram_freq = 1)
hist = model.fit(train, epochs=10, validation_data=val, callbacks=[tensorboard_callback])

hist.history

fig, axes = plt.subplots(ncols=3, figsize=(20, 5))

axes[0].plot(hist.history['total_loss'], color='teal', label='total_loss')
axes[0].plot(hist.history['val_total_loss'], color='orange', label='val_total_loss')
axes[0].set_title('Total Loss')
axes[0].legend()

axes[1].plot(hist.history['class_loss'], color='teal', label='class_loss')
axes[1].plot(hist.history['val_class_loss'], color='orange', label='val_class_loss')
axes[1].set_title('Classification Loss')
axes[1].legend()

axes[2].plot(hist.history['localisation_loss'], color='teal', label='localisation_loss')
axes[2].plot(hist.history['val_localisation_loss'], color='orange', label='val_localisation_loss')
axes[2].set_title('Localisation Loss')
axes[2].legend()

plt.show()

# Train model

In [None]:
i=0
i, x, y = GenerateBatch(i,500)
print(i)
x_train, x_test, y_train ,y_test = train_test_split(x, y, test_size=0.2, random_state=50)
print(np.shape(y_train))
y_train = np.reshape(y_train,(len(x_train),13, 13, BOX, 5))
print(np.shape(y_train))
     

In [None]:
faceDetectionModel.fit(x_train, y_train, epochs=50, batch_size=BATCH_SIZE, validation_data=(x_test,y_test), verbose=1)
faceDetectionModel.save_weights("faceDetectionModel.h5")

# Test model

In [None]:
print(np.exp(0.5))
print(np.log(np.exp(0.05)))
     

import math

def sigmoid(x):
  return 1 / (1 + math.exp(-x))

_, tempx, ty = GenerateBatch(0,1000)
img = tempx[0]
true_box_wh = ty[..., 2:4]
print(np.shape(true_box_wh))
i = ty[0].reshape((GRID_H * GRID_W,5))[67]
print(i)
Nx,Ny, Nw, Nh = ReAdjustCord(i[0]*(8/GRID_W),i[1]*(5/GRID_H), i[2], i[3],img)
cv2.rectangle(img, (int(Nx),int(Ny)), (int(Nw)+int(Nx),int(Nh)+int(Ny)), (255,0,0)) 
cv2.imshow(img)
print(Nx,Ny, Nw, Nh)
tempy = model.predict(tempx)
i = tempy[0].reshape((GRID_H * GRID_W,5))[67]
Nx,Ny, Nw, Nh = sigmoid(i[0]),sigmoid(i[1]), np.exp(i[2]), np.exp(i[3])
print(Nx,Ny, Nw, Nh)
Nx,Ny, Nw, Nh = ReAdjustCord(Nx*(8/GRID_W),Ny*(5/GRID_H), Nw, Nh,img)
print(Nx,Ny, Nw, Nh)
cv2.rectangle(img, (int(Nx),int(Ny)), (int(Nw)+int(Nx),int(Nh)+int(Ny)), (255,0,0)) 
cv2.imshow(img)

In [None]:
for i in range(GRID_H * GRID_W):
  tt = ty[0].reshape((GRID_H * GRID_W,5))[i]
  if(tt[4] > 0.5):
    print(tt)
    print(i)

In [None]:
_, tempx, ty = GenerateBatch(0,1000)
tempy = model.predict(tempx)
img = tempx[0]
print(np.shape(tempy[0]))
for idx,i in enumerate(ty[0].reshape((9,5))):
    if(float(i[4]) > 0.5):
      print("ssss",i,idx)
      Nx,Ny, Nw, Nh = sigmoid(i[0]),sigmoid(i[1]), i[2], i[3]
      print(Nx,Ny, Nw, Nh)
      Nx,Ny, Nw, Nh = ReAdjustCord(i[0],i[1], i[2], i[3],img)
      print(Nx,Ny, Nw, Nh)
      cv2.rectangle(img, (int(Nx),int(Ny)), (int(Nw)+int(Nx),int(Nh)+int(Ny)), (255,0,0)) 
      cv2.imshow(img)

In [None]:
model.save_weights("weights.h5")

In [None]:
predicted = model.predict(x)
predicted = np.reshape(predicted,(1,3*3,5))
for i in predicted[0]:
      if(float(i[0]) > 0.5):
            print(i)
print(np.sum(predicted,axis = 1))
print(np.sum(np.reshape(y,(1,3*3,5)),axis = 1))

# Predictions 

In [None]:
test_data = test.as_nupmy_iterator().next()
test_samples = test_data.next()
yhat = faceDetectionModel.predict(test_samples[0])

fig, axes = plt.subplots(ncols=4, figsize=(20, 20))
for i in range(4):
      sample_image = test_samples[0][i]
      sample_coords = yhat[1][i]

      if yhat[0][i] > 0.9:
            cv2.rectangle(sample_image,
                        tuple(np.multiply(sample_coords[:2], 120).astype(int)),
                        tuple(np.multiply(sample_coords[2:], 120).astype(int)),
                        (255, 0, 0), 2)
            
      axes[i].imshow(sample_image)

# Save model

In [None]:
from keras.models import load_model

faceDetectionModel.save('faceDetectionModel.h5')
faceDetectionModel = load_model('faceDetectionModel.h5', custom_objects={'localisation_loss': localisation_loss})

faceDetectionModel.summary()

def detect_face(image, model):
      image = cv2.resize(image, (120, 120))
      image = image.astype('float32')/255.
      image = np.expand_dims(image, axis=0)
      classes, coords = model.predict(image)
      if classes[0][0] > 0.9:
            return coords[0]
      else:
            return None
