<a href="https://colab.research.google.com/github/Kapernikov/workshop-face-and-plate-recognition/blob/main/Workshop_plate_and_face_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dependencies 

In [None]:
!git clone https://github.com/Kapernikov/workshop-face-and-plate-recognition.git

In [None]:
!cd workshop-face-and-plate-recognition/ ; git pull

In [None]:
!pip install -r workshop-face-and-plate-recognition/requirements.txt

In [None]:
import cv2
import onnxruntime as ort
import numpy as np

# Pre process
Preprocessing is model-dependent; the steps proposed here were designed for the given models.

We can see that the models are input-size dependent, which will be an issue when using the big images of the campaign (8000x4000), because we are going to lose all the details.

The models are also color-model dependent (both use RGB here) and representation dependent: one model uses [-1,1] to represent colors while the other uses [0,1]. These come from the image formats used to train the models.
We use an explicit type to avoid an error from automatic type inference.

The last necessary thing to do is to match the input schema: cv2 gives us [height, width, colors] and both models take [image number, colors, height, width].

In [None]:
def preprocess_for_face_detection(frame):
    """Convert a BGR frame into the input tensor of the face detection model.

    The frame is converted to RGB, resized to 640x480 (width x height),
    previewed in the notebook, normalised with ``(pixel - 127) / 128`` and
    rearranged from [height, width, colors] to [1, colors, height, width].

    Args:
        frame: image as returned by OpenCV, i.e. [height, width, 3] in BGR.

    Returns:
        A ``float32`` array of shape (1, 3, 480, 640).
    """
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (640, 480))

    from google.colab.patches import cv2_imshow
    print("------ Input image for face detection  ------")
    # cv2_imshow interprets 3-channel images as BGR, so swap the channels
    # back for the preview only; the model input stays RGB.
    cv2_imshow(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    print("---------------------------------------------")

    # Centre and scale the pixel values to roughly [-1, 1].
    img = (img - np.array([127, 127, 127])) / 128
    # [height, width, colors] -> [colors, height, width]
    img = np.transpose(img, [2, 0, 1])
    # Add the leading "image number" (batch) axis.
    img = np.expand_dims(img, axis=0)
    # Explicit dtype: the subtraction/division above produced float64.
    img = img.astype(np.float32)

    return img

In [None]:
def preprocess_for_plate_detection(frame):
    """Convert a BGR frame into the input tensor of the plate detection model.

    The target size is read from the shape of the first input of
    ``ort_session_plate_model`` (indices 2 and 3 are used as height and
    width). The frame is resized, converted to RGB, previewed in the
    notebook, rearranged to [1, colors, height, width] and scaled to [0, 1].

    Args:
        frame: image as returned by OpenCV, i.e. [height, width, 3] in BGR.

    Returns:
        A ``float32`` array of shape (1, 3, model height, model width).
    """
    input_shape = ort_session_plate_model.get_inputs()[0].shape
    IN_IMAGE_H = input_shape[2]
    IN_IMAGE_W = input_shape[3]

    resized = cv2.resize(frame, (IN_IMAGE_W, IN_IMAGE_H), interpolation=cv2.INTER_LINEAR)
    img_in = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

    from google.colab.patches import cv2_imshow
    print("------ Input image for plate detection ------")
    # cv2_imshow interprets 3-channel images as BGR, so preview the resized
    # BGR image; it has the same content as the RGB tensor fed to the model.
    cv2_imshow(resized)
    print("---------------------------------------------")

    # [height, width, colors] -> [colors, height, width], as float32.
    img_in = np.transpose(img_in, (2, 0, 1)).astype(np.float32)
    # Add the leading "image number" (batch) axis.
    img_in = np.expand_dims(img_in, axis=0)
    # In-place division keeps the float32 dtype, so no further cast is needed.
    img_in /= 255.0
    return img_in

# Run the model

## Models output format
- boxes: a list of pairs of points defining the boxes where objects have been detected.
- confidences: contains a list of confidence levels for each box inside the boxes variable. The first and second values of one confidence pair indicate the probability of containing background and face respectively. It means that our model has two classes that it can detect, namely background and face, and gives the probability for each of them.

## Face detection model (pre-trained)

In [None]:
# Load the pre-trained face detection model from the cloned repository
# (path is relative to the notebook's working directory).
ort_session_face_model = ort.InferenceSession("""workshop-face-and-plate-recognition//models//ultra_light_640.onnx""")
# Name of the model's first input; used as the feed key when running inference.
input_name_face_model = ort_session_face_model.get_inputs()[0].name

In [None]:
def predict_face_area(input_data):
    """Run the face detection model on a preprocessed input tensor.

    Args:
        input_data: tensor fed to the model under ``input_name_face_model``.

    Returns:
        The two model outputs as a ``(confidences, boxes)`` tuple, in the
        order the session returns them.
    """
    feeds = {input_name_face_model: input_data}
    # Passing None as output names requests every output of the model;
    # the unpacking expects exactly two of them.
    confidences, boxes = ort_session_face_model.run(None, feeds)
    return confidences, boxes

## License plate detection model (pre-trained)

In [None]:
# This model doesn't fit on the git
!gdown 16NPM0SGP-p6R3VNb9eB6rqM7-E2-EjKX -O workshop-face-and-plate-recognition/models/yolov4_1_3_416_416_static.onnx

In [None]:
# Alternative location, for when the model is kept on a mounted Google Drive:
#ort_session_plate_model = ort.InferenceSession("""/content/drive/MyDrive/onnx_model/yolov4_1_3_416_416_static.onnx""")
# Load the plate detection model from the file written by the gdown cell
# (absolute path under /content, the Colab working directory).
ort_session_plate_model = ort.InferenceSession("""/content/workshop-face-and-plate-recognition/models/yolov4_1_3_416_416_static.onnx""")
# Name of the model's first input; used as the feed key when running inference.
input_name_plate_model = ort_session_plate_model.get_inputs()[0].name

In [None]:
def predict_plate(input_data):
    """Run the license plate detection model on a preprocessed input tensor.

    The model returns ``(boxes, confidences)``; this wrapper swaps them so the
    result has the same ``(confidences, boxes)`` order as ``predict_face_area``
    and drops the third axis of the boxes by keeping its index 0.

    Args:
        input_data: tensor fed to the model under ``input_name_plate_model``.

    Returns:
        ``(confidences, boxes)`` where ``boxes`` is ``boxes[:, :, 0, :]`` of
        the raw model output.
    """
    outputs = ort_session_plate_model.run(None, {input_name_plate_model: input_data})
    # The unpacking expects exactly two outputs from the model.
    raw_boxes, confidences = outputs
    flattened_boxes = raw_boxes[:, :, 0, :]
    return confidences, flattened_boxes

# post process

1. For each box, we want to define its class. Therefore, for each class (except the background one), we will keep the boxes whose confidence is above a threshold:
    	probs = confidences[:, class_index]
    	mask = probs > prob_threshold  # return boolean array that is True if the probability is above threshold, else it is False
    	probs = probs[mask]
    	subset_boxes = boxes[mask, :]

What class do we have?


The first model gives us the probability of being part of the surroundings (background) and the probability of being a face; the second model gives us the probability of being a license plate. This gives us 3 classes.

2. On each of these box/probability pairs we will perform a non-maximum suppression. Therefore we will define an intersection-over-union function as a function returning the overlapping area / the area of the union of the two boxes.

To perform the actual non-maximum suppression, we will:
- select the remaining candidate with the highest score
- remove the remaining candidates whose intersection-over-union with it is above a threshold
- repeat until there are no remaining candidates



In [None]:
def get_area_of(left_top, right_bottom):
    """Return the area of rectangles given by two opposite corners.

    Args:
        left_top: array whose last axis holds the first corner's two coordinates.
        right_bottom: array whose last axis holds the opposite corner's
            two coordinates.

    Returns:
        The product of the two side lengths along the last axis. Negative
        side lengths are clipped to 0, so inverted rectangles have area 0.
    """
    sides = right_bottom - left_top
    # A negative extent means the corners are inverted: treat it as empty.
    sides = np.clip(sides, 0.0, None)
    first_side = sides[..., 0]
    second_side = sides[..., 1]
    return first_side * second_side

In [None]:
def get_iou_of(boxes0, boxes1):
    """Return the intersection-over-union (IoU) of two sets of boxes.

    Along the last axis, the first two values of a box are read as one corner
    and the remaining values as the opposite corner. The two inputs are
    combined element-wise with NumPy broadcasting.

    Args:
        boxes0: array of boxes.
        boxes1: array of boxes, broadcastable against ``boxes0``.

    Returns:
        ``overlap / (area0 + area1 - overlap + 1e-5)``; the small constant
        avoids a division by zero for empty boxes.
    """
    epsilon = 1e-5

    # Corners of the intersection rectangle.
    inter_min = np.maximum(boxes0[..., :2], boxes1[..., :2])
    inter_max = np.minimum(boxes0[..., 2:], boxes1[..., 2:])
    intersection = get_area_of(inter_min, inter_max)

    union = (
        get_area_of(boxes0[..., :2], boxes0[..., 2:])
        + get_area_of(boxes1[..., :2], boxes1[..., 2:])
        - intersection
    )
    return intersection / (union + epsilon)

In [None]:
def proceed_hard_nms(box_scores, iou_threshold, top_k=-1, candidate_size=200):
    """Apply hard non-maximum suppression to scored boxes.

    Args:
        box_scores: 2-D array with one row per box; the last column is the
            score and the preceding columns are the box coordinates.
        iou_threshold: candidates whose IoU with a picked box is above this
            value are discarded.
        top_k: stop after this many boxes have been picked; values <= 0
            disable the limit.
        candidate_size: only the this-many highest scoring boxes are considered.

    Returns:
        The rows of ``box_scores`` that were kept, highest score first.
    """
    coords = box_scores[:, :-1]
    # argsort is ascending, so the best candidates sit at the end.
    order = np.argsort(box_scores[:, -1])[-candidate_size:]
    kept = []
    while order.size:
        best = order[-1]
        kept.append(best)
        reached_top_k = top_k > 0 and len(kept) == top_k
        if reached_top_k or order.size == 1:
            break
        # Drop the box just picked, then every remaining box that overlaps
        # it too much.
        order = order[:-1]
        overlaps = get_iou_of(coords[order, :], coords[best, :][np.newaxis, :])
        order = order[overlaps <= iou_threshold]

    return box_scores[kept, :]

In [None]:
def postprocess(width, height, confidences, boxes, prob_threshold, iou_threshold=0.5, top_k=-1, skip_n_class=0):
    """Turn raw model outputs into final boxes, labels and probabilities.

    For every class (starting at ``skip_n_class``) the boxes whose confidence
    is above ``prob_threshold`` are kept and filtered with hard non-maximum
    suppression. The surviving box coordinates are then multiplied by the
    image width (columns 0 and 2) and height (columns 1 and 3), which
    presumably converts normalised coordinates to pixels.

    Args:
        width: image width used to scale columns 0 and 2 of the boxes.
        height: image height used to scale columns 1 and 3 of the boxes.
        confidences: array indexed as [image, box, class]; only image 0 is used.
        boxes: array indexed as [image, box, coordinate]; only image 0 is used.
        prob_threshold: minimum confidence (exclusive) for a box to be kept.
        iou_threshold: IoU threshold forwarded to ``proceed_hard_nms``.
        top_k: per-class limit forwarded to ``proceed_hard_nms``.
        skip_n_class: number of leading classes to ignore (e.g. background).

    Returns:
        A ``(boxes, labels, probs)`` tuple: an ``(N, 4)`` int32 array of box
        coordinates, an ``(N,)`` array of class indices and an ``(N,)`` array
        of confidences. When nothing is detected the arrays keep these shapes
        with ``N == 0``.
    """
    boxes = boxes[0]
    confidences = confidences[0]
    picked_box_probs = []
    picked_labels = []
    for class_index in range(skip_n_class, confidences.shape[1]):
        probs = confidences[:, class_index]
        mask = probs > prob_threshold
        probs = probs[mask]
        if probs.shape[0] == 0:
            continue
        subset_boxes = boxes[mask, :]
        # One row per candidate: the box coordinates followed by the score.
        box_probs = np.concatenate([subset_boxes, probs.reshape(-1, 1)], axis=1)
        box_probs = proceed_hard_nms(
            box_probs,
            iou_threshold=iou_threshold,
            top_k=top_k,
        )
        picked_box_probs.append(box_probs)
        picked_labels.extend([class_index] * box_probs.shape[0])
    if not picked_box_probs:
        # Keep the same rank and dtypes as the non-empty result so callers
        # can index columns (e.g. boxes[:, 0]) without special-casing.
        return (
            np.empty((0, 4), dtype=np.int32),
            np.empty((0,), dtype=int),
            np.empty((0,), dtype=np.result_type(boxes, confidences)),
        )
    picked_box_probs = np.concatenate(picked_box_probs)
    picked_box_probs[:, 0] *= width
    picked_box_probs[:, 1] *= height
    picked_box_probs[:, 2] *= width
    picked_box_probs[:, 3] *= height
    return picked_box_probs[:, :4].astype(np.int32), np.array(picked_labels), picked_box_probs[:, 4]

# Process zone of interest

## Blur the bounding boxes 
**several methods:**
1. cv2.blur() will apply a kernel on the desired portion of the image to blur it. BUT as it uses a mathematical function to do it, it might be reversible.

2. Set the color to a fixed value in the region of interest. This removes the information completely!

3. Interpolate the region of interest from a very low resolution.
This removes part of the information: the lower the resolution is, the more distant the final result will be from the original image.
It is smoother than the fixed-color rectangle.


In [None]:
def blur_box(frame, box):
    """Pixelate the region around ``box`` in ``frame``, in place.

    The box is enlarged on every side by a quarter of its longest side,
    clamped to the frame, shrunk to 8x8 pixels and scaled back up with
    nearest-neighbour interpolation so the original detail is discarded.

    Args:
        frame: image array of shape [height, width, colors]; modified in place.
        box: ``(x1, y1, x2, y2)`` pixel coordinates of the region to hide.

    Returns:
        None. Boxes that are degenerate or lie entirely outside the frame
        leave ``frame`` untouched.
    """
    h, w, _ = frame.shape
    # Plain Python ints keep the slicing and the cv2 size tuple well-defined
    # even when the box comes from a NumPy integer array.
    x1, y1, x2, y2 = (int(v) for v in box)

    roi_buffer = max((x2 - x1), (y2 - y1)) // 4
    x1_buffered = max(0, x1 - roi_buffer)
    y1_buffered = max(0, y1 - roi_buffer)
    x2_buffered = min(w, x2 + roi_buffer)
    y2_buffered = min(h, y2 + roi_buffer)

    roi_w = x2_buffered - x1_buffered
    roi_h = y2_buffered - y1_buffered
    if roi_w <= 0 or roi_h <= 0:
        # Nothing of the box is visible; resizing an empty region would fail
        # and a negative end index would select the wrong part of the frame.
        return

    roi = frame[y1_buffered:y2_buffered, x1_buffered:x2_buffered]
    # Down-sample to a tiny image, then blow it back up to the original size.
    roi = cv2.resize(roi, (8, 8), interpolation=cv2.INTER_NEAREST)
    roi = cv2.resize(roi, (roi_w, roi_h), interpolation=cv2.INTER_NEAREST)
    frame[y1_buffered:y2_buffered, x1_buffered:x2_buffered] = roi

In [None]:
def draw_box(frame, box, label):
    """Draw a labelled bounding box on ``frame``, in place.

    A 2-pixel outline is drawn around the box, and a filled 20-pixel-high
    banner at the bottom of the box carries the text ``label: <label>``.

    Args:
        frame: image array to draw on; modified in place.
        box: ``(x1, y1, x2, y2)`` pixel coordinates of the box.
        label: value shown in the banner.
    """
    # Plain Python ints are accepted by every cv2 drawing function,
    # whatever integer type the box array uses.
    x1, y1, x2, y2 = (int(v) for v in box)
    box_color = (80, 18, 236)

    cv2.rectangle(frame, (x1, y1), (x2, y2), box_color, 2)
    # Filled strip at the bottom of the box used as background for the text.
    cv2.rectangle(frame, (x1, y2 - 20), (x2, y2), box_color, cv2.FILLED)
    font = cv2.FONT_HERSHEY_DUPLEX
    text = f"label: {label}"
    cv2.putText(frame, text, (x1 + 6, y2 - 6), font, 0.5, (255, 255, 255), 1)
# Glue all together

In [None]:
!wget -c https://media.wired.com/photos/5926f91dcfe0d93c47431f76/master/w_1600%2Cc_limit/Top-Gear-Series-23-Preview_11_35MB.jpg -O image_test.jpg

In [None]:
# End-to-end demo: read the test image, detect faces and plates, then show
# the original, an annotated copy and a copy with the detections pixelated.
file_name = "image_test.jpg"
video_capture = cv2.VideoCapture(file_name)
try:
    ret, frame = video_capture.read()
finally:
    # Release the capture even if reading fails.
    video_capture.release()

if not ret:
    # Fail early with a clear message instead of crashing inside preprocessing.
    raise RuntimeError(f"Could not read an image from {file_name!r}")

input_data = preprocess_for_face_detection(frame)
input_data_plate = preprocess_for_plate_detection(frame)

confidences_face, boxes_face = predict_face_area(input_data)
confidences_plate, boxes_plate = predict_plate(input_data_plate)

# Merge both models' candidates into one list of boxes with a shared class
# axis: the face confidences get a zero column appended at index 2 and the
# plate confidences get two zero columns prepended.
boxes = np.concatenate((boxes_face, boxes_plate), axis=1)
confidences = np.concatenate((np.insert(confidences_face, 2, 0, axis=2), np.insert(confidences_plate, (0,0), 0, axis=2)), axis=1)

h, w, _ = frame.shape
# skip_n_class=1 ignores class 0 (background).
boxes, labels, probs = postprocess(w, h, confidences, boxes, 0.5, skip_n_class=1)

# Work on copies so the original frame stays untouched for display.
frame_result_1 = np.array(frame)
frame_result_2 = np.array(frame)

for box, label in zip(boxes, labels):
    blur_box(frame_result_2, box)
    draw_box(frame_result_1, box, label)

# cv2.imshow('Image', frame)
from google.colab.patches import cv2_imshow
print("------         Original image         ------")
cv2_imshow(frame)
print("------         Anotated image         ------")
cv2_imshow(frame_result_1)
print("------         blurred  image         ------")
cv2_imshow(frame_result_2)
print("--------------------------------------------")

cv2.destroyAllWindows()