Processing the dataframe with labels

In [None]:
def process_bbox(data):
  """
  Expand the string-encoded ``bbox`` column into integer x, y, w, h columns.

  The ``bbox`` column is overwritten with the bracket-free text, the four
  comma-separated fields are parsed as floats and then cast to int, and the
  same (mutated) dataframe is returned.
  """
  unbracketed = data['bbox'].str.strip('[').str.strip(']')
  data['bbox'] = unbracketed

  fields = unbracketed.str.split(',', expand=True)
  # Parse as float first so values such as "20.5" can still be cast to int.
  data[['x', 'y', 'w', 'h']] = fields.astype(float).astype(int)
  return data

# IoU (Intersection over Union)

In [None]:
# Convert a corner-format box (x1, y1, x2, y2) into (x, y, w, h),
# where (x, y) stays the first corner and w/h are the corner differences.
def convert_corr(bbox):
  left, top, right, bottom = bbox
  width = right - left
  height = bottom - top
  return [left, top, width, height]

In [None]:
def iou(gt, pred):
    """
    Return the intersection-over-union of two boxes given as [x, y, w, h].

    Each box is first converted to corners (x, y, x + w, y + h). Extents are
    measured as ``end - start + 1``, so the areas used here are never zero for
    non-negative widths and heights.

    Args:
        gt: ground-truth box, indexable as [x, y, w, h]
        pred: predicted box, indexable as [x, y, w, h]
    Returns
        float in [0, 1]; 0.0 when the boxes do not overlap
    """
    gt_x1, gt_y1 = gt[0], gt[1]
    gt_x2, gt_y2 = gt[0] + gt[2], gt[1] + gt[3]

    pred_x1, pred_y1 = pred[0], pred[1]
    # The right edge must use the prediction's own width (pred[2]); using the
    # ground-truth width here would distort every score.
    pred_x2, pred_y2 = pred[0] + pred[2], pred[1] + pred[3]

    # Corners of the overlapping rectangle (may be inverted if disjoint).
    inter_x1 = max(gt_x1, pred_x1)
    inter_y1 = max(gt_y1, pred_y1)
    inter_x2 = min(gt_x2, pred_x2)
    inter_y2 = min(gt_y2, pred_y2)

    # Clamp at 0 so disjoint boxes contribute no intersection area.
    inter_area = max(0, inter_x2 - inter_x1 + 1) * max(0, inter_y2 - inter_y1 + 1)

    gt_area = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)
    pred_area = (pred_x2 - pred_x1 + 1) * (pred_y2 - pred_y1 + 1)

    return inter_area / float(gt_area + pred_area - inter_area)

# Image Processing

## Alignment

In [None]:
import math
from typing import Union
from PIL import Image

def findEuclideanDistance(
    source_representation: Union[np.ndarray, list], test_representation: Union[np.ndarray, list]
) -> float:
    """
    Compute the Euclidean (L2) distance between two vectors.
    Args:
        source_representation (numpy array or list): first vector
        test_representation (numpy array or list): second vector
    Returns
        square root of the summed squared element-wise differences
    """
    # Only plain lists are converted; any other input is used as given.
    source = (
        np.array(source_representation)
        if isinstance(source_representation, list)
        else source_representation
    )
    target = (
        np.array(test_representation)
        if isinstance(test_representation, list)
        else test_representation
    )

    delta = source - target
    squared_sum = np.sum(delta * delta)
    return np.sqrt(squared_sum)

def alignment_procedure(img: np.ndarray, left_eye: tuple, right_eye: tuple, nose: tuple):
    """
    Rotate a face image so that the line through both eyes becomes horizontal.
    The image origin (0, 0) is the top-left corner, and "left"/"right" refer to
    where the eye appears in the image, not to the person's own left/right.
    Args:
        img (numpy array): given image
        left_eye (tuple): (x, y) of the eye appearing on the left of the image
            (the person's right eye)
        right_eye (tuple): (x, y) of the eye appearing on the right of the image
            (the person's left eye)
        nose (tuple): coordinates of the nose; accepted but not used here
    Returns
        the rotated image as a numpy array, or ``img`` itself when the triangle
        built from the eyes is degenerate (an edge of length zero)
    """
    lx, ly = left_eye
    rx, ry = right_eye

    # Pick the right-angle corner of the triangle spanned by the two eyes and
    # remember which way the image has to turn.
    if ly > ry:
        corner = (rx, ly)
        direction = -1  # clockwise
    else:
        corner = (lx, ry)
        direction = 1  # counter-clockwise

    # Triangle edge lengths.
    side_a = findEuclideanDistance(np.array(left_eye), np.array(corner))
    side_b = findEuclideanDistance(np.array(right_eye), np.array(corner))
    side_c = findEuclideanDistance(np.array(right_eye), np.array(left_eye))

    # A zero edge would make the cosine rule divide by zero; leave img as is.
    if side_b == 0 or side_c == 0:
        return img

    cos_a = (side_b * side_b + side_c * side_c - side_a * side_a) / (2 * side_b * side_c)

    # Floating point error can push cos_a slightly outside [-1.0, 1.0]
    # (e.g. 1.0000000169176173), which would make np.arccos return NaN.
    cos_a = min(1.0, max(-1.0, cos_a))

    angle = np.arccos(cos_a)  # radians
    angle = (angle * 180) / math.pi  # degrees

    if direction == -1:
        angle = 90 - angle

    rotated = Image.fromarray(img).rotate(direction * angle)
    return np.array(rotated)

## Detect and Extract face (use **InsightFace**)

In [None]:
# Reference setup for the InsightFace detector. The statements are wrapped in
# a string literal, so nothing below is executed by this cell; run them outside
# the string to import cv2/numpy/insightface and to build the `app` object.
"""
import cv2
import numpy as np
import insightface
from insightface.app import FaceAnalysis
from insightface.data import get_image as ins_get_image

app = FaceAnalysis(allowed_modules=['detection']) # enable detection model only
app.prepare(ctx_id=0, det_size=(640, 640))

"""

def extract_insight_face(app, img):
    """
    Detect faces with ``app``, crop each one with a small margin and align it.

    Args:
        app: detector whose ``get(img)`` returns detections; each detection is
            indexed with ``'bbox'`` (x1, y1, x2, y2 corners) and ``'kps'``
            (landmark points).
        img: image array indexed as ``img[rows, cols]``.
            Presumably BGR as loaded by OpenCV; confirm against the caller.
    Returns:
        list with one dict per detected face:
            'facial_image': aligned crop with its last (channel) axis reversed
            'bbox': the detection box converted to [x, y, w, h] by convert_corr
    """
    expand_face_area = 2  # margin added on every side, in percent of box size
    resp = []

    for face in app.get(img):
        bbox = face['bbox']
        x1, y1, x2, y2 = bbox[0], bbox[1], bbox[2], bbox[3]

        # bbox holds corners, so the margin must come from the real width and
        # height rather than from the absolute x2/y2 coordinates.
        margin_x = int(((x2 - x1) * expand_face_area) / 100)
        margin_y = int(((y2 - y1) * expand_face_area) / 100)

        # Expand the facial area and stay within img.shape limits.
        left = max(0, x1 - margin_x)
        top = max(0, y1 - margin_y)
        right = min(img.shape[1], x2 + margin_x)
        bottom = min(img.shape[0], y2 + margin_y)

        facial_img = img[int(top):int(bottom), int(left):int(right)]

        # Face alignment. kps[0] is passed as the image-left eye and kps[1] as
        # the image-right eye, the same argument order the original call used.
        # Assumes InsightFace orders its keypoints image-left eye, image-right
        # eye, nose -- TODO confirm against the model in use.
        landmarks = face['kps']
        image_left_eye = landmarks[0]
        image_right_eye = landmarks[1]
        nose = landmarks[2]
        facial_img = alignment_procedure(facial_img, image_left_eye, image_right_eye, nose)

        resp.append({
            'facial_image': facial_img[:, :, ::-1],
            'bbox': convert_corr(bbox),
        })

    return resp

In [None]:
# Module-level MTCNN detector instance. `MTCNN` is not imported in the cells
# shown here, so it must already be in scope when this line runs.
detector = MTCNN()
def extract_mtcnn(detector, img):
  """
  Detect faces with an MTCNN-style detector, crop each with a small margin
  and align it.

  Args:
    detector: object whose ``detect_faces(img)`` returns detections; each
      detection is indexed with ``'box'`` ([x, y, width, height]) and
      ``'keypoints'`` (dict with 'left_eye', 'right_eye', 'nose').
    img: image array indexed as ``img[rows, cols]``.
  Returns:
    list with one dict per detected face:
      'facial_image': aligned crop with its last (channel) axis reversed
      'bbox': the detector's box, unchanged
  """
  expand_face_area = 2  # margin added on every side, in percent of box size
  resp = []

  for face in detector.detect_faces(img):
    bbox = face['box']
    box_x, box_y, box_w, box_h = bbox[0], bbox[1], bbox[2], bbox[3]

    # Margin is a percentage of the box's own width/height.
    margin_x = int((box_w * expand_face_area) / 100)
    margin_y = int((box_h * expand_face_area) / 100)

    # Expand the facial area and stay within img.shape limits. The top and
    # bottom edges are derived from the y origin (bbox[1]), not from bbox[0].
    x1 = max(0, box_x - margin_x)
    y1 = max(0, box_y - margin_y)
    x2 = min(img.shape[1], box_x + box_w + margin_x)
    y2 = min(img.shape[0], box_y + box_h + margin_y)

    facial_img = img[int(y1):int(y2), int(x1):int(x2)]

    # Face alignment. alignment_procedure expects the eye appearing on the
    # left of the image first. MTCNN names its keypoints from the viewer's
    # side (its 'left_eye' has the smaller x in the package's README example),
    # so they are passed through in that order -- NOTE(review): confirm for
    # the MTCNN version in use.
    landmarks = face['keypoints']
    left_eye = landmarks['left_eye']
    right_eye = landmarks['right_eye']
    nose = landmarks['nose']
    facial_img = alignment_procedure(facial_img, left_eye, right_eye, nose)

    resp.append({
        'facial_image': facial_img[:, :, ::-1],
        'bbox': bbox,
    })

  return resp

## Detect and crop all images in folder

In [None]:
def read_image(filename, image_dir='/content/drive/MyDrive/AI_HACKATHON_NEWBEES/data_processed/train'):
    """
    Load one image with OpenCV.

    Args:
        filename: file name relative to ``image_dir``
        image_dir: directory that holds the images; defaults to the training
            folder this notebook was written against
    Returns:
        whatever ``cv2.imread`` returns for ``{image_dir}/{filename}``. Per the
        OpenCV documentation that is None when the file cannot be read, so
        callers should check for it.
    """
    return cv2.imread(f'{image_dir}/{filename}')

In [None]:
batch_size = 500
# Ceiling division: a trailing partial batch still counts as one batch.
num_batches = (len(noface_files) + batch_size - 1) // batch_size

In [None]:
def crop_folder_image(img_list, batch_size = 500, app = None):
  """
  Detect and align the faces in each image, resize every crop to 256x256 and
  save it, working through the file list in batches.

  Args:
    img_list: List image file name (each is loaded with read_image)
    batch_size: Number of images processed in 1 batch
    app: detector handed to extract_insight_face. When omitted, the
      module-level ``app`` is used.
  Raises:
    ValueError: there are images to process but no detector is available.
  """
  if app is None:
    app = globals().get('app')
  if app is None and len(img_list) > 0:
    raise ValueError("No face detector available: pass `app` or define a module-level `app`.")

  num_batches = len(img_list) // batch_size + (len(img_list) % batch_size > 0)

  for batch_index in range(num_batches):
    print("-------------------------------------------")
    print(f"Processing batch {batch_index}")

    start_index = batch_index * batch_size
    end_index = min((batch_index + 1) * batch_size, len(img_list))

    # Take this batch's slice of the file list.
    batch_data = img_list[start_index:end_index]
    count = 0
    count_zero = 0
    for filename in batch_data:
      img = read_image(filename)
      if img is None:
        # Unreadable or missing file: report it and keep the batch going.
        print(f'Cannot read file {filename}.')
        continue

      result = extract_insight_face(app, img)

      # Per-image bookkeeping happens once, outside the per-face loop, so
      # images without any detection are actually counted.
      if len(result) == 0:
        count_zero += 1
      if len(result) != 1:
        print(f'File {filename} detected {len(result)} face(s).')

      for i, face in enumerate(result):
        # Extra faces get an index prefix; the prefix is built from the
        # original file name each time so it never accumulates.
        out_name = filename if i == 0 else f'{i}_{filename}'
        save_file_name = f'/content/drive/MyDrive/AI_HACKATHON_NEWBEES/data_processed/p/{out_name}'

        try:
          # Resize
          face['facial_image'] = cv2.resize(face['facial_image'], (256,256))

          # Save (reverse the channel axis again before handing it to OpenCV)
          if not os.path.exists(save_file_name):
              cv2.imwrite(save_file_name, face['facial_image'][:, :, ::-1])
          else:
              print(f'File {save_file_name} existed.')

        except Exception as e:
          # Best effort: one bad crop must not abort the whole run.
          print(f'Error when save file {out_name}: {e}')

        count += 1

    print(f"Finish batch {batch_index}.")
    print(f"Cropped {count} files.")
    print(f"Number of images cannot detect any face: {count_zero}")