In [1]:
import numpy as np
import os
import cv2

import torch
from torchvision import transforms
import matplotlib.pyplot as plt

from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(


In [2]:
# Folder holding the FER2013Plus splits as .npy arrays.
data_src_folder = "C:\\BME\\7_felev\\szakdoli\\dataset\\FER2013Plus\\numpy_data"

train_images = np.load(os.path.join(data_src_folder, 'train_images.npy'))
train_labels = np.load(os.path.join(data_src_folder, 'train_labels.npy'))

val_images = np.load(os.path.join(data_src_folder, 'val_images.npy'))
val_labels = np.load(os.path.join(data_src_folder, 'val_labels.npy'))

test_images = np.load(os.path.join(data_src_folder, 'test_images.npy'))
test_labels = np.load(os.path.join(data_src_folder, 'test_labels.npy'))

# Each line names the split it actually reports (all three used to say "train").
print(f"train_images shape: {train_images.shape}, train_labels shape: {train_labels.shape}")
print(f"val_images shape: {val_images.shape}, val_labels shape: {val_labels.shape}")
print(f"test_images shape: {test_images.shape}, test_labels shape: {test_labels.shape}")

train_images shape: (28389, 48, 48), train_labels shape: (28389, 8)
train_images shape: (3546, 48, 48), train_labels shape: (3546, 8)
train_images shape: (3553, 48, 48), train_labels shape: (3553, 8)


In [3]:
from torch.utils.data import Dataset, DataLoader
class FERP_PreDataset(Dataset):
  """Dataset that pairs face images with their label vectors.

  An item is the image at the requested index, converted to a 3-channel
  uint8 array and passed through ``transform``, together with the matching
  label as a float32 tensor.
  """

  def __init__(self, x_face, y_lab, transform):
    super().__init__()
    self.transform = transform
    self.x_face = x_face
    self.y_lab = y_lab

  def __len__(self):
    # The number of samples is taken from the labels.
    return len(self.y_lab)

  def __getitem__(self, index):
    gray = np.array(self.x_face[index], dtype=np.uint8)
    target = self.y_lab[index]

    # Replicate the single grey channel into three channels.
    rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    return self.transform(rgb), torch.tensor(target, dtype=torch.float32)

In [4]:
# Batch size shared by the three loaders below.
batch_size = 32

# Both pipelines convert the input array to a PIL image and resize it to 224x224.
train_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.Resize((224, 224))]
)
test_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.Resize((224, 224))]
)

train_dataset = FERP_PreDataset(train_images, train_labels, train_transform)
val_dataset = FERP_PreDataset(val_images, val_labels, test_transform)
test_dataset = FERP_PreDataset(test_images, test_labels, test_transform)

# NOTE(review): the datasets return PIL images; only len() of the loaders is
# used in this cell — confirm batching works before iterating over them.
train_loader = DataLoader(train_dataset, batch_size, drop_last=True, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

print ('train loader ', len(train_loader), 'val loader ', len(val_loader), 'test', len(test_loader))

train loader  887 val loader  111 test 112


In [5]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

# Local path of the face landmarker model file handed to MediaPipe.
model_path_local = 'C:\\BME\\7_felev\\szakdoli\\models\\mediapipe\\face_landmarker_v2_with_blendshapes.task'

# Landmarker limited to one face that also outputs blendshapes and the
# facial transformation matrices.
base_options = python.BaseOptions(model_asset_path=model_path_local)
options = vision.FaceLandmarkerOptions(
    base_options=base_options,
    num_faces=1,
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
)
detector = vision.FaceLandmarker.create_from_options(options)




In [6]:
# Visualization code by google
#@markdown We implemented some functions to visualize the face landmark detection results. <br/> Run the following cell to activate the functions.

from mediapipe import solutions
from mediapipe.framework.formats import landmark_pb2
import drawing_utils_copy


def draw_landmarks_on_image(rgb_image, detection_result, ):
  """Return a copy of ``rgb_image`` with every detected face mesh drawn on it.

  Args:
    rgb_image: Image array; it is copied, the input itself is not drawn on.
    detection_result: Object whose ``face_landmarks`` attribute holds one
      sequence of landmarks (with ``x``, ``y``, ``z``) per detected face.

  Returns:
    The annotated copy of ``rgb_image``.
  """
  canvas = np.copy(rgb_image)

  for landmarks in detection_result.face_landmarks:
    # Repackage the landmarks as the protobuf list the drawing helper takes.
    proto = landmark_pb2.NormalizedLandmarkList()
    proto.landmark.extend([
      landmark_pb2.NormalizedLandmark(x=point.x, y=point.y, z=point.z)
      for point in landmarks
    ])

    mesh = mp.solutions.face_mesh
    styles = mp.solutions.drawing_styles
    # Drawn in this order: tesselation, then contours, then irises.
    layers = (
        (mesh.FACEMESH_TESSELATION,
         styles.get_default_face_mesh_tesselation_style()),
        (mesh.FACEMESH_CONTOURS,
         styles.get_default_face_mesh_contours_style()),
        (mesh.FACEMESH_IRISES,
         styles.get_default_face_mesh_iris_connections_style()),
    )
    for connections, connection_style in layers:
      drawing_utils_copy.draw_landmarks(
          image=canvas,
          landmark_list=proto,
          connections=connections,
          landmark_drawing_spec=None,
          connection_drawing_spec=connection_style)

  return canvas

def plot_face_blendshapes_bar_graph(face_blendshapes):
  """Show a horizontal bar chart with one bar per blendshape score.

  Args:
    face_blendshapes: Collection of items exposing ``category_name`` and
      ``score``; bars appear in the order of this collection.
  """
  names = [category.category_name for category in face_blendshapes]
  scores = [category.score for category in face_blendshapes]
  # One row per blendshape, numbered in input order.
  rows = range(len(names))

  fig, ax = plt.subplots(figsize=(12, 12))
  bars = ax.barh(rows, scores, label=[str(row) for row in rows])
  ax.set_yticks(rows, names)
  # Flip the axis so the first blendshape ends up at the top.
  ax.invert_yaxis()

  # Write the numeric score at the end of every bar.
  for patch, score in zip(bars.patches, scores):
    plt.text(patch.get_x() + patch.get_width(), patch.get_y(), f"{score:.4f}", va="top")

  ax.set_xlabel('Score')
  ax.set_title("Face Blendshapes")
  plt.tight_layout()
  plt.show()

### Calculate and save the mask for one image

In [7]:
# normalizer code from mediapipe

from typing import List, Mapping, Optional, Tuple, Union
import math

def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""

  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))

  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px

In [8]:
def get_detection_result_for_image(image, detector):
    """Run ``detector`` on ``image`` and return what ``detector.detect`` yields.

    The image is first converted to a uint8 numpy array and wrapped in an
    SRGB ``mp.Image``.
    """
    pixels = np.array(image, dtype=np.uint8)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=pixels)
    return detector.detect(mp_image)

In [9]:
def convert_3d_2_2d(face_landmarks, image_width, image_height):
    """Project normalized landmarks to pixel coordinates.

    Args:
        face_landmarks: Iterable of landmarks exposing normalized ``x`` and ``y``.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        A pair ``(coords, invalid_indices)``. ``coords`` is an integer array of
        shape (n_landmarks, 2) holding (x, y) pixels; landmarks that cannot be
        converted get the placeholder ``[0, 0]``. ``invalid_indices`` is an
        integer array with the positions of those placeholder rows.
    """
    invalid_indices = []
    pixel_coords = []
    for position, landmark in enumerate(face_landmarks):
        px_coords = _normalized_to_pixel_coordinates(landmark.x, landmark.y, image_width, image_height)

        if px_coords is None:
            # Keep the row so landmark positions stay aligned; remember it as invalid.
            pixel_coords.append([0, 0])
            invalid_indices.append(position)
        else:
            x, y = px_coords
            pixel_coords.append([x, y])

    # Explicit dtype/shape so empty inputs still give integer arrays of the
    # documented shape instead of float arrays of shape (0,).
    return (np.array(pixel_coords, dtype=int).reshape(-1, 2),
            np.array(invalid_indices, dtype=int))

In [10]:
# Single-image example: run the landmarker on test sample 86 and save its
# 2D face-mesh points to disk.
test_image, test_label = test_dataset[86]
detection_result = get_detection_result_for_image(test_image, detector)
# NOTE(review): face_landmarks[0] raises IndexError if no face was detected
# for this sample; `indices` and `test_label` are not used in this cell.
face_mesh_points, indices = convert_3d_2_2d(detection_result.face_landmarks[0], test_image.width, test_image.height) # pixel coords as a numpy array

data_src_folder = "C:\\BME\\7_felev\\szakdoli\\dataset\\FER2013Plus\\numpy_data"

np.save(os.path.join(data_src_folder, 'face_mesh_points.npy'), face_mesh_points)

### Calculating the mask

In [11]:
# normalizer code from mediapipe

from typing import List, Mapping, Optional, Tuple, Union
import math

def _normalized_to_pixel_coordinates(
    normalized_x: float, normalized_y: float, image_width: int,
    image_height: int) -> Union[None, Tuple[int, int]]:
  """Converts normalized value pair to pixel coordinates."""

  # Checks if the float value is between 0 and 1.
  def is_valid_normalized_value(value: float) -> bool:
    return (value > 0 or math.isclose(0, value)) and (value < 1 or
                                                      math.isclose(1, value))

  if not (is_valid_normalized_value(normalized_x) and
          is_valid_normalized_value(normalized_y)):
    # TODO: Draw coordinates even if it's outside of the image bounds.
    return None
  x_px = min(math.floor(normalized_x * image_width), image_width - 1)
  y_px = min(math.floor(normalized_y * image_height), image_height - 1)
  return x_px, y_px

In [29]:
def get_detection_result_for_image(image, detector):
    """Run ``detector`` on ``image`` and return what ``detector.detect`` yields.

    The image is first converted to a uint8 numpy array and wrapped in an
    SRGB ``mp.Image``.
    """
    pixels = np.array(image, dtype=np.uint8)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=pixels)
    return detector.detect(mp_image)

In [30]:
def calculate_transformation_values_for_image(detection_result, image_width, image_height, target_distance=90):
    """Compute the translation, rotation and scale that normalise a face.

    Landmark 4 (called "nose" here) is moved to the image centre, landmark 151
    ("forehead") is rotated to the point straight above the centre in image
    coordinates (smaller y), and the nose-forehead distance is scaled to
    ``target_distance`` pixels.

    Args:
        detection_result: Landmarker result; only ``face_landmarks[0]`` is read.
        image_width: Image width in pixels.
        image_height: Image height in pixels.
        target_distance: Nose-forehead distance in pixels after scaling.

    Returns:
        ``(translation, rotation_angle, scaling_factor)``: ``translation`` is a
        length-2 (x, y) array, ``rotation_angle`` is in degrees and positive
        when the forehead lies to the right of the nose, ``scaling_factor`` is
        ``target_distance`` divided by the nose-forehead distance. Returns
        ``None`` when either landmark falls outside the image or both land on
        the same pixel, in which case no angle or scale is defined.
    """
    first_face = detection_result.face_landmarks[0]
    nose_3d = first_face[4]
    forehead_3d = first_face[151]

    nose = _normalized_to_pixel_coordinates(nose_3d.x, nose_3d.y, image_width, image_height)
    forehead = _normalized_to_pixel_coordinates(forehead_3d.x, forehead_3d.y, image_width, image_height)
    if nose is None or forehead is None:
        return None

    nose_2d = np.array(nose)
    forehead_2d = np.array(forehead)
    # Points are (x, y), so the centre is (width / 2, height / 2); the previous
    # (height / 2, width / 2) was only correct for square images.
    center = np.array([int(image_width / 2), int(image_height / 2)])

    translation = center - nose_2d

    distance_nose_forehead = np.linalg.norm(nose_2d - forehead_2d)
    if distance_nose_forehead == 0:
        # Coincident landmarks would cause a division by zero below.
        return None

    rotation_multiplier = 1 if (forehead_2d - nose_2d)[0] > 0 else -1

    translated_forehead_2d = forehead_2d + translation
    rotated_forehead_2d = center + np.array([0, -distance_nose_forehead])

    scaling_factor = target_distance / distance_nose_forehead

    # The current and the upright forehead positions lie on a circle of radius
    # `distance_nose_forehead` around the centre; the angle between them
    # follows from half the chord: angle = 2 * asin(half_chord / radius).
    half_chord = np.linalg.norm(translated_forehead_2d - rotated_forehead_2d) / 2
    # Clamp so rounding cannot push the ratio above 1 (math.asin would raise).
    ratio = min(1.0, half_chord / distance_nose_forehead)

    rotation_angle = math.degrees(2 * math.asin(ratio))

    return translation, rotation_angle * rotation_multiplier, scaling_factor
    

In [31]:
def translate_points(points, translation):
    """Return ``points`` shifted by ``translation`` (plain ``+``; the input is not modified)."""
    return points + translation

def rotate_points(points, center, angle):
    """Rotate ``points`` around ``center`` by ``angle`` degrees.

    Uses the standard 2x2 rotation matrix applied to the offsets from
    ``center``; returns a new array.
    """
    theta = np.radians(angle)
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    rotation = np.array([[cos_t, -sin_t],
                         [sin_t, cos_t]])

    # Rotate the offsets from the centre, then move them back.
    offsets = points - center
    return offsets.dot(rotation.T) + center

def zoom_points(points, zoom_factor, center):
    """Scale ``points`` about ``center`` by ``zoom_factor``.

    A factor equal to 1 returns the input object itself, not a copy.
    """
    if zoom_factor == 1:
        return points
    offsets = points - center
    return offsets * zoom_factor + center

In [None]:
data_src_folder = "C:\\BME\\7_felev\\szakdoli\\dataset\\FER2013Plus\\numpy_data"
def convert_3d_2_2d(face_landmarks, image_width, image_height):
    """Project normalized landmarks to pixel coordinates.

    Args:
        face_landmarks: Iterable of landmarks exposing normalized ``x`` and ``y``.
        image_width: Image width in pixels.
        image_height: Image height in pixels.

    Returns:
        A pair ``(coords, invalid_indices)``. ``coords`` is an integer array of
        shape (n_landmarks, 2) holding (x, y) pixels; landmarks that cannot be
        converted get the placeholder ``[0, 0]``. ``invalid_indices`` is an
        integer array with the positions of those placeholder rows.
    """
    invalid_indices = []
    pixel_coords = []
    for position, landmark in enumerate(face_landmarks):
        px_coords = _normalized_to_pixel_coordinates(landmark.x, landmark.y, image_width, image_height)

        if px_coords is None:
            # Keep the row so landmark positions stay aligned; remember it as invalid.
            pixel_coords.append([0, 0])
            invalid_indices.append(position)
        else:
            x, y = px_coords
            pixel_coords.append([x, y])

    # Explicit dtype/shape so empty inputs still give integer arrays of the
    # documented shape instead of float arrays of shape (0,).
    return (np.array(pixel_coords, dtype=int).reshape(-1, 2),
            np.array(invalid_indices, dtype=int))

# Normalise the face mesh of every test image (nose to the centre, forehead
# upright, fixed nose-forehead distance), save each mask as its own .npy file
# and collect them all in `transformed_masks`.
masks_dir = os.path.join(data_src_folder, "transformed_masks")
# np.save does not create missing folders, so make sure the target exists.
os.makedirs(masks_dir, exist_ok=True)

transformed_masks = []
c = 0  # NOTE(review): never read in this cell; kept in case a later cell relies on it.
for count in range(len(test_dataset)):
    test_image, test_label = test_dataset[count]

    if count % 500 == 0:
        print(count)

    detection_result = get_detection_result_for_image(test_image, detector)
    if len(detection_result.face_landmarks) == 0:
        continue  # no face detected in this image

    transform_values = calculate_transformation_values_for_image(detection_result, test_image.width, test_image.height)
    if transform_values is None:
        continue  # a reference landmark fell outside the image

    translation, rotation_angle, scaling_factor = transform_values
    face_mesh_points, indices = convert_3d_2_2d(detection_result.face_landmarks[0], test_image.width, test_image.height) # numpy array

    # Rotate and zoom about the image centre; this equals the former
    # hard-coded [112, 112] for the 224x224 images used here.
    image_center = [int(test_image.width / 2), int(test_image.height / 2)]

    translated_points = translate_points(face_mesh_points, translation)
    rotated_points = rotate_points(translated_points, image_center, -rotation_angle)
    zoomed_points = zoom_points(rotated_points, scaling_factor, image_center)

    # Restore the [0, 0] placeholder for landmarks that had no valid pixel
    # position, so the averaging cells can recognise and skip them.
    for i in indices:
        zoomed_points[i] = np.array([0, 0])

    transformed_masks.append(zoomed_points)
    np.save(os.path.join(masks_dir, f"transformed_mask_{count}.npy"), zoomed_points)

transformed_masks = np.array(transformed_masks)

0
dik
500
1000
1500
2000
2500
3000
3500


In [12]:
# Display the shape of the stacked masks array built by the loop cell.
transformed_masks.shape

(3117, 478, 2)

In [37]:
# For every landmark index, count the masks in which that landmark is a real
# point, i.e. not the [0, 0] placeholder written for invalid landmarks.
# Vectorised over the whole (n_masks, n_landmarks, 2) array: a point counts
# when at least one of its two coordinates is non-zero.
average_divisors = np.any(transformed_masks != 0, axis=-1).sum(axis=0).astype(np.float64)

In [36]:
# Add up all masks along the first axis, leaving one summed (x, y) row per landmark.
summed_coordinates = transformed_masks.sum(axis=0)
print(summed_coordinates.shape)

(478, 2)


In [38]:
# Turn the per-landmark counts into a column so they broadcast over (x, y).
# reshape(-1, 1) gives (n_landmarks, 1) no matter how often the cell is run,
# whereas indexing with np.newaxis added another axis on every re-run.
average_divisors = average_divisors.reshape(-1, 1)
print(average_divisors.shape)
# NOTE(review): a landmark with a count of 0 would yield NaN here.
average_mask = summed_coordinates / average_divisors
print(average_mask.shape)

(478, 1)
(478, 2)


In [39]:
# Print the per-landmark counts; the former list wrapper around print() only
# added a spurious `[None]` cell output.
print(average_divisors)

[[3117.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3115.]
 [3117.]
 [3117.]
 [2889.]
 [3117.]
 [3117.]
 [3116.]
 [3115.]
 [3110.]
 [3087.]
 [3040.]
 [3032.]
 [3117.]
 [3117.]
 [2966.]
 [3117.]
 [3117.]
 [3117.]
 [3115.]
 [3117.]
 [3117.]
 [3117.]
 [3116.]
 [3114.]
 [3112.]
 [2925.]
 [3115.]
 [3002.]
 [3105.]
 [3117.]
 [3117.]
 [3116.]
 [3117.]
 [3117.]
 [3116.]
 [3116.]
 [3116.]
 [3117.]
 [3117.]
 [3103.]
 [3117.]
 [3117.]
 [3117.]
 [3113.]
 [3117.]
 [3115.]
 [3109.]
 [2993.]
 [3117.]
 [3117.]
 [3116.]
 [2999.]
 [3117.]
 [3117.]
 [3116.]
 [3116.]
 [3100.]
 [3117.]
 [3117.]
 [3117.]
 [2912.]
 [3069.]
 [3091.]
 [3091.]
 [3063.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3116.]
 [3116.]
 [3116.]
 [3117.]
 [3116.]
 [3116.]
 [3116.]
 [3059.]
 [3070.]
 [3094.]
 [3114.]
 [3116.]
 [3116.]
 [3116.]
 [3116.]
 [3114.]
 [3117.]
 [2874.]
 [3117.]
 [3116.]
 [3116.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [3117.]
 [2958.]
 [3074.]
 [3108.]
 [3115.]
 [3117.]
 [3109.]
 [2888.]
 [3115.]
 

[None]

In [43]:
# Store the averaged mask next to the dataset arrays.
data_src_folder = "C:\\BME\\7_felev\\szakdoli\\dataset\\FER2013Plus\\numpy_data"

average_mask_path = os.path.join(data_src_folder, 'average_mask.npy')
np.save(average_mask_path, average_mask)

In [41]:
# Sanity check: print the shape of the averaged mask.
print(average_mask.shape)

(478, 2)


In [42]:
# Print the averaged mask coordinates.
# NOTE(review): this dumps the whole array; a slice such as average_mask[:5]
# would keep the output short.
print(average_mask)

[[112.71553424 146.0833622 ]
 [112.070558   119.69075116]
 [112.23377555 127.58151098]
 [105.46229414  93.89190856]
 [112.         112.        ]
 [111.96384066 101.67633927]
 [112.19006249  75.31835632]
 [ 57.39300797  69.64031036]
 [111.99809821  53.22620201]
 [112.03778431  42.3919845 ]
 [112.10603979   2.00174859]
 [112.76079162 148.89791652]
 [112.83691631 151.6186784 ]
 [112.69539268 153.71199188]
 [113.03714012 165.74850938]
 [112.98508463 169.22316524]
 [113.02445382 173.39231221]
 [112.86568002 177.45234722]
 [112.82601692 182.31894613]
 [112.14822913 123.90264891]
 [104.81687319 123.95426441]
 [ 29.60935855  33.36999996]
 [ 79.44692572  76.29299948]
 [ 71.76005248  77.07789964]
 [ 64.04855681  76.86728115]
 [ 54.05268607  72.06791858]
 [ 85.65780151  74.60609804]
 [ 66.41189941  52.49003542]
 [ 75.97793601  53.78576513]
 [ 58.03570136  53.89354292]
 [ 52.91453962  57.18767401]
 [ 47.83274442  76.560728  ]
 [ 83.97879675 188.96174041]
 [ 54.57118012  67.3248785 ]
 [ 27.30967745