In [32]:
import shutil
import os
import zipfile
import urllib.request
from typing import Optional, Sequence, cast, ClassVar, cast
from dataclasses import dataclass

import tensorflow as tf
import keras
import pandas as pd
import numpy as np
import numpy.typing as npt
import cv2 as cv
import tqdm

import scipy.spatial.distance

import sklearn.metrics
import sklearn.manifold
import sklearn.decomposition

import plotly.express as px
import plotly.offline

import skimage.feature

from standalone_preprocessing_library import *

plotly.offline.init_notebook_mode(connected=True)

Various constants

In [2]:
DATA_DIR_PATH = "data"
DATASET_PATH = "dataset.zip"
TRAINING_DATA_CSV_PATH = os.path.join(DATA_DIR_PATH, "data.csv")

MODEL_DIR_PATH = "models"
EXPRESSION_RECOGNITION_MODEL_PATH = os.path.join(MODEL_DIR_PATH, "expression_recognition.keras")
LBFMODEL_PATH = os.path.join(MODEL_DIR_PATH, "lbfmodel.yaml")
CASCADE_CLASSIFIER_PATH = os.path.join(MODEL_DIR_PATH, "haarcascade_frontalface_default.xml")

STANDARD_DIMENSIONS = Dimension(240, 240)
FACE_DIMENSIONS = Dimension(120, 120)
PREVIEW_DIMENSIONS = Dimension(500, 500)

LBP_HISTOGRAM_BIN_COUNT = 8
LBP_GRID_SIZE = (8,8)

In [3]:
class SupportedFacialExpressionLabels:
  Ours = ["angry", "disgusted", "happy", "neutral", "sad", "surprised"]
  Fer2013 = ["angry", "disgust", "fear", "happy", "neutral", "sad", "surprise"] 
  CkPlus = ["Anger", "Disgust", "Fear", "Happiness", "Sadness", "Surprise", "Neutral", "Contempt"]

CLASS_NAMES = SupportedFacialExpressionLabels.Ours

Utility functions

In [4]:
def get_files_in_folder(basepath: str)->list[str]:
  if not os.path.exists(basepath):
    return []
  file_paths = [os.path.join(basepath, filename) for filename in os.listdir(basepath)]
  return [path for path in file_paths if os.path.isfile(path)]

def partition_grid(self, rows: int, cols: int)->list["Rectangle"]:
  row_delta = int(self.width / rows)
  col_delta = int(self.height / cols)
  rects: list["Rectangle"] = []
  for row in range(rows):
    for col in range(cols):
      row_start = row * row_delta
      col_start = col * col_delta
      rect = Rectangle(col_start, row_start, col_start + col_delta, row_start + row_delta)
      rects.append(rect)
  return rects

@dataclass
class FaceLandmark:
  face_shape: npt.NDArray
  eyes: npt.NDArray
  eyebrows: npt.NDArray
  nose: npt.NDArray
  lips: npt.NDArray
  dims: Dimension

  @property
  def feature_points(self)->npt.NDArray:
    return np.vstack([self.eyes, self.eyebrows, self.nose, self.lips])

  def as_feature_vector(self)->npt.NDArray:
    # https://arxiv.org/pdf/1812.04510
    # 17 points are dedicated for the shape of the face, which we don't really need.
    normalized_points = self.feature_points / np.array((self.dims.width, self.dims.height))
    interdistance_map = scipy.spatial.distance.cdist(normalized_points, normalized_points, "euclidean").flatten()
    # All diagonal values are excluded
    excluded_points = np.eye(len(self.feature_points)).flatten() == 1

    # Square the interdistance map to make larger differences more prominent
    interdistance_map = np.power(interdistance_map[~excluded_points], 2)

    # Also calculate the distance to the average point in the face
    average_point = normalized_points.mean(axis=0)
    distances_to_center = scipy.spatial.distance.cdist(np.array([average_point]), normalized_points, "euclidean")[0]

    feature_vector = np.hstack((interdistance_map, distances_to_center))

    return feature_vector


  EYE_COLOR: ClassVar[tuple[int, int, int]] = (0, 0, 255)
  LIP_COLOR: ClassVar[tuple[int, int, int]] = (0, 255, 0)
  NOSE_COLOR: ClassVar[tuple[int, int, int]] = (255, 0, 0)
  FACE_SHAPE_COLOR: ClassVar[tuple[int, int, int]] = (255, 255, 0)
  EYEBROW_COLOR: ClassVar[tuple[int, int, int]] = (0, 255, 255)

  def project_point(self, point: npt.NDArray, rect: Optional[Rectangle] = None)->Sequence[int]:
    if rect is None:
      return cast(Sequence[int], point.astype(np.int32))
    projected_point = point * rect.dimensions.ndarray / self.dims.ndarray
    return cast(Sequence[int], (projected_point + rect.p0.ndarray).astype(np.int32))
  
  def draw_on(self, img: cv.typing.MatLike, *, offset: Optional[Rectangle] = None):
    for point in self.eyes:
      cv.circle(img, self.project_point(point, offset), 1, self.EYE_COLOR, -1)
    for point in self.lips:
      cv.circle(img, self.project_point(point, offset), 1, self.LIP_COLOR, -1)
    for point in self.nose:
      cv.circle(img, self.project_point(point, offset), 1, self.NOSE_COLOR, -1)
    for point in self.face_shape:
      cv.circle(img, self.project_point(point, offset), 1, self.FACE_SHAPE_COLOR, -1)
    for point in self.eyebrows:
      cv.circle(img, self.project_point(point, offset), 1, self.EYEBROW_COLOR, -1)

  @staticmethod
  def from_raw_landmark(points: npt.NDArray, dims: Dimension):
    return FaceLandmark(
      face_shape=points[:17],
      eyebrows=points[17:27],
      nose=points[27:36],
      eyes=points[36:48],
      lips=points[48:],
      dims=dims
    )
  
def face_alignment(img: cv.typing.MatLike, landmark: FaceLandmark):
  # https://pyimagesearch.com/2017/05/22/face-alignment-with-opencv-and-python/
  dims = Dimension.from_shape(img.shape)
  desired_left_eye = Point(int(dims.width * 0.22), int(dims.height * 0.25))
  desired_right_eye_x = FACE_DIMENSIONS.width - desired_left_eye.x

  left_eye_avg = landmark.eyes[0:6].mean(axis=0)
  right_eye_avg = landmark.eyes[6:].mean(axis=0)

  delta = right_eye_avg - left_eye_avg
  angle = np.degrees(np.arctan2(delta[1], delta[0]))

  dist = np.sqrt(delta[0] ** 2 + delta[1] ** 2)
  desired_dist = desired_right_eye_x - desired_left_eye.x
  scale = desired_dist / dist

  eyes_center = np.array([left_eye_avg, right_eye_avg]).mean(axis=0)
  rotation_matrix = cv.getRotationMatrix2D(eyes_center, angle, scale)

  translation_x = FACE_DIMENSIONS.width * 0.5
  translation_y = desired_left_eye.y
  rotation_matrix[0, 2] += (translation_x - eyes_center[0])
  rotation_matrix[1, 2] += (translation_y - eyes_center[1])

  img = cv.warpAffine(img, rotation_matrix, FACE_DIMENSIONS.tuple, flags=cv.INTER_CUBIC)

  return img


Load various preprocessing models

In [5]:
# https://medium.com/analytics-vidhya/facial-landmarks-and-face-detection-in-python-with-opencv-73979391f30e
LBFMODEL_URL = "https://github.com/kurnianggoro/GSOC2017/raw/master/data/lbfmodel.yaml"
if not os.path.exists(LBFMODEL_PATH):
  print(f"Cannot find any LBFmodel installation. Installing from {LBFMODEL_URL} to {LBFMODEL_PATH}")
  urllib.request.urlretrieve(LBFMODEL_URL, LBFMODEL_PATH)
landmark_model = cv.face.createFacemarkLBF()
landmark_model.loadModel(LBFMODEL_PATH)

In [6]:
CASCADE_CLASSIFIER_URL = "https://raw.githubusercontent.com/opencv/opencv/refs/heads/4.x/data/haarcascades/haarcascade_frontalface_default.xml"
if not os.path.exists(CASCADE_CLASSIFIER_PATH):
  urllib.request.urlretrieve(CASCADE_CLASSIFIER_URL, CASCADE_CLASSIFIER_PATH) 
locator_model = cv.CascadeClassifier(CASCADE_CLASSIFIER_PATH)
locator_model.load(CASCADE_CLASSIFIER_PATH)

clahe = cv.createCLAHE(tileGridSize=(8, 8), clipLimit=2.0)

Dump the contents of the zip file into the data folder. You don't have to run this if this has been performed.

In [7]:
if (os.path.exists(DATA_DIR_PATH)):
  shutil.rmtree(DATA_DIR_PATH)
with zipfile.ZipFile(DATASET_PATH, 'r') as zip_ref:
  zip_ref.extractall(DATA_DIR_PATH)

List all folder contents

In [8]:
@dataclass
class TrainDataEntry:
  path: str
  label: int

entries: list[TrainDataEntry] = []
for folder in os.scandir(DATA_DIR_PATH):
  if not folder.is_dir():
    continue
  try:
    expression = CLASS_NAMES.index(folder.name)
  except ValueError:
    print(f"Skipping the inclusion of {folder.name} in the dataset.")
    continue

  entries.extend(map(
    lambda fpath: TrainDataEntry(path=fpath, label=expression),
    get_files_in_folder(folder.path)
  ))

Execute data preprocessing steps for our dataset

In [44]:
skipped = 0
list_data: list[npt.NDArray] = []
list_labels: list[int] = []
for entry in tqdm.tqdm(entries, desc="Building dataset from images"):
  original = cv.imread(entry.path)
  
  # Preprocessing
  img_resized = resize_image(original, STANDARD_DIMENSIONS)
  img_grayscale = cv.cvtColor(img_resized, cv.COLOR_BGR2GRAY)
  img_clahe = clahe.apply(img_grayscale)

  # Extract face coordinates
  face_coordinates = locator_model.detectMultiScale(img_clahe)
  face_rects = list(Rectangle.from_tuple(coords) for coords in face_coordinates)
  face_rects.sort(key=lambda x: x.area)
  saved_face_rects: list[Rectangle] = []
  # Prevent overlapping face rectangles
  for face_rect_a in face_rects:
    is_overlapping = False
    for face_rect_b in saved_face_rects:
      IOU = face_rect_a.intersection_with_union(face_rect_b)
      if IOU > 0.4:
        is_overlapping = True
        break

    if not is_overlapping:
      saved_face_rects.append(face_rect_a)

  # Get faces from original image
  faces = list(
    img_clahe[pos.slice]
    for pos in face_rects
  )

  features: list[npt.NDArray] = []
  for raw_face_img in faces:
    # Additional preprocessing
    face_resized = cv.resize(raw_face_img, FACE_DIMENSIONS.tuple, interpolation=cv.INTER_CUBIC)
    face_blurred = cv.filter2D(face_resized, -1, GAUSSIAN_3X3_KERNEL)
    face_preprocessed = cv.filter2D(face_blurred, -1, SHARPEN_KERNEL)

    # Get face landmarks
    _, raw_face_landmarks = landmark_model.fit(face_preprocessed, np.array(((0, 0, face_preprocessed.shape[0], face_preprocessed.shape[1]),)))
    face_landmark_points: npt.NDArray = raw_face_landmarks[0][0]
    face_landmark = FaceLandmark(
      face_shape=face_landmark_points[:17],
      eyebrows=face_landmark_points[17:27],
      nose=face_landmark_points[27:36],
      eyes=face_landmark_points[36:48],
      lips=face_landmark_points[48:],
      dims=Dimension.from_shape(face_preprocessed.shape)
    )
    face_dims = Dimension.from_shape(raw_face_img.shape)
    face_landmark = FaceLandmark.from_raw_landmark(face_landmark_points, face_dims)
    face_aligned = face_alignment(face_preprocessed, face_landmark)

    # Split image to grids for LBP
    lbp_grid_rects = partition_grid(face_dims, *LBP_GRID_SIZE)
    histograms: list[npt.NDArray] = []

    # Perform LBP
    lbp_image: npt.NDArray = skimage.feature.local_binary_pattern(face_aligned, 8, 1)
    for lbp_grid_rect in lbp_grid_rects:
      chunk = lbp_image[lbp_grid_rect.slice]

      if chunk.size == 0:
        histograms.append(np.full((LBP_HISTOGRAM_BIN_COUNT,), 0))
        continue
      histograms.append(scipy.ndimage.histogram(chunk, 0, 255, LBP_HISTOGRAM_BIN_COUNT) / chunk.size)

    feature_vector = np.hstack(histograms)
    features.append(feature_vector)

  if len(features) == 0:
    print(f"\nSkipping {entry.path} because no faces were found in the image.")
    skipped += 1
    continue
  list_data.extend(features)
  list_labels.extend([entry.label] * len(features))

print(f"Skipped over {skipped} images because no faces were found in the images.")

data = np.array(list_data)
labels = np.array(list_labels).reshape((-1, 1))
dfdata = np.hstack((labels, data))
df = pd.DataFrame(dfdata, columns=[
  "label",
  *map(lambda idx: f'feature-{idx + 1}', range(data.shape[1])),
])

df.to_csv(TRAINING_DATA_CSV_PATH, index=False)

Building dataset from images:   0%|          | 0/1089 [00:00<?, ?it/s]


Skipping data\angry\20240927_173906.jpg because no faces were found in the image.


Building dataset from images:   1%|          | 9/1089 [00:00<01:07, 15.91it/s]


Skipping data\angry\20241008_180500.jpg because no faces were found in the image.

Skipping data\angry\20241008_180548.jpg because no faces were found in the image.


Building dataset from images:   1%|▏         | 14/1089 [00:01<01:14, 14.38it/s]


Skipping data\angry\20241008_180735.jpg because no faces were found in the image.

Skipping data\angry\20241008_180740.jpg because no faces were found in the image.


Building dataset from images:   9%|▉         | 100/1089 [00:07<01:24, 11.73it/s]


Skipping data\angry\marah_4.jpg because no faces were found in the image.

Skipping data\angry\marah_5.jpg because no faces were found in the image.

Skipping data\angry\marah_7.jpg because no faces were found in the image.


Building dataset from images:  11%|█         | 116/1089 [00:08<01:07, 14.52it/s]


Skipping data\angry\Resize_20241210_223059_9926.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223104_4667.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223109_9133.jpg because no faces were found in the image.


Building dataset from images:  12%|█▏        | 128/1089 [00:08<00:53, 18.00it/s]


Skipping data\angry\Resize_20241210_223148_8201.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223150_0517.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223152_2656.jpg because no faces were found in the image.


Building dataset from images:  12%|█▏        | 135/1089 [00:09<00:49, 19.10it/s]


Skipping data\angry\Resize_20241210_223217_7806.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223227_7281.jpg because no faces were found in the image.


Building dataset from images:  13%|█▎        | 141/1089 [00:09<00:46, 20.37it/s]


Skipping data\angry\Resize_20241210_223229_9963.jpg because no faces were found in the image.

Skipping data\angry\Resize_20241210_223234_4400.jpg because no faces were found in the image.


Building dataset from images:  16%|█▌        | 171/1089 [00:11<00:43, 21.01it/s]


Skipping data\disgusted\20241008_180238.jpg because no faces were found in the image.


Building dataset from images:  23%|██▎       | 249/1089 [00:16<01:09, 12.01it/s]


Skipping data\disgusted\IMG_20241010_213941_9_11zon.jpg because no faces were found in the image.


Building dataset from images:  27%|██▋       | 289/1089 [00:18<00:52, 15.21it/s]


Skipping data\disgusted\jijik_7.jpg because no faces were found in the image.


Building dataset from images:  29%|██▉       | 318/1089 [00:20<00:41, 18.36it/s]


Skipping data\disgusted\WA_Disgusted_195915-min.jpg because no faces were found in the image.


Building dataset from images:  43%|████▎     | 469/1089 [00:32<00:38, 16.06it/s]


Skipping data\happy\IMG_20241215_230738.jpg because no faces were found in the image.


Building dataset from images:  45%|████▌     | 494/1089 [00:33<00:39, 15.00it/s]


Skipping data\happy\Resize_20241210_223206_6401.jpg because no faces were found in the image.


Building dataset from images:  46%|████▌     | 502/1089 [00:34<00:45, 12.86it/s]


Skipping data\happy\Resize_20241210_223245_5560.jpg because no faces were found in the image.


Building dataset from images:  47%|████▋     | 509/1089 [00:35<00:39, 14.84it/s]


Skipping data\happy\Resize_20241210_223303_3593.jpg because no faces were found in the image.


Building dataset from images:  47%|████▋     | 513/1089 [00:35<00:40, 14.25it/s]


Skipping data\happy\Resize_20241210_223315_5498.jpg because no faces were found in the image.


Building dataset from images:  48%|████▊     | 523/1089 [00:36<00:43, 12.95it/s]


Skipping data\happy\senyum_4.jpg because no faces were found in the image.

Skipping data\happy\senyum_5.jpg because no faces were found in the image.


Building dataset from images:  52%|█████▏    | 565/1089 [00:38<00:31, 16.70it/s]


Skipping data\neutral\20240927_173857.jpg because no faces were found in the image.


Building dataset from images:  52%|█████▏    | 571/1089 [00:39<00:40, 12.66it/s]


Skipping data\neutral\20241008_181350.jpg because no faces were found in the image.


Building dataset from images:  60%|██████    | 658/1089 [00:46<00:35, 12.09it/s]


Skipping data\neutral\IMG_20241215_231542.jpg because no faces were found in the image.


Building dataset from images:  65%|██████▍   | 705/1089 [00:49<00:19, 19.48it/s]


Skipping data\neutral\Resize_20241210_223106_6875.jpg because no faces were found in the image.

Skipping data\neutral\Resize_20241210_223115_5905.jpg because no faces were found in the image.


Building dataset from images:  66%|██████▌   | 717/1089 [00:50<00:24, 15.08it/s]


Skipping data\neutral\WhatsApp Image 2024-09-19 at 12.24.47 PM.jpg because no faces were found in the image.


Building dataset from images:  66%|██████▋   | 723/1089 [00:51<00:18, 19.28it/s]


Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.51 (2).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.52 (1).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.52 (2).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.52.jpg because no faces were found in the image.


Building dataset from images:  67%|██████▋   | 726/1089 [00:51<00:17, 20.36it/s]


Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.53 (1).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.53 (2).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.53.jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.54 (1).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.54 (2).jpg because no faces were found in the image.


Building dataset from images:  67%|██████▋   | 732/1089 [00:51<00:15, 22.36it/s]


Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.54.jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.55 (1).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.55 (2).jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.55.jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.56 (1).jpg because no faces were found in the image.


Building dataset from images:  68%|██████▊   | 738/1089 [00:51<00:17, 20.48it/s]


Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.56.jpg because no faces were found in the image.

Skipping data\neutral\WhatsApp Image 2024-12-17 at 23.39.57.jpg because no faces were found in the image.


Building dataset from images:  76%|███████▌  | 827/1089 [00:58<00:20, 12.74it/s]


Skipping data\sad\IMG_20241215_230952.jpg because no faces were found in the image.


Building dataset from images:  79%|███████▉  | 864/1089 [01:01<00:12, 17.56it/s]


Skipping data\sad\Resize_20241210_222832_2123.jpg because no faces were found in the image.

Skipping data\sad\Resize_20241210_222839_9322.jpg because no faces were found in the image.


Building dataset from images:  80%|████████  | 872/1089 [01:01<00:11, 18.98it/s]


Skipping data\sad\Resize_20241210_222845_5876.jpg because no faces were found in the image.

Skipping data\sad\Resize_20241210_222850_0264.jpg because no faces were found in the image.

Skipping data\sad\Resize_20241210_222852_2560.jpg because no faces were found in the image.


Building dataset from images:  81%|████████  | 878/1089 [01:02<00:13, 15.82it/s]


Skipping data\sad\Resize_20241210_223123_3199.jpg because no faces were found in the image.


Building dataset from images:  81%|████████  | 884/1089 [01:02<00:14, 13.75it/s]


Skipping data\sad\sedih_5.jpg because no faces were found in the image.

Skipping data\sad\sedih_7.jpg because no faces were found in the image.


Building dataset from images:  83%|████████▎ | 905/1089 [01:03<00:10, 17.79it/s]


Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.37 (1).jpg because no faces were found in the image.

Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.37 (2).jpg because no faces were found in the image.

Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.38 (2).jpg because no faces were found in the image.


Building dataset from images:  84%|████████▎ | 910/1089 [01:04<00:09, 18.44it/s]


Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.38.jpg because no faces were found in the image.

Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.39 (2).jpg because no faces were found in the image.


Building dataset from images:  84%|████████▍ | 916/1089 [01:04<00:09, 17.68it/s]


Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.40 (2).jpg because no faces were found in the image.

Skipping data\sad\WhatsApp Image 2024-12-17 at 23.31.41 (1).jpg because no faces were found in the image.


Building dataset from images:  85%|████████▌ | 928/1089 [01:05<00:10, 16.07it/s]


Skipping data\surprised\20241008_181307.jpg because no faces were found in the image.


Building dataset from images:  92%|█████████▏| 1004/1089 [01:11<00:06, 12.52it/s]


Skipping data\surprised\IMG_20241215_231052_1.jpg because no faces were found in the image.


Building dataset from images:  93%|█████████▎| 1014/1089 [01:12<00:05, 12.53it/s]


Skipping data\surprised\IMG_20241215_231102.jpg because no faces were found in the image.


Building dataset from images:  94%|█████████▍| 1027/1089 [01:13<00:04, 14.16it/s]


Skipping data\surprised\Resize_20241210_222854_4662.jpg because no faces were found in the image.

Skipping data\surprised\Resize_20241210_222856_6766.jpg because no faces were found in the image.


Building dataset from images:  96%|█████████▋| 1049/1089 [01:14<00:02, 16.61it/s]


Skipping data\surprised\Resize_20241210_223046_6304.jpg because no faces were found in the image.


Building dataset from images:  97%|█████████▋| 1055/1089 [01:15<00:02, 14.46it/s]


Skipping data\surprised\terkejut_4.jpg because no faces were found in the image.

Skipping data\surprised\terkejut_5.jpg because no faces were found in the image.


Building dataset from images: 100%|██████████| 1089/1089 [01:17<00:00, 14.11it/s]


Skipped over 75 images because no faces were found in the images.


Start training process

In [45]:
df = pd.read_csv(TRAINING_DATA_CSV_PATH, index_col=False)
labels = df["label"]
df = df.drop(columns=["label"])
df

Unnamed: 0,feature-1,feature-2,feature-3,feature-4,feature-5,feature-6,feature-7,feature-8,feature-9,feature-10,...,feature-503,feature-504,feature-505,feature-506,feature-507,feature-508,feature-509,feature-510,feature-511,feature-512
0,0.376731,0.041551,0.016620,0.058172,0.030471,0.000000,0.072022,0.404432,0.479224,0.008310,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,0.408163,0.090703,0.018141,0.083900,0.006803,0.002268,0.081633,0.308390,0.512472,0.036281,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,0.503086,0.089506,0.018519,0.080247,0.015432,0.000000,0.061728,0.231481,0.444444,0.046296,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.404959,0.047521,0.020661,0.068182,0.045455,0.004132,0.076446,0.332645,0.438017,0.066116,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.371901,0.057851,0.020661,0.082645,0.045455,0.004132,0.053719,0.363636,0.458678,0.039256,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077,0.173469,0.076531,0.040816,0.193878,0.127551,0.010204,0.178571,0.198980,0.153061,0.122449,...,0.229592,0.275510,0.107143,0.107143,0.025510,0.056122,0.107143,0.020408,0.428571,0.147959
1078,0.146667,0.111111,0.035556,0.168889,0.120000,0.013333,0.186667,0.217778,0.191111,0.128889,...,0.546667,0.128889,0.342222,0.093333,0.057778,0.142222,0.013333,0.008889,0.057778,0.284444
1079,0.142857,0.117347,0.040816,0.178571,0.117347,0.015306,0.193878,0.193878,0.188776,0.081633,...,0.250000,0.244898,0.163265,0.066327,0.020408,0.061224,0.091837,0.010204,0.418367,0.168367
1080,0.158163,0.096939,0.040816,0.173469,0.102041,0.010204,0.198980,0.219388,0.168367,0.086735,...,0.239796,0.244898,0.158163,0.076531,0.030612,0.056122,0.081633,0.010204,0.454082,0.132653


Data Visualization

In [46]:
tsne = sklearn.manifold.TSNE()
data_points2d = tsne.fit_transform(df)

labels_series = pd.Series(labels, name="Label").replace(range(len(CLASS_NAMES)), CLASS_NAMES) # type: ignore

data_points2d_df = pd.DataFrame(data_points2d, columns=["x", "y"])
visdf = pd.concat([data_points2d_df, labels_series], axis=1)

fig = px.scatter(visdf, x="x", y="y", color="Label")
fig.show()

In [47]:
dataset = tf.data.Dataset.from_tensor_slices((df, labels))
train_dataset, test_dataset = keras.utils.split_dataset(dataset, shuffle=True, seed=42, left_size=0.7)
train_dataset, validation_dataset = keras.utils.split_dataset(train_dataset, shuffle=True, seed=42, left_size=0.7)

train_dataset: tf.data.Dataset = train_dataset.shuffle(100).batch(16)
test_dataset: tf.data.Dataset = test_dataset.batch(16)
validation_dataset: tf.data.Dataset = validation_dataset.batch(16)

dataset

<_TensorSliceDataset element_spec=(TensorSpec(shape=(512,), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.float64, name=None))>

In [103]:
model = keras.Sequential([
  keras.layers.Input(shape=dataset.element_spec[0].shape),
  keras.layers.Dense(256, activation="relu"),
  keras.layers.Dropout(0.3),
  keras.layers.Dense(128, activation="relu"),
  keras.layers.Dropout(0.3),
  keras.layers.Dense(64, activation="relu"),
  keras.layers.Dense(6, activation="softmax"),
])

model.compile(
  optimizer=keras.optimizers.Adam(learning_rate=0.001), # type: ignore
  loss=keras.losses.SparseCategoricalCrossentropy(),
  metrics=["accuracy"],
)

model.summary()

In [104]:
history = model.fit(
  train_dataset, epochs=40,
  validation_data=validation_dataset,
)

Epoch 1/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - accuracy: 0.2117 - loss: 1.7820 - val_accuracy: 0.2775 - val_loss: 1.7334
Epoch 2/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2464 - loss: 1.7253 - val_accuracy: 0.3128 - val_loss: 1.6961
Epoch 3/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.2802 - loss: 1.6725 - val_accuracy: 0.2952 - val_loss: 1.6499
Epoch 4/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.3265 - loss: 1.5892 - val_accuracy: 0.4053 - val_loss: 1.5511
Epoch 5/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3808 - loss: 1.5380 - val_accuracy: 0.4626 - val_loss: 1.4825
Epoch 6/40
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4622 - loss: 1.4388 - val_accuracy: 0.4890 - val_loss: 1.3863
Epoch 7/40
[1m34/34[0m [32m━━━━━━━

In [105]:
history_df = pd.DataFrame(history.history)

fig = px.line(history_df, x=history_df.index, y="accuracy", markers=True, title="Training Accuracy")
fig2 = px.line(history_df, x=history_df.index, y="val_accuracy", markers=True)

fig.update_traces(dict(
  marker=dict(
    color="blue"
  ),
  line=dict(
    color="blue",
  ),
))
fig2.update_traces(dict(
  marker=dict(
    color="orange"
  ),
  line=dict(
    color="orange",
  ),
))
fig.add_traces(fig2.data)
fig.data[0]["name"] = "Accuracy" # type: ignore
fig.data[1]["name"] = "Validation Accuracy" # type: ignore
fig.update_yaxes(dict(range=[0,1.2]))

fig.add_hline(y=history_df["val_accuracy"].median(), line_color="orange", line_dash="dash", label=dict(text="Median Val Accuracy", textposition="start"))
fig.add_hline(y=history_df["accuracy"].median(), line_color="blue", line_dash="dash", label=dict(text="Median Accuracy", textposition="start"))

fig.update_traces(dict(
  showlegend=True
))
fig.show()

In [106]:
from typing import cast

confidences = model.predict(test_dataset)
predictions = np.argmax(confidences, axis=1)

labels = cast(list[int], list(test_dataset.unbatch().map(lambda x, y: y).as_numpy_iterator()))

cmat = sklearn.metrics.confusion_matrix(labels, predictions, normalize="true")

fig = px.imshow(cmat, x=CLASS_NAMES, y=CLASS_NAMES, range_color=[0,1])
fig.show()

sklearn.metrics.classification_report(labels, predictions, output_dict=True, target_names=CLASS_NAMES)

[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step


{'angry': {'precision': 0.6111111111111112,
  'recall': 0.717391304347826,
  'f1-score': 0.66,
  'support': 46.0},
 'disgusted': {'precision': 0.7872340425531915,
  'recall': 0.6851851851851852,
  'f1-score': 0.7326732673267327,
  'support': 54.0},
 'happy': {'precision': 0.7391304347826086,
  'recall': 0.75,
  'f1-score': 0.7445255474452555,
  'support': 68.0},
 'neutral': {'precision': 0.6666666666666666,
  'recall': 0.7659574468085106,
  'f1-score': 0.7128712871287128,
  'support': 47.0},
 'sad': {'precision': 0.7169811320754716,
  'recall': 0.7037037037037037,
  'f1-score': 0.7102803738317757,
  'support': 54.0},
 'surprised': {'precision': 0.8333333333333334,
  'recall': 0.7142857142857143,
  'f1-score': 0.7692307692307693,
  'support': 56.0},
 'accuracy': 0.7230769230769231,
 'macro avg': {'precision': 0.7257427867537304,
  'recall': 0.7227538923884899,
  'f1-score': 0.7215968741605411,
  'support': 325.0},
 'weighted avg': {'precision': 0.7310756926346963,
  'recall': 0.72307692

In [107]:
predictions_series = pd.Series(predictions, name="Predictions").replace(range(len(CLASS_NAMES)), CLASS_NAMES) #type: ignore
labels_series = pd.Series(labels, name="Label").replace(range(len(CLASS_NAMES)), CLASS_NAMES) # type: ignore
discrete_color_map = dict(zip(CLASS_NAMES, px.colors.qualitative.Plotly))
symbol_map = {1: "circle", 0: "x"}

correct_state = pd.Series(labels_series == predictions_series, name="Correct")
visdf = pd.concat([data_points2d_df, labels_series, predictions_series, correct_state], axis=1)
visdf2 = pd.concat([data_points2d_df, labels_series, predictions_series, correct_state], axis=1)
fig = px.scatter(visdf, x="x", y="y", color="Predictions", hover_data=["Label"], symbol="Correct", color_discrete_map=discrete_color_map, symbol_map=symbol_map)
fig2 = px.scatter(visdf, x="x", y="y", color="Label", hover_data=["Predictions"], opacity=0.2, color_discrete_map=discrete_color_map)

fig2.update_traces(dict(
  marker=dict(
    size=12,
  ),
  showlegend=False
))

fig.add_traces(fig2.data)

fig.show()

In [108]:
tsne_confidences = sklearn.manifold.TSNE()
confidences_points = tsne_confidences.fit_transform(confidences)

confidences_points_df = pd.DataFrame(confidences_points, columns=["x", "y"])
visdf = pd.concat([confidences_points_df, predictions_series, labels_series, correct_state], axis=1)

fig = px.scatter(visdf, x="x", y="y", color="Predictions", symbol="Correct", symbol_map=symbol_map, color_discrete_map=discrete_color_map)
fig2 = px.scatter(visdf, x="x", y="y", color="Label", hover_data=["Predictions"], opacity=0.2, color_discrete_map=discrete_color_map)
fig2.update_traces(dict(
  marker=dict(
    size=12,
  ),
  showlegend=False
))


fig.add_traces(fig2.data)
fig.show()

In [109]:
model.save(EXPRESSION_RECOGNITION_MODEL_PATH)