<a href="https://colab.research.google.com/github/PeterRutkowski/fer/blob/master/data_prep.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from PIL import Image
import numpy as np
import random
import cv2
from google.colab.patches import cv2_imshow
import dlib
from imutils.face_utils import FaceAligner

In [None]:
class DLIB:
    """Detect faces with dlib and turn them into aligned grayscale crops.

    The class bundles dlib's frontal face detector, a landmark shape predictor
    and an imutils ``FaceAligner``. After each call to ``detect`` the crops of
    the most recent image are available in ``self.faces``.

    Parameters
    ----------
    predictor_path : str
        Location of the dlib landmark model file handed to
        ``dlib.shape_predictor``.
    face_width : int
        Passed to ``FaceAligner`` as ``desiredFaceWidth``.
    left_eye : tuple of float
        Passed to ``FaceAligner`` as ``desiredLeftEye``.
    upsample : int
        Second argument of the detector call (number of times the image is
        upsampled before detection, per the dlib API).
    """

    def __init__(self,
                 predictor_path='drive/My Drive/project42/shape_predictor_68_face_landmarks.dat',
                 face_width=100,
                 left_eye=(0.22, 0.22),
                 upsample=2):
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(predictor_path)
        self.FaceAligner = FaceAligner(self.predictor,
                                       desiredFaceWidth=face_width,
                                       desiredLeftEye=left_eye)
        self.upsample = upsample
        # Holds the crops of the last processed image; empty until detect() runs.
        self.faces = np.empty(0)

    def detect(self, img):
        """Find every face in ``img`` and store the aligned grayscale crops.

        Parameters
        ----------
        img : numpy.ndarray
            Colour image in OpenCV's BGR channel order (the method converts it
            with ``cv2.COLOR_BGR2GRAY``). Callers holding RGB data, e.g. from
            PIL, should convert first.

        Returns
        -------
        numpy.ndarray
            float16 array with one aligned grayscale crop per detected face,
            in the order the detector reported them. Empty when no face was
            found. The same array is stored in ``self.faces``.
        """
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        rects = self.detector(gray, self.upsample)
        # align() returns a colour crop, so each one is reduced to grayscale.
        faces = [
            cv2.cvtColor(self.FaceAligner.align(img, gray, rect), cv2.COLOR_BGR2GRAY)
            for rect in rects
        ]
        self.faces = np.float16(faces)
        return self.faces

In [None]:
# Create the single detector instance used by the cells below. The instance
# intentionally reuses the name `DLIB` (later cells call `DLIB.detect` and read
# `DLIB.faces`), which shadows the class. The guard keeps this cell safe to
# re-run: without it a second execution would try to call the instance and
# raise "TypeError: 'DLIB' object is not callable".
if isinstance(DLIB, type):
  DLIB = DLIB()

In [None]:
# raw frontal images
# Walk every sub-folder of the KDEF directory, keep the images whose filename
# has 'S' as its 7th character (the frontal shots, going by the cell title) and
# use filename characters 5-6 as the class label.
path = 'drive/My Drive/project42/KDEF_and_AKDEF/KDEF/'

images_raw, labels_raw = [], []

for folder in sorted(os.listdir(path)):
  folder_path = '{}{}/'.format(path, folder)
  if not os.path.isdir(folder_path):
    continue  # ignore stray files lying next to the per-subject folders
  for image in sorted(os.listdir(folder_path)):
    # Length check avoids an IndexError on short file names.
    if len(image) > 6 and image[6] == 'S':
      # Context manager closes the file handle as soon as the pixels are copied.
      with Image.open('{}{}'.format(folder_path, image)) as img:
        images_raw.append(np.array(img))
      labels_raw.append(image[4:6])

# The recorded output shape (981,) shows the images do not all share one shape.
# Recent NumPy versions refuse to build an array from such a ragged list
# implicitly, so the 1-D object array is created explicitly.
x = np.empty(len(images_raw), dtype=object)
for i, img_array in enumerate(images_raw):
  x[i] = img_array
y = np.asarray(labels_raw)

assert len(x) == len(y), 'every image must have exactly one label'

np.savez_compressed('drive/My Drive/project42/kdef_raw.npz', x=x, y=y)

print(x.shape, y.shape)

(981,) (981,)


In [None]:
# detect faces
# Run the detector on every raw image and keep the first aligned face crop.
# NOTE: allow_pickle=True executes pickled objects on load; only use it on
# files produced by this notebook.
loaded = np.load('drive/My Drive/project42/kdef_raw.npz', allow_pickle=True)
X = loaded['x']
Y = loaded['y']

detected_faces = []
kept_indices = []  # positions in X for which a face was found

for i in range(len(X)):
  # The raw arrays were decoded by PIL (RGB order) while DLIB.detect converts
  # with COLOR_BGR2GRAY, so swap the channels before handing the image over.
  DLIB.detect(cv2.cvtColor(X[i], cv2.COLOR_RGB2BGR))
  if len(DLIB.faces) == 0:
    # No face found: skip the image instead of failing on faces[0].
    continue
  detected_faces.append(DLIB.faces[0])
  kept_indices.append(i)

x = np.asarray(detected_faces)
# Keep labels aligned with the images that actually produced a face.
labels_detected = Y[kept_indices]

skipped = len(X) - len(kept_indices)
if skipped:
  print('no face detected in {} image(s); they were dropped'.format(skipped))

assert len(x) == len(labels_detected)

np.savez_compressed('drive/My Drive/project42/kdef_detected_faces.npz', x=x, y=labels_detected)

print(x.shape, labels_detected.shape)

(981, 100, 100) (981,)


In [None]:
# grouping indices of classes
# Reload the detected faces and record, for every label, how many samples it
# has and at which positions of Y they sit.
loaded = np.load('drive/My Drive/project42/kdef_detected_faces.npz', allow_pickle=True)
X, Y = loaded['x'], loaded['y']

classes = np.unique(Y)

# label -> [sample count, [sample indices]]
classes_dict = {}
for idx, label in enumerate(Y):
  entry = classes_dict.setdefault(label, [0, []])
  entry[0] += 1
  entry[1].append(idx)

print(classes)
print(classes_dict)

['AF' 'AN' 'DI' 'HA' 'NE' 'SA' 'SU']
{'AF': [141, [0, 7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 91, 98, 105, 112, 119, 126, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203, 210, 217, 224, 231, 238, 245, 252, 259, 266, 273, 280, 287, 294, 301, 308, 315, 322, 329, 336, 343, 350, 357, 364, 371, 378, 385, 392, 399, 406, 413, 420, 427, 434, 441, 448, 455, 462, 469, 476, 483, 490, 497, 504, 511, 518, 525, 532, 539, 546, 553, 560, 567, 574, 581, 588, 595, 602, 609, 616, 623, 630, 637, 644, 651, 658, 665, 672, 679, 680, 687, 694, 701, 708, 715, 722, 729, 736, 743, 750, 757, 764, 771, 778, 785, 792, 799, 806, 813, 820, 827, 834, 841, 848, 855, 862, 869, 876, 883, 890, 897, 904, 911, 918, 925, 932, 939, 946, 953, 960, 967, 974]], 'AN': [140, [1, 8, 15, 22, 29, 36, 43, 50, 57, 64, 71, 78, 85, 92, 99, 106, 113, 120, 127, 134, 141, 148, 155, 162, 169, 176, 183, 190, 197, 204, 211, 218, 225, 232, 239, 246, 253, 260, 267, 274, 281, 288, 295, 302, 309, 316, 323, 330, 337, 344, 351, 358, 365

In [None]:
# validation split
# Stratified split: every class contributes the same number of samples, of
# which a fixed share is drawn at random for validation.
SEED = 42               # fixed seed so the split is reproducible on re-run
val_split = 0.3         # share of each class that goes to validation
class_quantity = 140    # samples used per class (caps larger classes for balance)
val_quantity = int(class_quantity*val_split)

x_train, y_train, x_val, y_val = [], [], [], []
# one-hot encoding of the seven class labels
OHE = {'AF': [1, 0, 0, 0, 0, 0, 0], 'AN': [0, 1, 0, 0, 0, 0, 0],
       'DI': [0, 0, 1, 0, 0, 0, 0], 'HA': [0, 0, 0, 1, 0, 0, 0],
       'NE': [0, 0, 0, 0, 1, 0, 0], 'SA': [0, 0, 0, 0, 0, 1, 0],
       'SU': [0, 0, 0, 0, 0, 0, 1], }

# Local generator: reproducible without touching the global `random` state.
rng = random.Random(SEED)

for key in classes_dict.keys():
  # Use the positions that belong to THIS class. Indexing X/Y with
  # range(class_quantity) would just re-split the first 140 images for every
  # class instead of splitting each class separately.
  class_indices = classes_dict[key][1][:class_quantity]
  # Classes smaller than class_quantity keep the same validation share.
  n_val = min(val_quantity, int(len(class_indices)*val_split))
  val_ind = set(rng.sample(class_indices, n_val))  # set -> O(1) membership
  for i in class_indices:
    if i in val_ind:
      x_val.append(X[i])
      y_val.append(OHE[Y[i]])
    else:
      x_train.append(X[i])
      y_train.append(OHE[Y[i]])

x_train=np.asarray(x_train)
y_train=np.asarray(y_train)
x_val=np.asarray(x_val)
y_val=np.asarray(y_val)

assert len(x_train) == len(y_train) and len(x_val) == len(y_val)

np.savez_compressed('drive/My Drive/project42/kdef.npz',
                    x_train=x_train, y_train=y_train,
                    x_val=x_val, y_val=y_val)

print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

(686, 100, 100) (686, 7) (294, 100, 100) (294, 7)
