In [13]:
import math
import os
import pickle

import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from pygame import Rect, Vector2
from scipy.io import savemat, loadmat
from tqdm import tqdm

import TrainedInceptionResnetV2
from Affective_Computing.getPipeData import get_pipe_data
from Affective_Computing.PointCloud import FaceCloud

In [2]:
# Seed NumPy's legacy global generator so that code drawing from np.random
# produces the same values on every Restart & Run All.
np.random.seed(101)

# Class names; the position of a name in this list is used as its integer label.
emotions = ["Anger", "Contempt", "Disgust", "Fear", "Happy", "Neutral", "Sad", "Surprise"]

# Root folder of the training images. Set the AFFECTNET_TRAIN_DIR environment
# variable to run the notebook on another machine; when it is unset (or empty)
# the original location is used.
dataset_path = os.environ.get("AFFECTNET_TRAIN_DIR") or (
    "/Users/benhoskings/Documents/Emotion Recognition/Datasets/AffectNet/Data/train_set/matlab"
)

Load all models

In [3]:
# MediaPipe face landmarker, configured for a single face per image with
# blendshape scores and facial transformation matrices enabled in its output.
base_options = python.BaseOptions(
    model_asset_path='Affective_Computing/face_landmarker_v2_with_blendshapes.task'
)
options = vision.FaceLandmarkerOptions(
    base_options=base_options,
    output_face_blendshapes=True,
    output_facial_transformation_matrixes=True,
    num_faces=1,
)
detector = vision.FaceLandmarker.create_from_options(options)

# Network provided by the project's TrainedInceptionResnetV2 module; it is
# only called (via model.predict) when the `net` feature family is enabled.
model = TrainedInceptionResnetV2.load_model()

W0000 00:00:1699835480.612474       1 face_landmarker_graph.cc:169] Sets FaceBlendshapesGraph acceleration to xnnpack by default.
INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [4]:
im_path = "Affective_Computing/Sample_Images/Neutral.png"

# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so check explicitly rather than failing later inside cvtColor.
ref_bgr = cv2.imread(im_path)
if ref_bgr is None:
    raise FileNotFoundError(f"Could not read reference image: {im_path}")

# get image as RGB array (OpenCV loads images in BGR channel order)
img_array = cv2.cvtColor(ref_bgr, cv2.COLOR_BGR2RGB)
# get image as mediapipe image
img_mp = mp.Image(data=img_array, image_format=mp.ImageFormat.SRGB)

# Landmarks of the neutral sample image become the reference face that
# create_delta_feature() is called against further down.
face_landmarks, _, _ = get_pipe_data(detector, img_mp)
ref_face = FaceCloud(face_landmarks)
ref_face.preprocess()


In [5]:
def get_sample_ids(class_count=None, seed=None):
    """Draw the same number of random image ids from every emotion class.

    Args:
        class_count: Requested number of ids per class. It is capped at the
            size of the smallest class; ``None`` (or 0) selects that cap.
        seed: Optional seed. When given, sampling uses a private
            ``np.random.RandomState(seed)`` and leaves the global NumPy
            random state untouched. When ``None``, the global ``np.random``
            state is used (so a preceding ``np.random.seed`` call applies).

    Returns:
        A tuple ``(ids1, ids2, label_count)``:
            ids1: float array of shape ``(class_count, len(emotions))``; column
                ``k`` holds the within-class ids drawn for ``emotions[k]``.
            ids2: flat float array with the same ids shifted by the cumulative
                file count of all preceding classes.
            label_count: dict mapping each emotion name to its file count.

    Raises:
        ValueError: If ``class_count`` is negative.
    """
    # Hard-coded number of files per class, in the same order as `emotions`.
    counts = [24882, 3750, 3803, 6378, 134414, 74874, 25459, 14090]
    label_count = dict(zip(emotions, counts))

    smallest_class = min(label_count.values())
    class_count = min(class_count, smallest_class) if class_count else smallest_class
    if class_count < 0:
        raise ValueError(f"class_count must not be negative, got {class_count}")

    # RandomState shares the algorithm of the global np.random functions, so a
    # given seed yields the same ids through either route.
    rng = np.random if seed is None else np.random.RandomState(seed)

    within_class_ids = []
    global_ids = []
    for idx, emotion in enumerate(emotions):
        picked = rng.permutation(np.arange(label_count[emotion]))[:class_count]
        start_idx = sum(counts[:idx])  # files belonging to all earlier classes
        within_class_ids.append(picked)
        global_ids.append(start_idx + picked)

    # Stack once instead of growing the arrays with np.append on every pass;
    # float dtype is kept so the returned arrays match what callers received
    # before.
    ids1 = np.column_stack(within_class_ids).astype(float)
    ids2 = np.concatenate(global_ids).astype(float)

    return ids1, ids2, label_count

def read_image(emotion, id):
    """Load one dataset image as an RGB array.

    The file is looked up at ``{dataset_path}/{emotion}/{id}.png`` where the
    id is zero-padded to at least six digits (e.g. 42 -> ``000042.png``).

    Args:
        emotion: Name of the class sub-folder.
        id: Numeric image id; integer-valued floats are accepted and truncated
            with ``int()``.

    Returns:
        The image as returned by OpenCV, converted from BGR to RGB.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
    """
    # Format-spec padding handles every id, including 0 and ids with more than
    # six digits, which the previous log10-based slicing truncated.
    file_path = f"{dataset_path}/{emotion}/{int(id):06d}.png"

    # cv2.imread returns None (rather than raising) for unreadable files.
    img_bgr = cv2.imread(file_path)
    if img_bgr is None:
        raise FileNotFoundError(f"Could not read image: {file_path}")

    return cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)


def segment_and_resize(img_array, landmarks, size=None):
    """Crop an enlarged landmark bounding box out of an image, optionally resizing it.

    Args:
        img_array: Image array indexed as ``[row, column, ...]``.
        landmarks: Array whose column 0 / column 1 are multiplied by the image
            width / height to obtain pixel positions (i.e. they are treated as
            fractions of the image size).
        size: Optional target size passed straight to ``cv2.resize``; when
            falsy the crop is returned at its native size.

    Returns:
        The cropped (and, if requested, resized) image array.
    """
    img_height, img_width = img_array.shape[:2]

    px_locations_x = landmarks[:, 0] * img_width
    px_locations_y = landmarks[:, 1] * img_height

    min_x, max_x = np.min(px_locations_x), np.max(px_locations_x)
    min_y, max_y = np.min(px_locations_y), np.max(px_locations_y)

    # create bounding box of face and scale to adjust for full head region
    scale = Vector2(1.8, 1.6)
    # Plain Python ints (truncated towards zero) avoid the overflow a 16-bit
    # cast would hit on very large images.
    face_rect = Rect(int(min_x), int(min_y), int(max_x - min_x), int(max_y - min_y))
    face_rect = face_rect.scale_by(scale.x, scale.y)

    # Rect takes (width, height) whereas ndarray.shape is (height, width), so
    # the image bounds must be swapped before clipping.
    face_rect = face_rect.clip(Rect(0, 0, img_width, img_height))
    cropped_img = img_array[face_rect.top:face_rect.bottom, face_rect.left:face_rect.right]

    if size:
        cropped_img = cv2.resize(cropped_img, size)

    return cropped_img


In [6]:
# Balanced sample using the defaults of get_sample_ids().
emotionIDs, imageIDs, count = get_sample_ids()

# emotionIDs keeps the ids of each emotion in its own column, while imageIDs
# is a single flat vector. A cell only displays its final expression, so just
# the shape of imageIDs is shown here.
imageIDs.shape


(30000,)

In [10]:
image_size = (299, 299)
batch_size = 128  # number of cropped faces sent to model.predict in one call

# Feature families to compute in this run. A family that is switched off
# ends up as an empty (0, width) matrix.
shape, delta, blend, net = True, True, True, False


def _as_row(feature, width):
    """Reshape a feature to a (1, width) row, rejecting any other length."""
    row = np.reshape(feature, (1, -1))
    if row.shape[1] != width:
        raise ValueError(f"expected {width} feature values, got {row.shape[1]}")
    return row


def _stack_rows(rows, width):
    """Stack collected rows below an empty float (0, width) matrix."""
    return np.concatenate([np.empty((0, width)), *rows], axis=0)


# Rows are collected in lists and stacked once at the end; growing the arrays
# with np.append would copy the whole matrix for every image.
shape_rows, delta_rows, blend_rows, net_rows, label_rows = [], [], [], [], []
# (emotion, image id, error) for every image that had to be left out
skipped = []

for em_idx, emotion in enumerate(emotions):
    # cropped faces waiting to be predicted together, so the network is
    # called on whole batches rather than single images
    pending_faces = []
    class_ids = emotionIDs[:, em_idx]

    for image_id in tqdm(class_ids, total=len(class_ids), desc=emotion):
        # Compute every requested feature BEFORE storing any of them, so an
        # image that fails halfway cannot leave the matrices and the labels
        # with different numbers of rows.
        try:
            # MxNx3 numpy array
            rgb_image = read_image(emotion, image_id)

            # create mediapipe image object
            mp_image = mp.Image(data=rgb_image, image_format=mp.ImageFormat.SRGB)
            face_landmarks, blend_feature, _ignored = get_pipe_data(detector, mp_image)

            face = FaceCloud(face_landmarks)
            face.preprocess()

            shape_row = _as_row(face.create_shape_feature(), 49) if shape else None
            delta_row = _as_row(face.create_delta_feature(ref_face), 234) if delta else None
            blend_row = _as_row(blend_feature, 52) if blend else None
            # crop face and resize for input into neural network
            net_input = segment_and_resize(rgb_image, face_landmarks, image_size) if net else None
        except Exception as exc:
            # Best effort: an image that cannot be processed is left out, but
            # it is recorded instead of disappearing silently. Catching
            # Exception (not a bare except) keeps Ctrl-C / kernel interrupt
            # working.
            skipped.append((emotion, int(image_id), repr(exc)))
            continue

        if shape:
            shape_rows.append(shape_row)
        if delta:
            delta_rows.append(delta_row)
        if blend:
            blend_rows.append(blend_row)
        label_rows.append(em_idx)

        if net:
            pending_faces.append(net_input)
            # perform prediction on images in batches of `batch_size`
            if len(pending_faces) == batch_size:
                net_rows.append(model.predict(np.asarray(pending_faces, dtype=np.float64), verbose=0))
                pending_faces = []

    # catch the remaining data (nothing to do when the last batch was full)
    if net and pending_faces:
        net_rows.append(model.predict(np.asarray(pending_faces, dtype=np.float64), verbose=0))

shape_data = _stack_rows(shape_rows, 49)
delta_data = _stack_rows(delta_rows, 234)
blend_data = _stack_rows(blend_rows, 52)
net_data = _stack_rows(net_rows, 8)
labels = np.asarray(label_rows, dtype=np.float64)

print(f"{len(label_rows)} images processed, {len(skipped)} skipped")
    
    

3750it [01:22, 45.62it/s]
3750it [01:21, 46.14it/s]
3750it [01:22, 45.22it/s]
3750it [01:25, 43.88it/s]
3750it [01:31, 41.17it/s]
3750it [01:31, 40.90it/s]
3750it [01:32, 40.56it/s]
3750it [01:37, 38.51it/s]


In [17]:
# Feature families that were switched off above are taken from an earlier
# export. The file is only opened when at least one family is really missing,
# so a run that computes everything does not depend on it.
if not (net and shape and delta and blend):
    previous_data = loadmat("Training_Data.mat")
    if not net:
        net_data = previous_data["net_data"]
    if not shape:
        shape_data = previous_data["shape_data"]
    if not delta:
        delta_data = previous_data["delta_data"]
    if not blend:
        blend_data = previous_data["blend_data"]

train_data = {"shape_data": shape_data, "delta_data": delta_data, "blend_data": blend_data, "net_data": net_data, "labels": labels}
print(shape_data.shape, delta_data.shape, blend_data.shape, net_data.shape)

# Every matrix must contribute exactly one row per label; a matrix loaded from
# the earlier export with a different row count would pair features with the
# wrong labels.
label_total = np.size(labels)
for feature_name in ("shape_data", "delta_data", "blend_data", "net_data"):
    row_total = train_data[feature_name].shape[0]
    if row_total != label_total:
        raise ValueError(f"{feature_name} has {row_total} rows but there are {label_total} labels")

(29788, 49) (29788, 234) (29788, 52) (29788, 8)


In [18]:
# Write all feature matrices and the labels into a single MATLAB file.
savemat(file_name="Training_Data_2.mat", mdict=train_data)