In [1]:
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
import cv2
import pandas as pd
import os

In [2]:
base_options = python.BaseOptions(model_asset_path='face_landmarker_v2_with_blendshapes.task')
options = vision.FaceLandmarkerOptions(base_options=base_options,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector = vision.FaceLandmarker.create_from_options(options)

In [3]:
dir_sad = '../face_sad'
dir_happy = '../face_happy'
dir_neutral = '../face_neutral'
dir_angry = '../face_angry'
dir_surprised = '../face_surprised'
dir_disgusted = '../face_disgusted'

images_sad = [os.path.join(dir_sad, img) for img in os.listdir(dir_sad)]
images_happy = [os.path.join(dir_happy, img) for img in os.listdir(dir_happy)]
images_neutral = [os.path.join(dir_neutral, img) for img in os.listdir(dir_neutral)]
images_angry = [os.path.join(dir_angry, img) for img in os.listdir(dir_angry)]
images_surprised = [os.path.join(dir_surprised, img) for img in os.listdir(dir_surprised)]
images_disgusted = [os.path.join(dir_disgusted, img) for img in os.listdir(dir_disgusted)]

images = images_sad + images_happy + images_neutral + images_angry + images_surprised + images_disgusted

print(len(images))

51358


In [4]:
def inference(image):
    detection_result = detector.detect(image)
    if detection_result is None or not len(detection_result.face_blendshapes):
        return None
    blendshapes = detection_result.face_blendshapes[0]
    return blendshapes

In [5]:
def resize_if_too_small(photo):
    if photo.shape[0] < 300 and photo.shape[1] < 300:
        photo = cv2.resize(photo, (300, 300), interpolation = cv2.INTER_CUBIC)
    return photo

In [6]:
blendshapes = []
total = len(images)
for image_id in range(total):
    try:
        cv2image = cv2.imread(images[image_id])
        cv2image = resize_if_too_small(cv2image)
        inferenceinput = mp.Image(image_format=mp.ImageFormat.SRGB, data=cv2image)
    except (RuntimeError, AttributeError):
        continue
    newinference = inference(inferenceinput)
    if newinference is not None:
        scores = [cat.score for cat in newinference]
        scores.append(images[image_id])
        blendshapes.append(scores)
    print(len(blendshapes), '/', total)
print(len(blendshapes))

1 / 51358
2 / 51358
3 / 51358
4 / 51358
5 / 51358
5 / 51358
6 / 51358
7 / 51358
8 / 51358
8 / 51358
9 / 51358
10 / 51358
11 / 51358
12 / 51358
13 / 51358
14 / 51358
15 / 51358
16 / 51358
17 / 51358
18 / 51358
19 / 51358
19 / 51358
20 / 51358
20 / 51358
21 / 51358
22 / 51358
22 / 51358
23 / 51358
24 / 51358
25 / 51358
26 / 51358
27 / 51358
28 / 51358
29 / 51358
30 / 51358
30 / 51358
31 / 51358
32 / 51358
33 / 51358
34 / 51358
35 / 51358
36 / 51358
37 / 51358
38 / 51358
39 / 51358
39 / 51358
40 / 51358
41 / 51358
42 / 51358
43 / 51358
43 / 51358
43 / 51358
44 / 51358
45 / 51358
46 / 51358
47 / 51358
48 / 51358
48 / 51358
49 / 51358
50 / 51358
51 / 51358
52 / 51358
53 / 51358
53 / 51358
54 / 51358
55 / 51358
56 / 51358
57 / 51358
58 / 51358
59 / 51358
60 / 51358
61 / 51358
62 / 51358
63 / 51358
64 / 51358
64 / 51358
64 / 51358
65 / 51358
66 / 51358
67 / 51358
68 / 51358
69 / 51358
70 / 51358
71 / 51358
72 / 51358
73 / 51358
74 / 51358
75 / 51358
76 / 51358
77 / 51358
78 / 51358
79 / 51358

In [7]:
blendshapes[0]

[9.127605693493024e-08,
 0.01627349853515625,
 0.01603768579661846,
 0.07734277844429016,
 0.19470325112342834,
 0.05592476576566696,
 9.990095350076444e-06,
 1.6290123028284142e-08,
 3.962332328910634e-08,
 0.0697910413146019,
 0.010431375354528427,
 0.0751325935125351,
 0.08235176652669907,
 0.05561487749218941,
 0.09320952743291855,
 0.11484739929437637,
 0.08330687135457993,
 0.15736287832260132,
 0.14274418354034424,
 0.40807539224624634,
 0.020799772813916206,
 0.03351902216672897,
 0.0212506502866745,
 0.00017151559586636722,
 0.059138763695955276,
 0.00029960405663587153,
 3.4960063999278645e-07,
 8.993946721602697e-06,
 0.00014846469275653362,
 0.005091389641165733,
 0.06213235855102539,
 0.0498124398291111,
 0.0004475779423955828,
 0.03642001003026962,
 0.00760255241766572,
 0.0018470892682671547,
 0.004293991718441248,
 0.22986432909965515,
 0.020729241892695427,
 1.2310626971157035e-06,
 0.004233797546476126,
 0.0019323326414451003,
 0.041704367846250534,
 0.004688210319727

In [8]:
filenames = []
features = []

for case in blendshapes:
    newfeatureset = {'eyeLookDownLeft': case[11], 'eyeLookDownRight': case[12], 'eyeLookInLeft': case[13], 'eyeLookInRight': case[14], 'eyeLookOutLeft': case[15], 'eyeLookOutRight': case[16], 'eyeLookUpLeft': case[17], 'eyeLookUpRight': case[18]}
    features.append(newfeatureset)
    filenames.append(case[52])

df = pd.DataFrame(features, index=filenames)
df.head()


Unnamed: 0,eyeLookDownLeft,eyeLookDownRight,eyeLookInLeft,eyeLookInRight,eyeLookOutLeft,eyeLookOutRight,eyeLookUpLeft,eyeLookUpRight
../face_sad\10006.jpg,0.075133,0.082352,0.055615,0.09321,0.114847,0.083307,0.157363,0.142744
../face_sad\10008.jpg,0.412534,0.810497,0.013384,0.335424,0.301499,0.010262,0.056671,0.001307
../face_sad\10020.jpg,0.234582,0.212248,0.029172,0.073818,0.110335,0.058965,0.050986,0.058155
../face_sad\10028.jpg,0.0888,0.10271,0.545643,0.003591,0.00488,0.578628,0.112583,0.105195
../face_sad\1003.jpg,0.520485,0.538734,0.020367,0.174693,0.174109,0.020325,0.010766,0.02331


In [9]:
df['eye_Y'] = (df['eyeLookUpLeft'] + df['eyeLookUpRight'] - df['eyeLookDownLeft'] - df['eyeLookDownRight']) / 2
df.drop(['eyeLookUpLeft', 'eyeLookUpRight', 'eyeLookDownLeft', 'eyeLookDownRight'], axis=1, inplace=True)
df.describe()

Unnamed: 0,eyeLookInLeft,eyeLookInRight,eyeLookOutLeft,eyeLookOutRight,eye_Y
count,42434.0,42434.0,42434.0,42434.0,42434.0
mean,0.110349,0.192718,0.1966719,0.1175031,-0.057785
std,0.171526,0.208277,0.2097096,0.1757075,0.320359
min,1.6e-05,1.4e-05,3.216547e-07,1.50039e-08,-0.999635
25%,0.011949,0.041379,0.04389963,0.016482,-0.272417
50%,0.037935,0.121792,0.1232507,0.04403473,-0.047072
75%,0.122423,0.265228,0.2730332,0.1309457,0.157466
max,0.999658,0.999976,0.9990792,0.9989679,0.993426


In [10]:
df['eye_X'] = (df['eyeLookOutLeft'] + df['eyeLookInRight'] - df['eyeLookInLeft'] - df['eyeLookOutRight']) / 2
df.drop(['eyeLookOutLeft', 'eyeLookOutRight', 'eyeLookInLeft', 'eyeLookInRight'], axis=1, inplace=True)
df.describe()

Unnamed: 0,eye_Y,eye_X
count,42434.0,42434.0
mean,-0.057785,0.080769
std,0.320359,0.323381
min,-0.999635,-0.989144
25%,-0.272417,-0.074469
50%,-0.047072,0.082589
75%,0.157466,0.24451
max,0.993426,0.999055


In [12]:
df.to_parquet('ground_truth.parquet', index=True)