
This script finds the facial keypoint and compute keypoint distance between ground truth and generated frames. 
we use Mediapipe to find facial keypoints.
To run the script, install cv2, numpy, mediapipe and pandas


maybe you will need to run following command on your envirement before running the script (if you get error)
!export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

we use face_landmark env

In [1]:

from re import X
import cv2
import numpy as np
import mediapipe as mp
import pandas as pd
import os

Change following paths to match your ground-truth path (ground_truth_parent_directory) and path where the generated frames by your algorithm  are located (generated_parent_directory)

In [2]:
ground_truth_parent_directory = '3D_real_head_dataset/images/ground_truth/'
generated_parent_directory = 'DAGAN_results_3D_heads/'

In [3]:

#Directories containing ground-truth 
ground_truth_directories = [ground_truth_parent_directory+d for d in os.listdir(ground_truth_parent_directory) 
                            if os.path.isdir(os.path.join(ground_truth_parent_directory, d))]
#Directories containing generated reenactment frames 
generated_directories = [generated_parent_directory+d for d in os.listdir(generated_parent_directory) 
                         if os.path.isdir(os.path.join(generated_parent_directory, d))] 

#For each ground-truth find all deepfake folders that contain the same head rotation.
#ground-truth folder is key and the value is gnerated folders (both contain the same head rotation)
gt_gen_path_dict = {}
for dir_gt in ground_truth_directories:
    rotation_type_gt = dir_gt.split('/')[-1].split('_')[0] #head rotation of ground-truth
    if rotation_type_gt not in gt_gen_path_dict:
                gt_gen_path_dict[dir_gt] = []
    #if the same head rotation is found in generated folder, add it as a value to dict
    for dir_gen in generated_directories: 
        rotation_type_gen = dir_gen.split('/')[-1].split('_')[0]
        
        if rotation_type_gen == rotation_type_gt:
            gt_gen_path_dict[dir_gt].append(dir_gen)

In [4]:
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True)

In [5]:
def MSE(x_list_orig, y_list_orig, x_list, y_list):
    """
    Compute SSIM score between video frames.
    Args:
    orig_frame: ground-truth frame.
    fake_frame: fake frame
    Returns:
    SSIM score between ground-truth and fake frame
    """
    #computes mean squared error for one frame
    x_difference = (np.array(x_list_orig) - np.array(x_list))**2
    y_difference = (np.array(y_list_orig) - np.array(y_list))**2
    squared_error = np.sqrt(x_difference + y_difference)
    MSE = squared_error.mean()
    return MSE


def compute_AKD(path_fake, path_gr):
    """
    Compute average keypoint distance between two video frames.
    Args:
    path_fake: path belonging to fake video frame.
    path_gr:path belonging to ground-truth  video frame. 
    Returns:
    Mean Square Error between keypoints
    """
    x1_list = []
    y1_list = []
    x2_list = []
    y2_list = []
    original_img =  cv2.imread(path_gr)
    fake_img =  cv2.imread(path_fake)
    height, width, _ = original_img.shape
    result_original = face_mesh.process(cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB))
    result_fake = face_mesh.process(cv2.cvtColor(fake_img, cv2.COLOR_BGR2RGB))
        
    if result_original.multi_face_landmarks is not None:
        for idx, facial_landmarks in enumerate(result_original.multi_face_landmarks):
            for i in range(0, 468): #468
                pt1 = facial_landmarks.landmark[i]
                x1 = int(pt1.x * width)   #scale the landmarks to fit the image size
                y1 = int(pt1.y * height)
                x1_list.append(x1)
                y1_list.append(y1)
                #saving results for later. if the landmark landmark can not be found we use the landmark of last frame
                pr_x1_list = x1_list
                pr_y1_list = y1_list
                
    if result_fake.multi_face_landmarks is not None:            
        for idx, facial_landmarks in enumerate(result_fake.multi_face_landmarks):
            for i in range(0, 468): #468
                pt2 = facial_landmarks.landmark[i]
                x2 = int(pt2.x * width)   #scale the landmarks to fit the image size
                y2 = int(pt2.y * height)
                x2_list.append(x2)
                y2_list.append(y2)
                pr_x2_list = x2_list
                pr_y2_list = y2_list
    #if there is occlussion or the keypoints can not be found===========
    if result_original.multi_face_landmarks is None:
        print(path_gr)
        x1_list = pr_x1_list
        y1_list = pr_y1_list 
        with open ('fileFailed.txt', 'w') as file:  
            file.write(path_gr)  
            file.write('\n')  
    
    if result_fake.multi_face_landmarks is None:
        print(path_fake)
        x2_list = pr_x2_list
        y2_list = pr_y2_list  
        with open ('fileFailed.txt', 'w') as file:  
            file.write(path_fake) 
            file.write('\n')  
    
    #compute mean square error                    
    mse = MSE(x1_list, y1_list, x2_list, y2_list)
    return mse

def compute_AKD_per_frame(original_path, path1, path2, path3, path4):
    """
    Compute average leypoint distance between ground truth and generated frames for one video (100 frames). We use 4 different 
    identities to generate the same head rotation 
    Args:
    original_path: path to ground-truth directory
    path1, path2, path3, path4: path to deepfake generated directories. Inside each path we have fake frames generated
    by different identities
    Returns:
    A list of AKD scores between the ground-truth frames and fake frames. The score for each frame is the average
    over the four generated images.
    """
    ssim_list = []
    ground_truth_frames_212 = [original_path + '/' + d for d in sorted(os.listdir(original_path))]
    generated_frames_122 = [path1 + '/' + d for d in sorted(os.listdir(path1))]
    generated_frames_340 = [path2 + '/' + d for d in sorted(os.listdir(path2))]
    generated_frames_344 = [path3 + '/' + d for d in sorted(os.listdir(path3))]
    generated_frames_359 = [path4 + '/' + d for d in sorted(os.listdir(path4))]
    print(original_path)
    for ground_truth, gen_122, gen_340, gen_344, gen_359 in zip(ground_truth_frames_212, 
                                                            generated_frames_122, 
                                                            generated_frames_340,
                                                            generated_frames_344,
                                                            generated_frames_359):
        
        AKD_score_122 = compute_AKD(ground_truth, gen_122 )
        AKD_score_340 = compute_AKD(ground_truth, gen_340)
        AKD_score_344 = compute_AKD(ground_truth, gen_344)
        AKD_score_359= compute_AKD(ground_truth, gen_359)
        
        mean_ssim = (AKD_score_122+AKD_score_340+AKD_score_344+AKD_score_359)/4 
        ssim_list.append(mean_ssim)
    return ssim_list

In [6]:
def main():
    
    # Create a dataframe to store image name and SSIM score for different head rotation per frame
    dataframe = pd.DataFrame({'image_name': []})
    for groundtruth_path, generate_path  in gt_gen_path_dict.items():
        names = [d for d in sorted(os.listdir(groundtruth_path))] #video frame names
        dataframe["image_name"] = names 
        AKD_list = []
        rotatio_type = groundtruth_path.split('/')[-1].split('_')[0]
        print(rotatio_type)
        #In the paper we only evaluate the three head rotations below
        if rotatio_type == "pitchNegative": #head rotation of ground-truth
            AKD_list = compute_AKD_per_frame(groundtruth_path, generate_path[0], generate_path[1], 
                                                generate_path[2], generate_path[3])
            dataframe["pitchNegative"] = AKD_list
            
            
        if rotatio_type == "yawPositive": #head rotation of ground-truth
            AKD_list = compute_AKD_per_frame(groundtruth_path, generate_path[0], generate_path[1], 
                                                generate_path[2], generate_path[3])
            dataframe["yawPositive"] = AKD_list
            
                    
        if rotatio_type == "pitchYawPositive": #head rotation of ground-truth
            AKD_list = compute_AKD_per_frame(groundtruth_path, generate_path[0], generate_path[1], 
                                                generate_path[2], generate_path[3])
            dataframe["pitchYawPositive"] = AKD_list
    
    print(dataframe)   
    csv_path =   "cv_scores/"+generated_parent_directory
    if not os.path.exists(csv_path):
        os.makedirs(csv_path, exist_ok=True)
                
    dataframe.to_csv(csv_path+"AKD_scores_per_frame.csv")  

if __name__ == "__main__":
    main()


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


pitchNegative
3D_real_head_dataset/images/ground_truth/pitchNegative_id212
pitchPositive
yawPositive
3D_real_head_dataset/images/ground_truth/yawPositive_id212
pitchYawNegative
yawNegative
pitchYawPositive
3D_real_head_dataset/images/ground_truth/pitchYawPositive_id212
    image_name  pitchNegative  yawPositive  pitchYawPositive
0   00000.jpeg       0.789640     0.789640          0.789327
1   00001.jpeg       1.075664     1.135425          0.929548
2   00002.jpeg       0.739468     1.153049          1.062155
3   00003.jpeg       1.121030     1.063673          1.049822
4   00004.jpeg       1.083410     1.077800          1.155328
..         ...            ...          ...               ...
95  00095.jpeg       3.000428     4.558575          8.849054
96  00096.jpeg       3.014359     4.738374          8.479550
97  00097.jpeg       3.058185     4.727494          8.820415
98  00098.jpeg       3.211079     4.720431          8.899235
99  00099.jpeg       2.969148     4.758453          9.74374