In [1]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler
from skimage.metrics import structural_similarity as ssim
import cv2
from tqdm import tqdm
import dlib
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
import math
import os
import threading
import tensorflow as tf
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
PDD_PATH ='../PDD'

# Helper Function

In [2]:
def extract_video_info(video_path):
    cap = cv2.VideoCapture(video_path)
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    duration = total_frames / fps
    cap.release()
    return total_frames, fps, duration


In [3]:
def extract_10_frames(video_path, fps, start_time, end_time):
    cap = cv2.VideoCapture(video_path)
    all_frames = []
    
    # Calculate start and end frame numbers
    start_frame = int(start_time * fps)
    end_frame = int(end_time * fps)
    
    # Extract all frames in the time range
    for frame_id in range(start_frame, end_frame):
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_id)
        ret, frame = cap.read()
        if ret:
            all_frames.append(frame)
    
    cap.release()
    
    # Calculate segment size (number of frames per segment)
    total_frames = len(all_frames)
    segment_size = total_frames // 5
    
    # Sample 2 frames from each segment
    sampled_frames = []
    for i in range(5):
        segment_start = i * segment_size
        segment_end = (i + 1) * segment_size
        
        # Get 2 frames from the middle of each segment
        mid_point = (segment_start + segment_end) // 2
        sampled_frames.append(all_frames[mid_point])
        sampled_frames.append(all_frames[mid_point + 1])
    
    return sampled_frames

# Flicker

In [4]:
def detect_flicker(frames):

    flow_magnitudes = []
    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)

    for i in range(1, len(frames)):
        next_gray = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)

        # Compute optical flow between prev and current
        flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None,
                                            pyr_scale=0.5, levels=3, winsize=15,
                                            iterations=3, poly_n=5, poly_sigma=1.2, flags=0)
        
        magnitude, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        flow_magnitudes.append(np.mean(magnitude))

        prev_gray = next_gray
    flow_magnitudes = StandardScaler().fit_transform(np.array(flow_magnitudes).reshape(-1, 1)).flatten().tolist()
    return flow_magnitudes

# Lip movement

In [5]:
def get_lip_movement(frames):
    """
    Calculates the variance of lip height movement from a list of frames.
    - Assumes frames are in BGR format.
    - Detects the largest face in each frame.
    - Extracts top and bottom lip landmarks.
    - Returns variance of lip height movement as a single float.
    """
    movements = []

    try:
        for frame in frames:
            try:
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = detector(gray)

                if len(faces) == 0:
                    continue

                face = sorted(faces, key=lambda f: f.width() * f.height(), reverse=True)[0]
                landmarks = predictor(gray, face)

                lip_height = landmarks.part(66).y - landmarks.part(62).y
                movements.append(lip_height)

            except Exception as inner_e:
                print(f"Error processing frame: {inner_e}")
                continue

        if len(movements) < 2:
            return [0.0]
        movements = StandardScaler().fit_transform(np.array(movements).reshape(-1, 1)).flatten().tolist()
        result = float(np.mean(np.abs(np.diff(movements))))
        return [result]

    except Exception as e:
        print(f"Unexpected error in get_lip_movement: {e}")
        return [0.0]

# Detect Blink

In [6]:
def detect_blinks(frames):
    """
    Detects the number of blinks from a list of frames.
    Uses a simple eye-aspect-ratio (EAR) based difference between eye heights.
    """

    blink_count = 0

    for frame in frames:
        try:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detector(gray)

            for face in faces:
                landmarks = predictor(gray, face)
                left_eye = np.mean([landmarks.part(i).y for i in range(36, 42)])
                right_eye = np.mean([landmarks.part(i).y for i in range(42, 48)])

                eyes_aspect_ratio = abs(left_eye - right_eye)
                if eyes_aspect_ratio < 1.2:  # Simple threshold — tune as needed
                    blink_count += 1

        except Exception as e:
            print(f"Error processing frame in blink detection: {e}")
            continue

    return [blink_count]

# Head Movement Anomalies

In [22]:
from sklearn.preprocessing import StandardScaler
import numpy as np

def extract_head_pose(frames):
    """
    Estimates vertical head movement using the distance between nose tip and chin
    from a list of frames. Always returns a list of 10 normalised values.
    If fewer than 10 values are available, pads with 0.0.
    If more than 10, trims to 10.
    """
    movements = []

    for frame in frames:
        try:
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = detector(gray)

            if len(faces) == 0:
                movements.append(0.0)
                continue

            face = sorted(faces, key=lambda f: f.width() * f.height(), reverse=True)[0]
            landmarks = predictor(gray, face)

            nose_tip = (landmarks.part(30).x, landmarks.part(30).y)
            chin = (landmarks.part(8).x, landmarks.part(8).y)
            distance = float(np.linalg.norm(np.array(nose_tip) - np.array(chin)))
            movements.append(distance)

        except Exception as e:
            print(f"Error processing frame in head pose extraction: {e}")
            movements.append(0.0)

    # Scale only if we have non-zero values
    if any(movements):
        movements = StandardScaler().fit_transform(np.array(movements).reshape(-1, 1)).flatten().tolist()

    # Adjust to exactly 10 elements
    if len(movements) < 10:
        movements += [0.0] * (10 - len(movements))
    elif len(movements) > 10:
        movements = movements[:10]

    return movements

# Pulse

In [8]:
def detect_pulse(frames):
    """
    Extracts pulse signal from a list of frames by averaging the green channel
    in a fixed forehead region. Applies StandardScaler to the signal.
    Returns a normalised NumPy array of pulse signal.
    """
    pulse_signals = []

    for frame in frames:
        try:
            roi = frame[100:150, 100:150, 1]  # green channel from forehead region
            avg_green = np.mean(roi)
            pulse_signals.append(avg_green)
        except Exception as e:
            print(f"Error processing frame in pulse detection: {e}")
            continue

    pulse_signals_signals = StandardScaler().fit_transform(np.array(pulse_signals).reshape(-1, 1)).flatten().tolist()

    return pulse_signals

# PSNR and SSIM

In [9]:
def compute_ssim_psnr(frames):
    psnr_vals = []
    ssim_vals = []

    def calculate_psnr(img1, img2):
        mse = np.mean((img1.astype(np.float32) - img2.astype(np.float32)) ** 2)
        if mse == 0:
            return 100  # identical
        PIXEL_MAX = 255.0
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
    
    for i in range(len(frames) - 1):
        img1 = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
        img2 = cv2.cvtColor(frames[i + 1], cv2.COLOR_BGR2GRAY)

        psnr_val = calculate_psnr(img1, img2)
        ssim_val, _ = ssim(img1, img2, full=True)

        psnr_vals.append(psnr_val)
        ssim_vals.append(ssim_val)
    
    psnr_vals = StandardScaler().fit_transform(np.array(psnr_vals).reshape(-1, 1)).flatten().tolist()
    ssim_vals = StandardScaler().fit_transform(np.array(ssim_vals).reshape(-1, 1)).flatten().tolist()

    return psnr_vals, ssim_vals

# Functions for multi thread

In [16]:
def process_video(filename):
    if not filename.lower().endswith('.mp4'):
        return None

    video_path = os.path.join(PDD_PATH, filename)

    try:
        tqdm.write(f"Processing {filename} on thread {threading.current_thread().name}")
        total_frames, fps, duration = extract_video_info(video_path)
        segment_timestamps = [(i * (duration / 5), (i + 1) * (duration / 5)) for i in range(5)]
        feature_lengths = []
        flicker_vals=[]
        lip_var=[]
        blink_feature=[]
        head_movement=[]
        pulse_vals=[]
        psnr_vals=[]
        ssim_vals=[]

        for start_time, end_time in segment_timestamps:
            frames = extract_10_frames(video_path, fps, start_time, end_time)

            flicker_vals += detect_flicker(frames)
            lip_var += get_lip_movement(frames)
            blink_feature += detect_blinks(frames)
            head_movement += extract_head_pose(frames)
            pulse_vals += detect_pulse(frames)
            psnr_val, ssim_val = compute_ssim_psnr(frames)
            psnr_vals+=psnr_val
            ssim_vals+=ssim_val

        feature_lengths.append(len(flicker_vals))
        feature_lengths.append(len(lip_var))
        feature_lengths.append(len(blink_feature))
        feature_lengths.append(len(head_movement))
        feature_lengths.append(len(pulse_vals))
        feature_lengths += [len(psnr_vals), len(ssim_vals)]
        if int(filename[2:4]) <8:
            label = 1
        else:
            label = 0
        return {
            'filename': filename,
            'fake': label,
            "flicker": flicker_vals,
            "lip_movement_variance": lip_var,
            "blink": blink_feature,
            "head_movement": head_movement,
            "pulse": pulse_vals,
            "psnr": psnr_vals,
            "ssim": ssim_vals,
            'feature_lengths': feature_lengths
        }

    except Exception as e:
        tqdm.write(f"Error processing {filename}: {e}")
        return None

In [26]:
filenames = os.listdir(PDD_PATH)

data = []

with ThreadPoolExecutor(max_workers=1) as executor:
    futures = [executor.submit(process_video, fname) for fname in filenames]
    for future in tqdm(as_completed(futures), total=len(futures), desc="Processing videos"):
        result = future.result()
        if result:
            data.append(result)

# Convert to DataFrame
df_features = pd.DataFrame(data)
df_features.set_index('filename', inplace=True)

# Show summary
print("\n Finished processing first 2 videos.")

Processing b-08.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:   3%|▎         | 1/30 [00:10<05:12, 10.79s/it]

Processing b-09.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:   7%|▋         | 2/30 [00:23<05:38, 12.08s/it]

Processing t-09.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  10%|█         | 3/30 [00:33<04:55, 10.95s/it]

Processing t-08.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  13%|█▎        | 4/30 [00:43<04:32, 10.48s/it]

Processing b-15.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  17%|█▋        | 5/30 [00:53<04:23, 10.53s/it]

Processing b-01.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  20%|██        | 6/30 [01:02<03:54,  9.79s/it]

Processing t-05.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  23%|██▎       | 7/30 [01:11<03:43,  9.73s/it]

Processing t-04.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  27%|██▋       | 8/30 [01:18<03:15,  8.89s/it]

Processing t-10.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  30%|███       | 9/30 [01:29<03:16,  9.35s/it]

Processing b-00.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  33%|███▎      | 10/30 [01:37<03:00,  9.02s/it]

Processing b-14.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  37%|███▋      | 11/30 [01:43<02:34,  8.12s/it]

Processing b-02.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  40%|████      | 12/30 [01:51<02:26,  8.14s/it]

Processing t-06.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  43%|████▎     | 13/30 [02:02<02:32,  8.97s/it]

Processing t-12.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  47%|████▋     | 14/30 [02:10<02:19,  8.71s/it]

Processing t-13.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  50%|█████     | 15/30 [02:22<02:22,  9.49s/it]

Processing t-07.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  53%|█████▎    | 16/30 [02:31<02:13,  9.51s/it]

Processing b-03.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  57%|█████▋    | 17/30 [02:40<02:01,  9.37s/it]

Processing b-07.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  60%|██████    | 18/30 [02:49<01:51,  9.32s/it]

Processing b-13.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  63%|██████▎   | 19/30 [03:01<01:49,  9.92s/it]

Processing t-03.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  67%|██████▋   | 20/30 [03:09<01:33,  9.40s/it]

Processing t-02.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  70%|███████   | 21/30 [03:17<01:22,  9.18s/it]

Processing b-12.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  73%|███████▎  | 22/30 [03:27<01:14,  9.33s/it]

Processing b-06.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  77%|███████▋  | 23/30 [03:35<01:01,  8.76s/it]

Processing b-10.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  80%|████████  | 24/30 [03:42<00:50,  8.40s/it]

Processing t-14.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  83%|████████▎ | 25/30 [03:53<00:45,  9.12s/it]

Processing t-00.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  87%|████████▋ | 26/30 [04:02<00:36,  9.13s/it]

Processing t-01.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  90%|█████████ | 27/30 [04:12<00:27,  9.24s/it]

Processing t-15.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  93%|█████████▎| 28/30 [04:24<00:20, 10.06s/it]

Processing b-05.mp4 on thread ThreadPoolExecutor-1_0


Processing videos:  97%|█████████▋| 29/30 [04:34<00:10, 10.16s/it]

Processing b-11.mp4 on thread ThreadPoolExecutor-1_0


Processing videos: 100%|██████████| 30/30 [04:41<00:00,  9.40s/it]


 Finished processing first 2 videos.





In [27]:
df_features

Unnamed: 0_level_0,fake,flicker,lip_movement_variance,blink,head_movement,pulse,psnr,ssim,feature_lengths
filename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
b-08.mp4,0,"[-0.7271178364753723, 1.650829792022705, -0.69...","[1.1111111111111112, 0.7537783614444091, 0.952...","[6, 0, 1, 1, 2]","[-1.2751419341699497, -1.8973103806689957, -0....","[19.1752, 19.1728, 19.814, 19.8668, 19.3068, 1...","[0.7573672478334952, -1.2610070849997308, 0.72...","[0.6708920182845275, -1.3536855272618769, 0.94...","[45, 5, 5, 50, 50, 45, 45]"
b-09.mp4,0,"[-0.7352153658866882, -0.06433255225419998, -0...","[0.8395662798164726, 1.0619766763577223, 0.828...","[2, 0, 0, 0, 4]","[-0.2860707930511538, -0.2860707930511538, -1....","[98.8368, 99.5108, 97.4772, 96.902, 93.8928, 9...","[0.6902879105596079, -0.7858917294454759, 1.36...","[0.9224829297966465, -0.5719733636365226, 1.09...","[45, 5, 5, 50, 50, 45, 45]"
t-09.mp4,0,"[-0.3087025582790375, 1.8230574131011963, -0.8...","[0.5243788202755697, 0.8985956492257652, 0.614...","[8, 10, 9, 7, 7]","[0.5326861616166986, 0.5326861616166986, 0.447...","[57.6116, 57.7592, 57.2744, 57.2916, 56.8808, ...","[-0.14668754610983095, -1.158672486023009, 0.3...","[0.36958386104134977, -1.652903457704924, 0.81...","[45, 5, 5, 50, 50, 45, 45]"
t-08.mp4,0,"[-1.0070769786834717, 0.8262362480163574, -1.1...","[1.088662107903635, 1.311859599864291, 0.85470...","[0, 3, 4, 2, 0]","[-0.8148675349460605, -0.8148675349460605, -0....","[72.908, 72.9028, 72.5716, 72.5696, 72.57, 72....","[1.3230831674506274, -0.9179443804713251, 1.78...","[1.020598634047629, -0.9507252149224188, 1.117...","[45, 5, 5, 50, 50, 45, 45]"
b-15.mp4,0,"[-0.8217584490776062, 2.055525064468384, -0.72...","[0.0, 1.25, 0.7350904618932951, 0.867801596116...","[0, 1, 0, 0, 0]","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[170.486, 170.4872, 170.608, 170.606, 170.8984...","[1.0498965894552164, -1.2680845689369016, 0.24...","[0.9001384801481376, -1.4031126741124018, 0.73...","[45, 5, 5, 50, 50, 45, 45]"
b-01.mp4,1,"[-0.8112826347351074, 1.4683659076690674, -0.5...","[1.0830607221477648, 0.6550070497184036, 1.060...","[0, 8, 1, 2, 0]","[-1.408794964806644, -1.408794964806644, 1.150...","[84.8596, 84.8596, 84.8596, 84.8596, 84.8596, ...","[1.2594844812701942, -1.367828824711328, 0.116...","[0.80756619969941, -1.7976031604738176, 0.6214...","[45, 5, 5, 50, 50, 45, 45]"
t-05.mp4,1,"[-0.8409959673881531, 1.9394997358322144, -0.8...","[0.9494253265550826, 0.7149065624813993, 0.937...","[0, 0, 0, 0, 7]","[0.5599150812251459, 0.5665222076132084, -1.99...","[111.8868, 111.8868, 111.8868, 111.8868, 111.4...","[1.0107683132143965, -1.1261747295561992, 0.96...","[0.9457259316327042, -1.4990528777585042, 0.96...","[45, 5, 5, 50, 50, 45, 45]"
t-04.mp4,1,"[-0.5877202153205872, 2.283911943435669, -0.74...","[0.8639187954496622, 1.111111111111111, 0.5939...","[1, 4, 0, 0, 0]","[0.5525320448637286, 0.4618810757950802, -1.99...","[99.8308, 99.2296, 97.342, 97.226, 84.9856, 84...","[0.3204359924534859, -1.4788425221875032, 1.38...","[0.5293111696608028, -1.5817853119801977, 1.03...","[45, 5, 5, 50, 50, 45, 45]"
t-10.mp4,0,"[-0.6566253304481506, 1.8760687112808228, -0.9...","[0.928476690885259, 0.5432235633763386, 0.6025...","[0, 1, 0, 0, 4]","[-1.4664127344361433, -1.3771840981929615, 1.5...","[79.566, 79.6396, 77.3744, 77.5776, 77.36, 77....","[0.28665158635994537, -1.3224705770777507, 0.6...","[0.8453051895177661, -1.507481278252177, 0.937...","[45, 5, 5, 50, 50, 45, 45]"
b-00.mp4,1,"[-0.307506263256073, 2.4255805015563965, -0.70...","[0.6085806194501846, 0.8844775024088793, 0.978...","[9, 5, 6, 4, 7]","[-1.7459646715545223, -2.0594027313882046, 0.9...","[89.8044, 89.998, 90.3408, 90.02, 90.074, 89.9...","[-0.38566932499027573, -1.3952524925851661, 0....","[0.30350235929736924, -1.8795401744253186, 0.8...","[45, 5, 5, 50, 50, 45, 45]"


# TensorFlow functions

In [28]:
def _bytes_feature(value):
    """Convert a string / byte to a TFRecord bytes feature."""
    if isinstance(value, type(tf.constant(0))):
        value = value.numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Convert float values to a TensorFlow feature."""
    if isinstance(value, str):
        # Convert string representation of list to numpy array
        value = np.array(eval(value))
    elif not isinstance(value, np.ndarray):
        value = np.array([value])
    return tf.train.Feature(float_list=tf.train.FloatList(value=value.flatten()))

def _int_feature(value):
    """Convert an integer value to a TensorFlow feature."""

In [29]:
def serialize_features(row, index):
    """Convert a row of features into a TFRecord format."""
    feature_dict = {
        'filename': _bytes_feature(str(index).encode('utf-8')),
        'flicker': _float_feature(row['flicker']),
        'lip_movement_variance': _float_feature(row['lip_movement_variance']),
        'blink': _float_feature(row['blink']),
        'head_movement': _float_feature(row['head_movement']),
        'pulse': _float_feature(row['pulse']),
        'psnr': _float_feature(row['psnr']),
        'ssim': _float_feature(row['ssim']),
        'feature_lengths': _float_feature(row['feature_lengths'])
    }
    
    return tf.train.Example(features=tf.train.Features(feature=feature_dict)).SerializeToString()

def write_tfrecord(df_features, output_file="pdd_features.tfrecord"):
    """Write DataFrame features to a TFRecord file."""
    with tf.io.TFRecordWriter(output_file) as writer:
        for idx, row in tqdm(df_features.iterrows(), total=len(df_features), desc="Writing TFRecord"):
            example = serialize_features(row, idx)
            writer.write(example)
    
    print(f"TFRecord saved: {output_file}")

In [None]:
def read_tfrecord(tfrecord_path):
    """Read TFRecord file and convert to DataFrame."""
    
    # Define feature description for parsing
    feature_description = {
        'filename': tf.io.FixedLenFeature([], tf.string),
        'flicker': tf.io.VarLenFeature(tf.float32),
        'lip_movement_variance': tf.io.VarLenFeature(tf.float32),
        'blink': tf.io.VarLenFeature(tf.float32),
        'head_movement': tf.io.VarLenFeature(tf.float32),
        'pulse': tf.io.VarLenFeature(tf.float32),
        'psnr': tf.io.VarLenFeature(tf.float32),
        'ssim': tf.io.VarLenFeature(tf.float32),
        'feature_lengths': tf.io.VarLenFeature(tf.float32)
    }

    def _parse_function(example_proto):
        """Parse TFRecord example."""
        parsed_features = tf.io.parse_single_example(example_proto, feature_description)
        
        # Convert sparse tensors to dense and then to numpy arrays
        features = {}
        for key in parsed_features:
            if key == 'filename':
                features[key] = parsed_features[key].numpy().decode('utf-8')
            else:
                features[key] = tf.sparse.to_dense(parsed_features[key]).numpy()
        
        return features

    # Read TFRecord file
    dataset = tf.data.TFRecordDataset(tfrecord_path)
    
    # Parse all examples
    data = []
    for raw_record in dataset:
        features = _parse_function(raw_record)
        data.append(features)
    
    # Convert to DataFrame
    df = pd.DataFrame(data)
    df.set_index('filename', inplace=True)
    
    return df

In [30]:
write_tfrecord(df_features)

Writing TFRecord:   0%|          | 0/30 [00:00<?, ?it/s]

Writing TFRecord: 100%|██████████| 30/30 [00:00<00:00, 1660.21it/s]

TFRecord saved: pdd_features.tfrecord



