In [49]:
import av
import os
import face_recognition
from PIL import Image

import numpy as np
import shutil

In [45]:
# Top-level working folders. All paths are relative and end with a slash.
INPUT_DIR = 'input_files/'
INTERMEDIARY_DIR = 'intermediary_files/'
OUTPUT_DIR = 'output_files/'

TOP_DIRS = [
    INPUT_DIR,
    INTERMEDIARY_DIR,
    OUTPUT_DIR,
]

# Folders created underneath INTERMEDIARY_DIR, one per preprocessing product.
IFRAME_DIR = 'iframes/'
FACECROP_DIR = 'faces/'
RESIDUAL_DIR = 'residual/'

PREPROCESS_DIRS = [
    IFRAME_DIR,
    FACECROP_DIR,
    RESIDUAL_DIR,
]

In [47]:
# Create the working directory tree.
# exist_ok=True keeps the cell safe to re-run, while real failures (e.g. a
# permission error, or a regular file already occupying the path) are raised
# instead of being silently swallowed.
for top_dir in TOP_DIRS:
    os.makedirs(top_dir, exist_ok=True)

for sub_dir in PREPROCESS_DIRS:
    os.makedirs(INTERMEDIARY_DIR + sub_dir, exist_ok=True)

In [3]:
# DS_CDFV1 = 'celeb_df_v1/'
# DS_CDFV2 = 'celeb_df_v2/'

# DS_ORGINAL = 'dataset_original/'
# DS_SPLIT = 'dataset_split/'
# DS_IFRAMES = 'dataset_iframes/'
# DS_FACE = 'dataset_face/'
# DS_FACE_IMG = 'dataset_face_img/'
# DS_SRM_SNIPPETS = 'dataset_srm_snippets_5/'
# DS_SEGMENTS = 'dataset_segments/'
# DS_RAW = 'dataset_raw/'
# DS_RESIDUALS = 'dataset_residuals/'
# DS_TEMPORAL = 'dataset_temporal/'

# DS_FRAME_DIFF = 'dataset_frame_diff/'
# DS_FRAME_DIFF_IMG = 'dataset_frame_diff_img/'

# DS_SEG_COUNT_1 = '1_segment/'
# DS_SEG_COUNT_2 = '2_segments/'
# DS_SEG_COUNT_3 = '3_segments/'
# DS_SEG_COUNT_4 = '4_segments/'
# DS_SEG_COUNT_5 = '5_segments/'

# SEG_COUNT = [DS_SEG_COUNT_1, DS_SEG_COUNT_2, DS_SEG_COUNT_3, DS_SEG_COUNT_4, DS_SEG_COUNT_5]

# SEG_1 = 'seg_1/'
# SEG_2 = 'seg_2/'
# SEG_3 = 'seg_3/'
# SEG_4 = 'seg_4/'
# SEG_5 = 'seg_5/'

# SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

# DS_TRAIN = 'train_dataset/'
# DS_TEST = 'test_dataset/'
# DS_VAL = 'val_dataset/'

# CLASS_FAKE = 'fake/'
# CLASS_REAL = 'real/'


# TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG, DS_SRM_SNIPPETS]
# TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
# SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
# SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
# CLASS = [CLASS_REAL, CLASS_FAKE]

# DATASET = [DS_CDFV1, DS_CDFV2]

# Flow

- Read all videos present in input_files folder
- For each video in the input directory
    - Extract I-Frames and crop faces
    - Extract residuals
    - Save face-cropped video and residuals video
    - In Frame-level stream
        - Extract all frames in face-cropped video
        - Take average of prediction results as video score
    - In SRM stream
        - Extract snippets from face-cropped video
        - Take average of prediction results as video score
    - In Temporal stream
        - Extract all residuals from residual video
        - Take average of prediction per segment
        - Select the most extreme value as video score (closest to 0 or 1)
    - In score aggregation
        - Take average of three scores
        - Use voting to determine class (Use extreme value of major class as video score)
        - Use trained SVM model to predict class probabilities

# Functions

In [8]:
def extract_iframes(fp):
    """Copy only the key-frame packets of a video into a new file.

    Packets of the first video stream are remuxed as-is (nothing is decoded or
    re-encoded). The result is written to ``INTERMEDIARY_DIR + IFRAME_DIR``
    under the same base name as ``fp``.

    Args:
        fp: Path of the input video.
    """
    out_path = INTERMEDIARY_DIR + IFRAME_DIR + os.path.split(fp)[1]

    # Context managers close both containers even if demuxing/muxing raises.
    with av.open(fp) as input_vid, av.open(out_path, 'w') as output_vid:
        in_stream = input_vid.streams.video[0]
        # NOTE(review): skip_frame is a decoder setting and nothing is decoded
        # here, so this is presumably a no-op -- confirm before removing.
        in_stream.codec_context.skip_frame = "NONKEY"

        # The output stream mirrors the input stream's codec parameters.
        out_stream = output_vid.add_stream(template=in_stream)

        for packet in input_vid.demux(in_stream):
            # Skip packets without a decode timestamp (such as the empty
            # "flush" packet demux yields at the end); they cannot be muxed.
            if packet.dts is None:
                continue

            if packet.is_keyframe:
                # Re-assign the packet to the output stream before writing it.
                packet.stream = out_stream
                output_vid.mux(packet)

In [14]:
# MesoNet works best with images having 256x256 dimension
# If face location borders span a smaller distance, extend the borders
# on either side equally to ensure 256x256 image

def normalize_face_borders(low, high, max_val, req_dim):
    """Widen the interval ``[low, high]`` symmetrically towards ``req_dim``.

    Args:
        low: Lower border (left or top) of the face box.
        high: Upper border (right or bottom) of the face box.
        max_val: Largest allowed border value (image width or height).
        req_dim: Desired span of the interval.

    Returns:
        A ``(low, high)`` tuple. If the interval already spans at least
        ``req_dim`` it is returned unchanged; otherwise each side is moved
        outwards by half of the missing span and clamped to ``[0, max_val]``.
        Because of the clamping the result can still be narrower than
        ``req_dim`` for faces close to an image edge. Widened borders may be
        floats.
    """
    diff = high - low

    # Always hand back a pair: callers unpack the result, so the previous bare
    # ``return`` (None) crashed them for faces that were already large enough.
    if diff >= req_dim:
        return low, high

    offset = (req_dim - diff) / 2
    low = max(0, low - offset)
    high = min(max_val, high + offset)

    return low, high

In [15]:
# Face Location: (left, top, right, bottom)
def modify_crop_window(face_location, height, width, req_dim):
    """Adjust a face box along both axes via ``normalize_face_borders``.

    Args:
        face_location: Box as ``(left, top, right, bottom)``.
        height: Image height, used as the limit for the vertical borders.
        width: Image width, used as the limit for the horizontal borders.
        req_dim: Requested span, forwarded to ``normalize_face_borders``.

    Returns:
        The adjusted box as ``(left, top, right, bottom)``.
    """
    # Horizontal extent first, then vertical.
    x_borders = normalize_face_borders(face_location[0], face_location[2], width, req_dim)
    left, right = x_borders

    y_borders = normalize_face_borders(face_location[1], face_location[3], height, req_dim)
    top, bot = y_borders

    return (left, top, right, bot)

In [40]:
def save_cropped_faces_to_video(fp, req_dim):
    """Crop the first detected face of every frame and write the crops to a video.

    The output is written to ``INTERMEDIARY_DIR + FACECROP_DIR`` under the same
    base name as ``fp``, using the input's codec and pixel format, a rate of 8
    and a 256x256 stream size. Frames in which no face is found are skipped.

    Args:
        fp: Path of the input video.
        req_dim: If positive, the crop window is passed through
            ``modify_crop_window`` with this requested dimension; otherwise the
            detected face box is used as-is.
    """
    out_path = INTERMEDIARY_DIR + FACECROP_DIR + os.path.split(fp)[1]

    # Local names avoid shadowing the ``input`` builtin; the context managers
    # close both containers even if decoding or encoding raises.
    with av.open(fp) as input_vid, av.open(out_path, 'w') as output_vid:
        in_stream = input_vid.streams.video[0]
        codec_name = in_stream.codec_context.name

        # output video dimension should be 256x256
        out_stream = output_vid.add_stream(codec_name, rate=8)
        out_stream.width = 256
        out_stream.height = 256
        out_stream.pix_fmt = in_stream.codec_context.pix_fmt

        for frame in input_vid.decode(in_stream):
            img_frame = frame.to_image()

            # Ask for RGB explicitly: without ``format`` to_ndarray() keeps the
            # stream's native pixel format (typically planar YUV), which is not
            # the RGB image layout the face detector is documented to take.
            nd_frame = frame.to_ndarray(format='rgb24')

            # Face location returned by face_recognition api: [(top, right, bottom, left)]
            # Origin considered at top left corner of image => right margin > left margin, bottom > top
            face_locations = face_recognition.api.face_locations(nd_frame)

            # if can't find a face, then skip that frame
            # TODO : sync frame skipping with temporality stream
            if len(face_locations) == 0:
                continue

            # Face location required by PIL.Image: (left, top, right, bottom)
            top, right, bottom, left = face_locations[0]
            crop_box = (left, top, right, bottom)

            # Modify crop window size only if positive value given.
            if req_dim > 0:
                crop_box = modify_crop_window(crop_box, img_frame.height, img_frame.width, req_dim)

            out_frame = av.VideoFrame.from_image(img_frame.crop(crop_box))
            output_vid.mux(out_stream.encode(out_frame))

        # Encoding ``None`` flushes the packets still buffered in the encoder.
        output_vid.mux(out_stream.encode(None))

In [50]:
def compute_residual(a, b):
    """Return the element-wise difference ``a - b`` of two images as a PIL image.

    Both inputs are viewed as NumPy arrays, subtracted, and the result is
    wrapped back into an image with ``Image.fromarray``.

    NOTE(review): the subtraction is done in the arrays' own dtype. If the
    inputs are 8-bit images, negative differences presumably wrap around
    modulo 256 instead of going negative -- confirm that this is what the
    downstream consumer of the residuals expects.
    """
    minuend = np.asarray(a)
    subtrahend = np.asarray(b)
    return Image.fromarray(np.subtract(minuend, subtrahend))

In [68]:
def extract_residuals(fp):
    """Write one residual frame per group of pictures (GOP) of a video.

    A GOP is taken to run from one key frame up to, but not including, the
    next key frame. For each GOP the residual is
    ``compute_residual(last frame, first frame)``. The residuals are encoded
    with the input's codec and pixel format, a rate of 8 and a 224x224 stream
    size, into ``INTERMEDIARY_DIR + RESIDUAL_DIR`` under the same base name as
    ``fp``.

    Only the first video stream is read. A video without any decodable frame
    produces an output file with no residuals instead of raising.

    Args:
        fp: Path of the input video.
    """
    out_path = INTERMEDIARY_DIR + RESIDUAL_DIR + os.path.split(fp)[1]

    # Context managers close both containers even if decoding/encoding raises.
    with av.open(fp) as input_vid, av.open(out_path, 'w') as output_vid:
        in_stream = input_vid.streams.video[0]
        codec_name = in_stream.codec_context.name

        # output video dimension is 224x224
        out_stream = output_vid.add_stream(codec_name, rate=8)
        out_stream.width = 224
        out_stream.height = 224
        out_stream.pix_fmt = in_stream.codec_context.pix_fmt

        def write_residual(last_frame, first_frame):
            # Encode the residual between the two ends of one GOP.
            residual = compute_residual(last_frame.to_image(), first_frame.to_image())
            output_vid.mux(out_stream.encode(av.VideoFrame.from_image(residual)))

        # Single pass over the *video* stream only. GOP boundaries come from the
        # decoded frames' own key_frame flag, so they line up with the frames
        # being compared; packet positions (decode order, possibly mixed with
        # other streams' packets) need not match frame positions. Only the
        # current GOP's first frame and the latest frame are kept in memory.
        gop_start = None
        previous = None
        for frame in input_vid.decode(in_stream):
            if gop_start is None:
                # The very first frame always opens the first GOP.
                gop_start = frame
            elif frame.key_frame:
                # A new key frame closes the running GOP.
                write_residual(previous, gop_start)
                gop_start = frame
            previous = frame

        # Close the final (possibly only) GOP.
        if previous is not None:
            write_residual(previous, gop_start)

        # Encoding ``None`` flushes the packets still buffered in the encoder.
        output_vid.mux(out_stream.encode(None))

# Execution

In [69]:
def process_video(fp):
    """Run the preprocessing steps for a single video, in order.

    1. Extract the key frames of ``fp`` into the I-frame folder.
    2. Crop faces from that I-frame video (crop window left unmodified, as
       a non-positive ``req_dim`` of -1 is passed).
    3. Extract residuals from the original video ``fp``.

    Args:
        fp: Path of the input video.
    """
    extract_iframes(fp)

    # The I-frame video written by the previous step is the face-crop input.
    video_name = os.path.basename(fp)
    iframe_video = INTERMEDIARY_DIR + IFRAME_DIR + video_name
    save_cropped_faces_to_video(iframe_video, -1)

    extract_residuals(fp)

In [71]:
# Set to the path of a single video (e.g. 'temp.mp4') to process only that
# file; leave as None to process every video in the input directory.
filename = None

# If filename is a valid file in root directory, process only that file
if filename is not None and os.path.exists(filename):
    process_video(filename)

# If no (valid) filename was given, process all videos in input directory
else:
    # sorted() gives a deterministic processing order across runs/platforms.
    for video in sorted(os.listdir(INPUT_DIR)):
        video_path = INPUT_DIR + video

        # Skip sub-directories (e.g. .ipynb_checkpoints), which cannot be
        # opened as videos.
        if not os.path.isfile(video_path):
            continue

        process_video(video_path)

        print(f'Video processed: {video}')

Video processed: id0_0001.mp4
Video processed: id0_id1_0001.mp4
Video processed: id10_0001.mp4
Video processed: id10_id11_0001.mp4
