# Libraries Used

* ffmpeg-python
* av
* cmake
* dlib (based on the Python version)
* face-recognition

# Imports & Globals

In [1]:
import av
import face_recognition
import PIL

import numpy as np
import os
import random
import shutil

In [6]:
# Root folder of each Celeb-DF dataset version.
# Every path fragment below ends with '/' so that full paths can be built
# by plain string concatenation.
DS_CDFV1 = 'celeb_df_v1/'
DS_CDFV2 = 'celeb_df_v2/'

# One folder per processing stage inside a dataset root.
DS_ORGINAL = 'dataset_original/'
DS_SPLIT = 'dataset_split/'
DS_IFRAMES = 'dataset_iframes/'
DS_FACE = 'dataset_face/'
DS_FACE_IMG = 'dataset_face_img/'
DS_SEGMENTS = 'dataset_segments/'
DS_RAW = 'dataset_raw/'
DS_RESIDUALS = 'dataset_residuals/'

# Sub-folders used by the segmentation stages; the cells below pair
# SEGMENTS[i] with a segment count of i + 1.
SEG_1 = 'seg_1/'
SEG_2 = 'seg_2/'
SEG_3 = 'seg_3/'
SEG_4 = 'seg_4/'
SEG_5 = 'seg_5/'

# Train / test / validation split folders.
DS_TRAIN = 'train_dataset/'
DS_TEST = 'test_dataset/'
DS_VAL = 'val_dataset/'

# Class label folders.
CLASS_FAKE = 'fake/'
CLASS_REAL = 'real/'

# Iteration lists built from the fragments above.
DATASET = [DS_CDFV1, DS_CDFV2]
TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG]
TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
CLASS = [CLASS_REAL, CLASS_FAKE]

# File-name prefixes (not folders) that segment_video prepends to each
# output file, indexed by zero-based segment number.
SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

# I-Frame Extraction

## Testing Logic

In [None]:
# Exploration: print the picture type and key-frame flag of every decoded
# frame of one sample video.
# NOTE(review): the container is never closed in this cell.
test_vid = av.open('dataset_original/Celeb-real/id0_0000.mp4')

for packet in test_vid.demux():
    for frame in packet.decode():
        print(f'{frame.pict_type} - {frame.key_frame}')

In [49]:
# Exploration: copy only the key-frame packets of one sample video into a
# new file (no re-encoding) and count how many were copied.
test_input = av.open('dataset_original/Celeb-real/id0_0000.mp4')
test_output = av.open('dataset_IFrames/id0_0000.mp4', 'w')

i_frame_count = 0

in_stream = test_input.streams.video[0]
in_stream.codec_context.skip_frame = "NONKEY"

# The output stream reuses the input stream's codec parameters so packets
# can be muxed as-is.
out_stream = test_output.add_stream(template=in_stream)

for packet in test_input.demux(in_stream):
    # Packets without a dts are skipped (not muxed).
    if packet.dts is None:
        continue

    if packet.is_keyframe:
        i_frame_count += 1
        packet.stream = out_stream
        test_output.mux(packet)

print(i_frame_count)

test_input.close()
test_output.close()

40


In [9]:
# Exploration: run the key-frame copy on the first 9 videos of one folder
# (the loop breaks when count reaches 10, before processing the 10th).
# NOTE(review): DS_ORG, CELEB_REAL and DS_IFRAME are not defined in the
# globals cell visible in this notebook -- presumably left over from an
# earlier folder layout; confirm before re-running.
count = 0
for video in os.listdir(DS_ORG + CELEB_REAL):
    count += 1
    if (count == 10):
        break

    input_vid = av.open(DS_ORG + CELEB_REAL + video)
    output_vid = av.open(DS_IFRAME + CELEB_REAL + video, 'w')

    in_stream = input_vid.streams.video[0]
    in_stream.codec_context.skip_frame = "NONKEY"

    out_stream = output_vid.add_stream(template=in_stream)

    for packet in input_vid.demux(in_stream):
        # Packets without a dts are skipped (not muxed).
        if packet.dts is None:
            continue
    
        if packet.is_keyframe:
            packet.stream = out_stream
            output_vid.mux(packet)

    input_vid.close()
    output_vid.close()


## Implementation

In [3]:
def extract_frames(src_dir, dest_dir, vid_class, filename):
    """Copy only the key-frame (I-frame) packets of a video into a new file.

    Packets are remuxed without re-encoding: the output stream is created
    from the input stream as a template and key-frame packets are written
    to it unchanged.

    Args:
        src_dir: Folder prefix of the source tree (must end with '/').
        dest_dir: Folder prefix of the destination tree (must end with '/').
        vid_class: Class sub-folder (e.g. 'real/'), appended to both prefixes.
        filename: Name of the video file inside the class folder.
    """
    src_path = src_dir + vid_class + filename
    dst_path = dest_dir + vid_class + filename

    # Context managers close both containers even when demuxing or muxing
    # raises part-way through, so no file handle is leaked.
    with av.open(src_path) as input_vid, av.open(dst_path, 'w') as output_vid:
        in_stream = input_vid.streams.video[0]
        in_stream.codec_context.skip_frame = "NONKEY"

        out_stream = output_vid.add_stream(template=in_stream)

        for packet in input_vid.demux(in_stream):
            # Packets without a dts are skipped (not muxed).
            if packet.dts is None:
                continue

            if packet.is_keyframe:
                packet.stream = out_stream
                output_vid.mux(packet)

In [6]:
# Extracting I-Frames from all videos (Celeb-DF v1)

# Videos are read from <dataset>/dataset_split/ and written to the same
# relative location under <dataset>/dataset_iframes/.
# NOTE(review): assumes the destination split/class folders already exist.
src_base_path = DS_CDFV1 + DS_SPLIT
dst_base_path = DS_CDFV1 + DS_IFRAMES

for split in SPLIT:
    for class_dir in CLASS:
        for video in os.listdir(src_base_path + split + class_dir):
            extract_frames(src_base_path + split, dst_base_path + split, class_dir, video)

In [4]:
# Extracting I-Frames from all videos (Celeb-DF v2)

# Videos are read from <dataset>/dataset_split/ and written to the same
# relative location under <dataset>/dataset_iframes/.
# NOTE(review): assumes the destination split/class folders already exist.
src_base_path = DS_CDFV2 + DS_SPLIT
dst_base_path = DS_CDFV2 + DS_IFRAMES

# Progress is printed per split and per class.
for split in SPLIT:
    print(f'---Split started: {split}---')
    for class_dir in CLASS:
        print(f'Class started: {class_dir}')
        for video in os.listdir(src_base_path + split + class_dir):
            extract_frames(src_base_path + split, dst_base_path + split, class_dir, video)

        print(f'Class finished: {class_dir}')

    print(f'---Split finished: {split}---')

---Split started: train_dataset/---
Class started: real/
Class finished: real/
Class started: fake/
Class finished: fake/
---Split finished: train_dataset/---
---Split started: test_dataset/---
Class started: real/
Class finished: real/
Class started: fake/
Class finished: fake/
---Split finished: test_dataset/---
---Split started: val_dataset/---
Class started: real/
Class finished: real/
Class started: fake/
Class finished: fake/
---Split finished: val_dataset/---


# Face Extraction

## Functions

In [3]:
# MesoNet works best with images having 256x256 dimension
# If face location borders span a smaller distance, extend the borders
# on either side equally to ensure 256x256 image

def normalize_face_borders(low, high, max_val, req_dim):
    """Widen the interval [low, high] symmetrically toward `req_dim` pixels.

    Args:
        low: Lower border of the face box along one axis.
        high: Upper border of the face box along the same axis.
        max_val: Image size along that axis; the upper border is clamped to it.
        req_dim: Desired span (high - low) after widening.

    Returns:
        A (low, high) tuple. The borders are returned unchanged when the
        span already reaches `req_dim`. Otherwise each border is moved
        outward by half of the missing span and then clamped to
        [0, max_val], so the result can still be narrower than `req_dim`
        near an image edge. Widened borders may be floats.
    """
    diff = high - low

    # Always return a pair: callers unpack the result, so a bare `return`
    # here would raise a TypeError for faces that are already large enough.
    if diff >= req_dim:
        return low, high

    offset = (req_dim - diff) / 2
    low = max(0, low - offset)
    high = min(max_val, high + offset)

    return low, high

In [4]:
# New normalize function to always make the cropped face image 256x256 dimension
# which will be fed as input to the MesoNet

def modified_normalize_face_borders(low, high, boundary):
    """Pad the interval [low, high] toward a span of 256, clamped to the image.

    Intervals wider than 256 are returned untouched. Otherwise the missing
    span is split between both sides, the lower border is clamped at 0 and
    the upper border at `boundary`.
    """
    span = high - low

    # Nothing to pad when the face already covers more than 256 pixels.
    if span > 256:
        return low, high

    padding = 256 - span
    lower_share = padding / 2
    upper_share = padding - lower_share

    padded_low = max(0, low - lower_share)
    padded_high = min(boundary, high + upper_share)

    return padded_low, padded_high

In [5]:
# Face Location: (left, top, right, bottom)
def modify_crop_window(face_location, height, width, req_dim):
    """Resize a (left, top, right, bottom) crop box via normalize_face_borders.

    The horizontal borders are normalized against the image `width` and
    the vertical borders against the image `height`, both toward
    `req_dim`. Returns the new box in the same (left, top, right, bottom)
    order.
    """
    # Horizontal pair: indices 0 (left) and 2 (right).
    x_min, x_max = normalize_face_borders(face_location[0], face_location[2], width, req_dim)
    # Vertical pair: indices 1 (top) and 3 (bottom).
    y_min, y_max = normalize_face_borders(face_location[1], face_location[3], height, req_dim)

    return (x_min, y_min, x_max, y_max)

## Testing Logic

In [9]:
# Exploration: detect a face in each frame of one I-frame video, crop it
# and save every crop as a numbered JPEG.
# NOTE(review): get_crop_window is not defined anywhere in the visible
# notebook -- presumably an earlier helper; confirm before re-running.
test_input = av.open('celeb_df_v1/dataset_iframes/train_dataset/fake/id0_id16_0000.mp4')

count = 0

for frame in test_input.decode():
    nd_frame = frame.to_ndarray()
    img_frame = frame.to_image()

    height, width = img_frame.height, img_frame.width

    # Face location returned by face_recognition api: [(top, right, bottom, left)] in css terms
    # Face location required by PIL.Image: (left, top, right, bottom)
    face_location = face_recognition.api.face_locations(nd_frame)
    face_location = get_crop_window(face_location, height, width)
    
    img_frame = img_frame.crop(face_location)
    img_frame.save(f'dataset_face/Celeb-real/id0_0000_{count}.jpg')

    count += 1 

[(126, 643, 216, 554)]


### Turning Cropped faces to a Video

In [45]:
# Exploration: crop the detected face out of every frame of one I-frame
# video and re-encode the crops into a new video at rate 2.
# NOTE(review): get_crop_window is not defined anywhere in the visible
# notebook -- presumably an earlier helper; confirm before re-running.
test_input = av.open('dataset_IFrames/Celeb-real/id10_0001.mp4')
test_output = av.open('dataset_Face/Celeb-real/id10_0001.mp4', 'w')

in_stream = test_input.streams.video[0]
codec_name = in_stream.codec_context.name

# Output stream keeps the input's codec, dimensions and pixel format.
out_stream = test_output.add_stream(codec_name, 2)
out_stream.width = in_stream.codec_context.width
out_stream.height = in_stream.codec_context.height
out_stream.pix_fmt = in_stream.codec_context.pix_fmt

for frame in test_input.decode(in_stream):
    img_frame = frame.to_image()
    nd_frame = frame.to_ndarray()

    height, width = img_frame.height, img_frame.width

    # Face location returned by face_recognition api: [(top, right, bottom, left)]
    # Face location required by PIL.Image: (left, top, right, bottom)
    face_location = face_recognition.api.face_locations(nd_frame)
    
    # Frames without a detected face are dropped from the output.
    if len(face_location) == 0:
        continue

    # face_location = (face_location[0][3], face_location[0][0], face_location[0][1], face_location[0][2])

    # left, right = normalize_face_borders(face_location[0], face_location[2])
    # bot, top = normalize_face_borders(face_location[3], face_location[1])
    # face_location = (left, top, right, bot)

    face_location = get_crop_window(face_location, height, width)
    img_frame = img_frame.crop(face_location)

    out_frame = av.VideoFrame.from_image(img_frame)
    out_packet = out_stream.encode(out_frame)
    test_output.mux(out_packet)

# Encoding None flushes the packets still buffered in the encoder.
out_packet = out_stream.encode(None)
test_output.mux(out_packet)

test_input.close()
test_output.close()

bitrate tolerance 128000 too small for bitrate 1024000, overriding


<PIL.Image.Image image mode=RGB size=948x500 at 0x7FD450981D50>


## Implementation

In [6]:
def save_cropped_faces_to_video(src_dir, dest_dir, vid_class, filename, req_dim):
    """Write a video made of the first detected face of each input frame.

    The output uses the input's codec and pixel format, a rate of 2 and a
    fixed 256x256 stream size. Frames in which no face is detected are
    dropped.

    Args:
        src_dir: Folder prefix of the source tree (must end with '/').
        dest_dir: Folder prefix of the destination tree (must end with '/').
        vid_class: Class sub-folder (e.g. 'real/'), appended to both prefixes.
        filename: Name of the video file inside the class folder.
        req_dim: When positive, the detected box is resized toward this
            span with modify_crop_window before cropping; otherwise the
            raw detector box is used.
    """
    src_path = src_dir + vid_class + filename
    dst_path = dest_dir + vid_class + filename

    # Context managers close both containers even if decoding, detection
    # or encoding raises part-way through.
    with av.open(src_path) as source, av.open(dst_path, 'w') as output:
        in_stream = source.streams.video[0]
        codec_name = in_stream.codec_context.name

        # output video dimension should be 256x256
        out_stream = output.add_stream(codec_name, rate=2)
        out_stream.width = 256
        out_stream.height = 256
        out_stream.pix_fmt = in_stream.codec_context.pix_fmt

        for frame in source.decode(in_stream):
            img_frame = frame.to_image()
            # Ask for RGB explicitly: without a format, to_ndarray() returns
            # the frame's native pixel layout (planar YUV for typical H.264
            # input), while the face detector expects an RGB image.
            nd_frame = frame.to_ndarray(format='rgb24')

            # Face location returned by face_recognition api: [(top, right, bottom, left)]
            # Origin considered at top left corner of image => right margin > left margin, bottom > top
            face_locations = face_recognition.api.face_locations(nd_frame)

            # if can't find a face, then skip that frame
            # TODO : sync frame skipping with temporality stream
            if not face_locations:
                continue

            # Face location required by PIL.Image: (left, top, right, bottom)
            top, right, bottom, left = face_locations[0]
            crop_box = (left, top, right, bottom)

            # Modify crop window size only if positive value given.
            if req_dim > 0:
                crop_box = modify_crop_window(crop_box, img_frame.height, img_frame.width, req_dim)

            out_frame = av.VideoFrame.from_image(img_frame.crop(crop_box))
            output.mux(out_stream.encode(out_frame))

        # Encoding None flushes the packets still buffered in the encoder.
        output.mux(out_stream.encode(None))

In [7]:
def save_cropped_faces_to_video_gpu(src_dir, dest_dir, vid_class, filename, req_dim):
    """Batch variant of face cropping: detect faces for all frames at once.

    All frames of the input are decoded into memory, passed to
    face_recognition's batch detector, and the first detected face of each
    frame is written to a 256x256 output video at rate 2. Frames without a
    detected face are dropped.

    Args:
        src_dir: Folder prefix of the source tree (must end with '/').
        dest_dir: Folder prefix of the destination tree (must end with '/').
        vid_class: Class sub-folder (e.g. 'real/'), appended to both prefixes.
        filename: Name of the video file inside the class folder.
        req_dim: When positive, the detected box is resized toward this
            span with modify_crop_window before cropping; otherwise the
            raw detector box is used.
    """
    src_path = src_dir + vid_class + filename
    dst_path = dest_dir + vid_class + filename

    # Context managers close both containers even if decoding, detection
    # or encoding raises part-way through.
    with av.open(src_path) as source, av.open(dst_path, 'w') as output:
        in_stream = source.streams.video[0]
        codec_name = in_stream.codec_context.name

        # output video dimension should be 256x256
        out_stream = output.add_stream(codec_name, rate=2)
        out_stream.width = 256
        out_stream.height = 256
        out_stream.pix_fmt = in_stream.codec_context.pix_fmt

        frame_list = []
        image_list = []
        for frame in source.decode(in_stream):
            # Ask for RGB explicitly: without a format, to_ndarray() returns
            # the frame's native pixel layout (planar YUV for typical H.264
            # input), while the face detector expects an RGB image.
            frame_list.append(frame.to_ndarray(format='rgb24'))
            image_list.append(frame.to_image())

        # Face location returned by face_recognition api: [(top, right, bottom, left)]
        # Origin considered at top left corner of image => right margin > left margin, bottom > top
        # batch mode uses GPU. Default batch size = 128
        batch_locations = face_recognition.api.batch_face_locations(
            frame_list, number_of_times_to_upsample=0, batch_size=8)

        for img_frame, face_locations in zip(image_list, batch_locations):
            if not face_locations:
                continue

            # Face location required by PIL.Image: (left, top, right, bottom)
            top, right, bottom, left = face_locations[0]
            crop_box = (left, top, right, bottom)

            # Modify crop window size only if positive value given.
            if req_dim > 0:
                crop_box = modify_crop_window(crop_box, img_frame.height, img_frame.width, req_dim)

            out_frame = av.VideoFrame.from_image(img_frame.crop(crop_box))
            output.mux(out_stream.encode(out_frame))

        # Encoding None flushes the packets still buffered in the encoder.
        output.mux(out_stream.encode(None))

### Simple Method to save cropped faces to video

In [11]:
# The codec resizes the video according to specified dimension.
# The face_location from face_recognition api can be directly used without normalizing borders.
def simple_save_cropped_faces_to_video(src_dir, dest_dir, vid_class, filename):
    """Write a 256x256 video of the raw detected face box of each frame.

    The first face box reported by the detector is cropped as-is (no
    border normalization) and encoded into a 256x256 stream at rate 2.
    Frames without a detected face are dropped.

    Args:
        src_dir: Folder prefix of the source tree (must end with '/').
        dest_dir: Folder prefix of the destination tree (must end with '/').
        vid_class: Class sub-folder (e.g. 'real/'), appended to both prefixes.
        filename: Name of the video file inside the class folder.
    """
    src_path = src_dir + vid_class + filename
    dst_path = dest_dir + vid_class + filename

    # Context managers close both containers even if decoding, detection
    # or encoding raises part-way through.
    with av.open(src_path) as source, av.open(dst_path, 'w') as output:
        in_stream = source.streams.video[0]
        codec_name = in_stream.codec_context.name

        # output video dimension should be 256x256
        out_stream = output.add_stream(codec_name, 2)
        out_stream.width = 256
        out_stream.height = 256
        out_stream.pix_fmt = in_stream.codec_context.pix_fmt

        for frame in source.decode(in_stream):
            img_frame = frame.to_image()
            # Ask for RGB explicitly: without a format, to_ndarray() returns
            # the frame's native pixel layout (planar YUV for typical H.264
            # input), while the face detector expects an RGB image.
            nd_frame = frame.to_ndarray(format='rgb24')

            # Face location returned by face_recognition api: [(top, right, bottom, left)]
            # Face location required by PIL.Image: (left, top, right, bottom)
            face_locations = face_recognition.api.face_locations(nd_frame)

            # can't find a face, then skip that frame
            # TODO : sync frame skipping with temporality stream
            if not face_locations:
                continue

            # since the codec resizes the video depending on specified dimension
            # no need to normalize borders
            top, right, bottom, left = face_locations[0]
            cropped = img_frame.crop((left, top, right, bottom))

            output.mux(out_stream.encode(av.VideoFrame.from_image(cropped)))

        # Encoding None flushes the packets still buffered in the encoder.
        output.mux(out_stream.encode(None))

### Celeb DF v1

In [None]:
# Extracting Faces from all videos

# Reads the I-frame videos of Celeb-DF v1 and writes the face videos to the
# same split/class location under dataset_face/.
iframe_base_path = DS_CDFV1 + DS_IFRAMES
face_base_path = DS_CDFV1 + DS_FACE

for split in SPLIT:
    for class_dir in CLASS:
        for video in os.listdir(iframe_base_path + split + class_dir):
            # req_dim=-1 is not positive, so the raw detector box is cropped.
            save_cropped_faces_to_video(iframe_base_path + split, face_base_path + split, class_dir, video, req_dim=-1)

In [None]:
# Extracting Faces from all videos (Using GPU)

# Same traversal as the CPU cell for Celeb-DF v1, but using the batch
# (GPU) face detector variant.
iframe_base_path = DS_CDFV1 + DS_IFRAMES
face_base_path = DS_CDFV1 + DS_FACE

for split in SPLIT:
    for class_dir in CLASS:
        for video in os.listdir(iframe_base_path + split + class_dir):
            # req_dim=-1 is not positive, so the raw detector box is cropped.
            save_cropped_faces_to_video_gpu(iframe_base_path + split, face_base_path + split, class_dir, video, req_dim=-1)

### Celeb DF v2

In [None]:
# Extracting Faces from all videos

# Reads the I-frame videos of Celeb-DF v2 and writes the face videos to the
# same split/class location under dataset_face/.
iframe_base_path = DS_CDFV2 + DS_IFRAMES
face_base_path = DS_CDFV2 + DS_FACE

for split in SPLIT:
    for class_dir in CLASS:
        for video in os.listdir(iframe_base_path + split + class_dir):
            # req_dim=-1 is not positive, so the raw detector box is cropped.
            save_cropped_faces_to_video(iframe_base_path + split, face_base_path + split, class_dir, video, req_dim=-1)

In [None]:
# Extracting Faces from all videos (Using GPU)

# Same traversal as the CPU cell for Celeb-DF v2, but using the batch
# (GPU) face detector variant.
iframe_base_path = DS_CDFV2 + DS_IFRAMES
face_base_path = DS_CDFV2 + DS_FACE

for split in SPLIT:
    for class_dir in CLASS:
        for video in os.listdir(iframe_base_path + split + class_dir):
            # req_dim=-1 is not positive, so the raw detector box is cropped.
            save_cropped_faces_to_video_gpu(iframe_base_path + split, face_base_path + split, class_dir, video, req_dim=-1)

# Segmenting Videos


## Implementation

In [7]:
def _open_segment_output(path, codec_name, pix_fmt):
    """Open `path` for writing with one 224x224 video stream at rate 2."""
    output = av.open(path, 'w')
    try:
        out_stream = output.add_stream(codec_name, 2)
        # output video dimension should be 224x224
        out_stream.width = 224
        out_stream.height = 224
        out_stream.pix_fmt = pix_fmt
    except Exception:
        # Do not leak the half-configured container.
        output.close()
        raise
    return output, out_stream


def segment_video(src_dir, dest_dir, filename, no_of_segments):
    """Split a video into consecutive segments of roughly equal frame count.

    Segment k (zero-based) is written to dest_dir + SEG[k] + filename,
    re-encoded with the input's codec and pixel format as a 224x224 stream
    at rate 2. A new segment is started once the current one holds more
    than `frames / no_of_segments` frames; the last segment takes all
    remaining frames.

    NOTE(review): the split relies on the frame count reported by the
    stream (`in_stream.frames`); confirm it is populated for the inputs.

    Args:
        src_dir: Folder containing the source video (must end with '/').
        dest_dir: Folder receiving the segment files (must end with '/').
        filename: Name of the video file.
        no_of_segments: Number of segments, between 1 and len(SEG).

    Raises:
        ValueError: If `no_of_segments` is outside 1..len(SEG).
    """
    # Fail before any file is created instead of dividing by zero or
    # running past the end of SEG half-way through the video.
    if not 1 <= no_of_segments <= len(SEG):
        raise ValueError(
            f'no_of_segments must be between 1 and {len(SEG)}, got {no_of_segments}')

    with av.open(src_dir + filename) as source:
        in_stream = source.streams.video[0]
        frames_per_segment = in_stream.frames / no_of_segments

        codec_name = in_stream.codec_context.name
        pix_fmt = in_stream.codec_context.pix_fmt

        seg_no = 0
        count = 1  # 1-based position of the next frame within its segment
        output, out_stream = _open_segment_output(
            dest_dir + SEG[seg_no] + filename, codec_name, pix_fmt)

        try:
            for frame in source.decode(in_stream):
                if seg_no < no_of_segments - 1 and count > frames_per_segment:
                    # Current segment is full: flush and close it, then
                    # start the next one.
                    output.mux(out_stream.encode(None))
                    output.close()
                    output = None

                    seg_no += 1
                    count = 1
                    output, out_stream = _open_segment_output(
                        dest_dir + SEG[seg_no] + filename, codec_name, pix_fmt)

                out_frame = av.VideoFrame.from_image(frame.to_image())
                output.mux(out_stream.encode(out_frame))

                count += 1

            # Encoding None flushes the packets still buffered in the encoder.
            output.mux(out_stream.encode(None))
        finally:
            # Close whichever segment is open, even after an error.
            if output is not None:
                output.close()

In [None]:
# Extracting 3-Segments from real celebrity videos
# Only the first listed video is processed (the loop breaks immediately).
# NOTE(review): DS_ORG, DS_SEG and CELEB_REAL are not defined in the visible
# globals cell, and this call passes (src, dest, class, video) while
# segment_video takes (src_dir, dest_dir, filename, no_of_segments) --
# presumably written for an earlier signature; confirm before re-running.

for video in os.listdir(DS_ORG + CELEB_REAL):
    segment_video(DS_ORG, DS_SEG, CELEB_REAL, video)
    break

In [None]:
# Extracting 3-Segments from fake celebrity videos
# Only the first listed video is processed (the loop breaks immediately).
# NOTE(review): the loop lists DS_ORG + CELEB_REAL but passes CELEB_FAKE to
# segment_video -- looks like a copy-paste slip; confirm the intended folder.
# NOTE(review): DS_ORG, DS_SEG, CELEB_REAL and CELEB_FAKE are not defined in
# the visible globals cell, and the argument list does not match
# segment_video(src_dir, dest_dir, filename, no_of_segments).

for video in os.listdir(DS_ORG + CELEB_REAL):
    segment_video(DS_ORG, DS_SEG, CELEB_FAKE, video)
    break

In [None]:
# Extracting 3-Segments from real youtube videos
# Only the first listed video is processed (the loop breaks immediately).
# NOTE(review): the loop lists DS_ORG + CELEB_REAL but passes YT_REAL to
# segment_video -- looks like a copy-paste slip; confirm the intended folder.
# NOTE(review): DS_ORG, DS_SEG, CELEB_REAL and YT_REAL are not defined in
# the visible globals cell, and the argument list does not match
# segment_video(src_dir, dest_dir, filename, no_of_segments).

for video in os.listdir(DS_ORG + CELEB_REAL):
    segment_video(DS_ORG, DS_SEG, YT_REAL, video)
    break

### Celeb DF v1

In [None]:
# Segment every Celeb-DF v1 video five times: into 1, 2, 3, 4 and 5 parts.
# enumerate() is zero-based, so SEGMENTS[i] ('seg_<i+1>/') receives the
# videos split into i + 1 segments.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV1 + DS_SPLIT + split + class_label
            dest_dir = DS_CDFV1 + DS_SEGMENTS + segment + split + class_label

            for video in os.listdir(src_dir):
                segment_video(src_dir, dest_dir, video, no_of_segments + 1)

### Celeb DF v2

In [None]:
# Segment every Celeb-DF v2 video five times: into 1, 2, 3, 4 and 5 parts.
# enumerate() is zero-based, so SEGMENTS[i] ('seg_<i+1>/') receives the
# videos split into i + 1 segments.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV2 + DS_SPLIT + split + class_label
            dest_dir = DS_CDFV2 + DS_SEGMENTS + segment + split + class_label

            for video in os.listdir(src_dir):
                segment_video(src_dir, dest_dir, video, no_of_segments + 1)

# CoViAR (Compressed Video Action Recognition)

## Testing Logic

In [10]:
# For Residual Feature extraction before the temporality stream
from coviar import load

In [None]:
# Signature: load(input, gop_index, frame_index, representation_type, accumulate)
#   input: path to the video (.mp4).
#   gop_index, frame_index: zero-based indices selecting one frame of one
#       GOP (Group Of Pictures).
#   representation_type: 0 for I-frames, 1 for motion vectors, 2 for residuals.
#   accumulate: True returns the accumulated representation, False the
#       original compressed representation (see the CoViAR paper for details).

# TODO: Supports only mpeg4 raw videos, but the dataset we use are compressed videos
# NOTE(review): DS_ORG and CELEB_REAL are not defined in the visible globals cell.
load(DS_ORG + CELEB_REAL + 'id0_0000.mp4', 0, 0, 2, False)

In [None]:
# mpeg4 raw videos can be obtained by the following command
# ffmpeg -i input.mp4 -c:v mpeg4 -f rawvideo output.mp4
# NOTE(review): DS_SEG and CELEB_REAL are not defined in the visible globals cell.

cmd = "ffmpeg -i '{}' -c:v mpeg4 -f rawvideo '{}'".format(DS_SEG + CELEB_REAL + 'seg_1_id0_0000.mp4', DS_RAW + CELEB_REAL + 'seg_1_id0_0000.mp4')
print(cmd)
os.system(cmd)

In [None]:
# Two-pass variant: re-encode to mpeg4 into a scratch temp.mp4, stream-copy
# that file to the final name, then delete the scratch file.
# NOTE(review): DS_SEG and CELEB_REAL are not defined in the visible globals cell.
cmd = "ffmpeg -i '{}' -c:v mpeg4 -f rawvideo '{}'".format(DS_SEG + CELEB_REAL + 'seg_1_id0_0000.mp4', DS_RAW + CELEB_REAL + 'temp.mp4')
print(cmd)
os.system(cmd)

cmd = "ffmpeg -i '{}' -c copy '{}'".format(DS_RAW + CELEB_REAL + 'temp.mp4', DS_RAW + CELEB_REAL + 'seg_1_id0_0000.mp4')
print(cmd)
os.system(cmd)

rm = "rm {}".format(DS_RAW + CELEB_REAL + 'temp.mp4')
print(rm)
os.system(rm)

In [None]:
# Signature: load(input, gop_index, frame_index, representation_type, accumulate)
# Here: GOP 0, frame 3, representation type 2 (residuals), accumulated.
# NOTE(review): CELEB_REAL is not defined in the visible globals cell.
res_features = load(DS_RAW + CELEB_REAL + 'seg_1_id0_0000.mp4', 0, 3, 2, True)
print(res_features)

## Implementation

### Converting Compressed videos to mpeg4 raw videos

In [12]:
def get_raw_mpeg(src_dir, dest_dir, filename):
    """Re-encode a video with the mpeg4 codec into dest_dir under the same name.

    Runs ffmpeg twice through a scratch file 'temp.mp4' in `dest_dir`:
      1. re-encode the source with `-c:v mpeg4 -f rawvideo` into the scratch file;
      2. stream-copy (`-c copy`) the scratch file to dest_dir + filename.
    The scratch file is always removed afterwards.

    A failing ffmpeg step is reported on stdout and the remaining steps are
    skipped; no exception is raised, so a batch loop keeps going.

    Args:
        src_dir: Folder containing the source video (must end with '/').
        dest_dir: Folder receiving the converted video (must end with '/').
        filename: Name of the video file.
    """
    import subprocess  # local import keeps this cell self-contained

    src_path = src_dir + filename
    temp_path = dest_dir + 'temp.mp4'
    dest_path = dest_dir + filename

    try:
        # Argument lists bypass the shell, so file names containing quotes
        # or spaces are passed through intact. '-y' only affects the
        # scratch file: a leftover temp.mp4 is overwritten without a prompt.
        encode = subprocess.run(
            ['ffmpeg', '-y', '-i', src_path, '-c:v', 'mpeg4', '-f', 'rawvideo', temp_path])
        if encode.returncode != 0:
            print(f'ffmpeg re-encode failed for {src_path} (exit code {encode.returncode})')
            return

        remux = subprocess.run(['ffmpeg', '-i', temp_path, '-c', 'copy', dest_path])
        if remux.returncode != 0:
            print(f'ffmpeg remux failed for {dest_path} (exit code {remux.returncode})')
    finally:
        # Portable replacement for `rm`; also runs after a failed step.
        if os.path.exists(temp_path):
            os.remove(temp_path)

In [None]:
# NOTE(review): DS_SEG and CELEB_REAL are not defined in the visible globals
# cell, and this call passes four arguments while get_raw_mpeg takes
# (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_SEG + CELEB_REAL):
    get_raw_mpeg(DS_SEG, DS_RAW, CELEB_REAL, video)

In [None]:
# NOTE(review): DS_SEG and CELEB_FAKE are not defined in the visible globals
# cell, and this call passes four arguments while get_raw_mpeg takes
# (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_SEG + CELEB_FAKE):
    get_raw_mpeg(DS_SEG, DS_RAW, CELEB_FAKE, video)

In [None]:
# NOTE(review): DS_SEG and YT_REAL are not defined in the visible globals
# cell, and this call passes four arguments while get_raw_mpeg takes
# (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_SEG + YT_REAL):
    get_raw_mpeg(DS_SEG, DS_RAW, YT_REAL, video)

### Celeb DF v1

In [None]:
# Convert every segmented Celeb-DF v1 video to mpeg4, mirroring the
# segment/split/class folder layout from dataset_segments/ to dataset_raw/.
# The enumerate index (no_of_segments) is not used in this loop.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV1 + DS_SEGMENTS + segment + split + class_label
            dest_dir = DS_CDFV1 + DS_RAW + segment + split + class_label

            for video in os.listdir(src_dir):
                get_raw_mpeg(src_dir, dest_dir, video)

### Celeb DF v2

In [None]:
# Convert every segmented Celeb-DF v2 video to mpeg4, mirroring the
# segment/split/class folder layout from dataset_segments/ to dataset_raw/.
# The enumerate index (no_of_segments) is not used in this loop.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV2 + DS_SEGMENTS + segment + split + class_label
            dest_dir = DS_CDFV2 + DS_RAW + segment + split + class_label

            for video in os.listdir(src_dir):
                get_raw_mpeg(src_dir, dest_dir, video)

# Residual Feature Extraction

## Testing Logic

In [None]:
# Exploration: count the packets in each GOP of one sample video and print
# the result as a list of [first_frame_index, frame_count] pairs.
# NOTE(review): CELEB_REAL is not defined in the visible globals cell.
# The counters are named gop_open / gop_close so that the builtin open()
# is not rebound for the rest of the notebook session.
gop_open = -1   # -1 until the first key frame has been seen
gop_close = 0   # packets counted in the current GOP
gop = []

# The context manager closes the container even if demuxing fails.
with av.open(DS_RAW + CELEB_REAL + 'seg_1_id0_0000.mp4') as test_input:
    in_stream = test_input.streams.video[0]
    in_stream.codec_context.skip_frame = "NONKEY"

    for packet in test_input.demux(in_stream):
        # Packets without a dts are not counted.
        if packet.dts is None:
            continue

        if packet.is_keyframe:
            # Every key frame after the first one closes the previous GOP.
            if gop_open != -1:
                gop.append([gop_open, gop_close])
            gop_open = 0
            gop_close = 0

        gop_close += 1

    gop.append([gop_open, gop_close])

print(gop)

In [None]:
# Signature: load(input, gop_index, frame_index, representation_type, accumulate)
# Exploration: load one accumulated residual frame (GOP 7, frame 1) of the
# first listed video, replace negative values with 0 and print its rows.
# NOTE(review): CELEB_REAL is not defined in the visible globals cell.

for video in os.listdir(DS_RAW + CELEB_REAL):
    # extract residual feature for each frame
    res_features = load(DS_RAW + CELEB_REAL + video, 7, 1, 2, True)
    # print(res_features)

    res_features = np.where(res_features<0,0,res_features)
    # print(res_features)

    for x in res_features:
        for y in x:
            print(y)
    # Only the first video is inspected.
    break

## Implementation

In [7]:
def get_gop_frame_index(src_dir, filename):
    """Return the frame-index range of every GOP in a video.

    Walks the video packets and starts a new GOP at each key-frame packet.
    Packets without a dts are ignored, and packets before the first key
    frame are not counted.

    Args:
        src_dir: Folder containing the video (must end with '/').
        filename: Name of the video file.

    Returns:
        A list with one [0, n] pair per GOP, in stream order, where n is
        the number of packets counted in that GOP (usable as
        range(pair[0], pair[1])). The list is empty when no key-frame
        packet is found.
    """
    gop = []
    frames_in_gop = 0
    seen_keyframe = False

    # The context manager closes the container even if demuxing fails.
    with av.open(src_dir + filename) as container:
        in_stream = container.streams.video[0]
        in_stream.codec_context.skip_frame = "NONKEY"

        for packet in container.demux(in_stream):
            if packet.dts is None:
                continue

            if packet.is_keyframe:
                # Every key frame after the first one closes the previous GOP.
                if seen_keyframe:
                    gop.append([0, frames_in_gop])
                seen_keyframe = True
                frames_in_gop = 0

            frames_in_gop += 1

    # Close the last GOP, but only if one was ever opened: emitting a
    # [-1, n] placeholder would make callers request frame index -1.
    if seen_keyframe:
        gop.append([0, frames_in_gop])

    return gop

In [8]:
def extract_residual_features(src_dir, dest_dir, filename):
    """Write a video whose frames are the accumulated residuals of the input.

    For every frame of every GOP (as reported by get_gop_frame_index) the
    accumulated residual is loaded with coviar's `load`, clamped to the
    0..255 range, and encoded into a 224x224 stream at rate 2 that uses the
    input's codec and pixel format.

    Args:
        src_dir: Folder containing the source video (must end with '/').
        dest_dir: Folder receiving the residual video (must end with '/').
        filename: Name of the video file.
    """
    src_path = src_dir + filename
    gop = get_gop_frame_index(src_dir, filename)

    # The source container is only needed to read the codec settings.
    with av.open(src_path) as source:
        codec_context = source.streams.video[0].codec_context
        codec_name = codec_context.name
        pix_fmt = codec_context.pix_fmt

    # The context manager closes the output even if loading or encoding fails.
    with av.open(dest_dir + filename, 'w') as output:
        out_stream = output.add_stream(codec_name, 2)
        out_stream.width = 224
        out_stream.height = 224
        out_stream.pix_fmt = pix_fmt

        for gop_index, interval in enumerate(gop):
            for frame_index in range(interval[0], interval[1]):     # [interval[0] , interval[1])

                # load([input], [gop_index], [frame_index], [representation_type], [accumulate])
                res_feature = load(src_path, gop_index, frame_index, 2, True)

                # Clamp BEFORE converting to uint8: casting first would wrap
                # negative residuals around to large positive values and
                # make the clamp a no-op.
                res_feature = np.clip(res_feature, 0, 255).astype(np.uint8)
                res_image = PIL.Image.fromarray(res_feature)

                out_frame = av.VideoFrame.from_image(res_image)
                output.mux(out_stream.encode(out_frame))

        # Encoding None flushes the packets still buffered in the encoder.
        output.mux(out_stream.encode(None))

In [None]:
# NOTE(review): DS_RES and CELEB_REAL are not defined in the visible globals
# cell, and this call passes four arguments while extract_residual_features
# takes (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_RAW + CELEB_REAL):
    extract_residual_features(DS_RAW, DS_RES, CELEB_REAL, video)

In [None]:
# NOTE(review): DS_RES and CELEB_FAKE are not defined in the visible globals
# cell, and this call passes four arguments while extract_residual_features
# takes (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_RAW + CELEB_FAKE):
    extract_residual_features(DS_RAW, DS_RES, CELEB_FAKE, video)

In [None]:
# NOTE(review): DS_RES and YT_REAL are not defined in the visible globals
# cell, and this call passes four arguments while extract_residual_features
# takes (src_dir, dest_dir, filename) -- presumably written for an earlier
# signature; it would raise a TypeError against the current definition.
for video in os.listdir(DS_RAW + YT_REAL):
    extract_residual_features(DS_RAW, DS_RES, YT_REAL, video)

### Celeb DF v1

In [None]:
# Build the residual videos for every mpeg4-converted Celeb-DF v1 segment,
# mirroring the segment/split/class folder layout from dataset_raw/ to
# dataset_residuals/.
# The enumerate index (no_of_segments) is not used in this loop.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV1 + DS_RAW + segment + split + class_label
            dest_dir = DS_CDFV1 + DS_RESIDUALS + segment + split + class_label

            for video in os.listdir(src_dir):
                extract_residual_features(src_dir, dest_dir, video)

### Celeb DF v2

In [None]:
# Build the residual videos for every mpeg4-converted Celeb-DF v2 segment,
# mirroring the segment/split/class folder layout from dataset_raw/ to
# dataset_residuals/.
# The enumerate index (no_of_segments) is not used in this loop.
for no_of_segments, segment in enumerate(SEGMENTS):
    for split in SPLIT:
        for class_label in CLASS:
            
            src_dir = DS_CDFV2 + DS_RAW + segment + split + class_label
            dest_dir = DS_CDFV2 + DS_RESIDUALS + segment + split + class_label

            for video in os.listdir(src_dir):
                extract_residual_features(src_dir, dest_dir, video)