# Libraries Used

* ffmpeg-python
* av
* cmake
* dlib  (based on the python version)
* face-recognition

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%cd drive/MyDrive/FYP/Celeb DF\ v1\ Dataset/

/content/drive/MyDrive/FYP/Celeb DF v1 Dataset


In [None]:
!unzip dataset_mesonet.zip

In [None]:
%pip install ffmpeg-python
%pip install av
%pip install cmake
%pip install dlib
%pip install face-recognition

# **MESONET**

In [1]:
import av

import keras
from keras import layers
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout
from keras.optimizers import Adam

import shutil
import imghdr
from PIL import Image

import os
import random

In [2]:
DS_CDFV1 = 'celeb_df_v1/'
DS_CDFV2 = 'celeb_df_v2/'

DS_ORGINAL = 'dataset_original/'
DS_SPLIT = 'dataset_split/'
DS_IFRAMES = 'dataset_iframes/'
DS_FACE = 'dataset_face/'
DS_FACE_IMG = 'dataset_face_img/'
DS_SRM_SNIPPETS = 'dataset_srm_snippets_5/'
DS_SEGMENTS = 'dataset_segments/'
DS_RAW = 'dataset_raw/'
DS_RESIDUALS = 'dataset_residuals/'
DS_TEMPORAL = 'dataset_temporal/'


SEG_1 = 'seg_1/'
SEG_2 = 'seg_2/'
SEG_3 = 'seg_3/'
SEG_4 = 'seg_4/'
SEG_5 = 'seg_5/'

SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

DS_TRAIN = 'train_dataset/'
DS_TEST = 'test_dataset/'
DS_VAL = 'val_dataset/'

CLASS_FAKE = 'fake/'
CLASS_REAL = 'real/'


TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG, DS_SRM_SNIPPETS]
TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
CLASS = [CLASS_REAL, CLASS_FAKE]

DATASET = [DS_CDFV1, DS_CDFV2]

In [3]:
def create_model(input_size):
  model = keras.Sequential()

  model.add(layers.Conv2D(input_shape=input_size, filters=8, kernel_size=3, activation='relu', padding="same"))
  model.add(BatchNormalization())
  model.add(MaxPool2D(2, 2, padding="same"))

  model.add(layers.Conv2D(input_shape=(128, 128, 8), filters=8, kernel_size=5, activation='relu', padding="same"))
  model.add(BatchNormalization())
  model.add(MaxPool2D(2, 2, padding="same"))

  
  model.add(layers.Conv2D(input_shape=(64, 64, 8), filters=16, kernel_size=5, activation='relu', padding="same"))
  model.add(BatchNormalization())
  model.add(MaxPool2D(4, 4, padding="same"))

  
  model.add(layers.Conv2D(input_shape=(16, 16, 16), filters=16, kernel_size=5, activation='relu', padding="same"))
  model.add(BatchNormalization())
  model.add(MaxPool2D(4, 4, padding="same"))
  model.add(Flatten())

  model.add(Dropout(0.5))
  model.add(layers.Dense(16))
  model.add(layers.LeakyReLU())

  model.add(Dropout(0.5))
  model.add(layers.Dense(1, activation='sigmoid'))
  
  return model
  

In [4]:
input_size = (256, 256, 3)
model = create_model(input_size)
model.compile(optimizer=Adam(learning_rate=0.0001), 
              loss='binary_crossentropy', 
              metrics = [keras.metrics.BinaryAccuracy(), 
                         keras.metrics.Precision(), 
                         keras.metrics.Recall(),
                         keras.metrics.AUC()])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 8)       224       
                                                                 
 batch_normalization (BatchN  (None, 256, 256, 8)      32        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 8)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 8)       1608      
                                                                 
 batch_normalization_1 (Batc  (None, 128, 128, 8)      32        
 hNormalization)                                                 
                                                        

## Frame Extraction

In [None]:
def extract_frames_from_videos(src_path, dst_path, video, count):
  frame_count = 0

  # 

  pass

def dataset_extract_frames(source_path, dest_path, vid, tag, count):
  frame_count = 0

  if(imghdr.what(os.path.join(source_path, vid)) == 'jpeg'):
    image = Image.open(source_path + vid)
    image.save(f'{dest_path}/vid_{tag}{count}_fr_{frame_count}.jpg')
    
    return

  vid = av.open(source_path + vid)
  for frame in vid.decode():
    image = frame.to_image()
    image.save(f'{dest_path}/vid_{tag}{count}_fr_{frame_count}.jpg')

    frame_count += 1

In [None]:
#extracting frames from Celeb-real face-cropped data
vid_count = 1
source_path = DS_FACE + CELEB_REAL
for video in os.listdir(source_path):
  # print(video, vid_count)
  dataset_extract_frames(source_path, MS_TRAIN + REAL_VIDS, video, 'cr', vid_count)
  vid_count += 1

In [None]:
#extracting frames from YouTube-real face-cropped data
vid_count = 1
source_path = DS_FACE + YT_REAL
for video in os.listdir(source_path):
  # print(video, vid_count)
  dataset_extract_frames(source_path, MS_TRAIN + REAL_VIDS, video, 'yr', vid_count)
  vid_count += 1

In [None]:
#extracting frames from Celeb-synthesis face-cropped data
vid_count = 1
source_path = DS_FACE + CELEB_FAKE
for video in os.listdir(source_path):
  # print(video, vid_count)
  dataset_extract_frames(source_path, MS_TRAIN + FAKE_VIDS, video, 'cf', vid_count)
  vid_count += 1

In [None]:
#extracting test data

def extract_test_data(source_path):
  frame_list = []
  for frame in os.listdir(source_path):
    frame_path = os.path.join(source_path, frame)
    frame_list.append(frame_path)

  size = int(20/100 * len(frame_list))
  sampled_list = random.sample(frame_list, size)
  return sampled_list

In [None]:
#extracting test data from real dataset

source_path = MS_TRAIN + REAL_VIDS
sampled_list = extract_test_data(source_path)
for frame_path in sampled_list:
  shutil.copy(frame_path, os.path.join(MS_TEST + REAL_VIDS, os.path.basename(frame_path)))
  os.remove(frame_path)

In [None]:
#extracting test data from fake dataset

source_path = MS_TRAIN + FAKE_VIDS
sampled_list = extract_test_data(source_path)
for frame_path in sampled_list:
  shutil.copy(frame_path, os.path.join(MS_TEST + FAKE_VIDS, os.path.basename(frame_path)))
  os.remove(frame_path)

## Dataset Creation

In [None]:
#creating dataset from folders
def create_dataset(dir_path):
  ds = keras.utils.image_dataset_from_directory(
      directory = dir_path,
      labels = 'inferred',
      label_mode = 'binary',
      batch_size = 32,
      color_mode = 'rgb',
      shuffle = True,
      validation_split = 0.2,
      subset = 'validation',
      seed = 1
  )
  return ds

In [9]:
(train_ds, val_ds) = keras.utils.image_dataset_from_directory(
      directory = MS_TRAIN,
      labels = 'inferred',
      label_mode = 'binary',
      batch_size = 32,
      color_mode = 'rgb',
      shuffle = True,
      validation_split = 0.2,
      subset = 'both',
      seed = 1
  )
for data, labels in train_ds.take(1):
  print(data.shape)

Found 22726 files belonging to 2 classes.
Using 18181 files for training.
Using 4545 files for validation.
(32, 256, 256, 3)


In [10]:
test_ds = keras.utils.image_dataset_from_directory(
    directory=MS_TEST,
    labels='inferred',
    label_mode='binary'
)

for data, labels in test_ds.take(1):
  print(data.shape)

Found 5680 files belonging to 2 classes.
(32, 256, 256, 3)


## Model Training

In [None]:
# Load from checkpoint (if exists)
try:
    saved_model = keras.models.load_model(MS_MODEL)
    model = saved_model

except IOError:
    pass

In [None]:
max_epochs = 20

model.fit(train_ds, 
          epochs=max_epochs, 
          validation_data=val_ds,
          callbacks=keras.callbacks.ModelCheckpoint(MS_MODEL),
          verbose=1)

## Test dataset metrics

In [12]:
loss, acc, prec, rec, auc = model.evaluate(test_ds)

print(f'Loss:\t{loss:.4f}')
print(f'Accuracy:\t{acc:.4f}')
print(f'Precision:\t{prec:.4f}')
print(f'Recall:\t{rec:.4f}')
print(f'AUC:\t{rec:.4f}')

Loss:	0.2865
Accuracy:	0.8798
Precision:	0.9113
Recall:	0.8502
AUC:	0.8502


## Testing Logic

In [13]:
new_model = keras.models.load_model(MS_MODEL)
new_model

<keras.engine.sequential.Sequential at 0x7f88a1598520>

In [None]:
new_model.fit(train_ds, 
          epochs=5, 
          validation_data=val_ds,
          callbacks=keras.callbacks.ModelCheckpoint(MS_MODEL),
          verbose=2)

In [16]:
loss, acc, prec, rec, auc = new_model.evaluate(test_ds)

print(f'Loss: {loss:.4f}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {prec:.4f}')
print(f'Recall: {rec:.4f}')
print(f'AUC: {auc:.4f}')

Loss: 0.2865
Accuracy: 0.8798
Precision: 0.9113
Recall: 0.8502
AUC: 0.9538
