# Libraries Used

* ffmpeg-python
* av
* cmake
* dlib  (based on the python version)
* face-recognition

In [3]:
# Mount Google Drive at /content/drive so files stored there are reachable
# from this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
%cd drive/MyDrive/FYP/Celeb DF\ v1\ Dataset/
%pwd

/content/drive/MyDrive/FYP/Celeb DF v1 Dataset


'/content/drive/MyDrive/FYP/Celeb DF v1 Dataset'

In [7]:
# Install the third-party libraries listed at the top of the notebook into the
# kernel's environment.
# NOTE(review): versions are unpinned; the recorded run resolved
# ffmpeg-python 0.2.0 and av 10.0.0 - consider pinning for reproducibility.
%pip install ffmpeg-python
%pip install av
# presumably cmake and dlib go first because face-recognition builds on dlib - TODO confirm
%pip install cmake
%pip install dlib
%pip install face-recognition

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: ffmpeg-python
Successfully installed ffmpeg-python-0.2.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting av
  Downloading av-10.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: av
Successfully installed av-10.0.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting face-recognition
  Down

# **MESONET**

In [8]:
import av

import keras
from keras import layers
from keras.layers import Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout
from keras.optimizers import Adam

import shutil
import imghdr
from PIL import Image

import os
import random

In [9]:
# --- Working directories, all relative to the dataset root ---
# Only DS_FACE is read by the cells visible in this notebook section.
DS_ORG = "./dataset_original/"
DS_IFRAME = "./dataset_IFrames/"
DS_FACE = "./dataset_face/"
DS_FINAL = "./dataset_final/"
DS_SEG = "./dataset_segments/"
DS_RAW = "./dataset_raw/"
DS_RES = "./dataset_residuals/"

# --- MesoNet frame folders and checkpoint location ---
MS_TRAIN = "./dataset_mesonet/train_dataset/"
MS_TEST = "./dataset_mesonet/test_dataset/"
MS_MODEL = "./dataset_mesonet/models"

# --- Sub-folders of the source dataset ---
CELEB_REAL = "Celeb-real/"
CELEB_FAKE = "Celeb-synthesis/"
YT_REAL = "YouTube-real/"

# --- Class sub-folders inside MS_TRAIN / MS_TEST ---
REAL_VIDS = "real_videos/"
FAKE_VIDS = "fake_videos/"

# File-name prefixes: seg_1_, seg_2_, seg_3_
SEG = [f"seg_{idx}_" for idx in (1, 2, 3)]

In [10]:
def create_model(input_size):
  """Build the (uncompiled) MesoNet-style binary classifier.

  Four Conv2D -> BatchNormalization -> MaxPool2D stages are followed by a
  small dense head that ends in a single sigmoid unit.

  Args:
    input_size: shape of one input image, e.g. (256, 256, 3).

  Returns:
    An uncompiled keras.Sequential model.
  """
  # (filters, kernel_size, pool_size) for each convolutional stage.
  conv_stages = [(8, 3, 2), (8, 5, 2), (16, 5, 4), (16, 5, 4)]

  model = keras.Sequential()
  for stage_index, (filters, kernel_size, pool) in enumerate(conv_stages):
    # Only the first layer declares the input shape. The original also passed
    # hard-coded shapes to the later Conv2D layers; those were only correct
    # for a 256x256 input and are not needed, since Sequential infers them.
    first_layer_kwargs = {'input_shape': input_size} if stage_index == 0 else {}
    model.add(Conv2D(filters=filters, kernel_size=kernel_size, activation='relu',
                     padding="same", **first_layer_kwargs))
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool, pool, padding="same"))

  model.add(Flatten())

  # Dense head: dropout before each fully connected layer.
  model.add(Dropout(0.5))
  model.add(Dense(16))
  model.add(layers.LeakyReLU())

  model.add(Dropout(0.5))
  model.add(Dense(1, activation='sigmoid'))

  return model
  

In [11]:
# Build the network for 256x256 RGB frames and attach the training configuration.
input_size = (256, 256, 3)
model = create_model(input_size)

# Metrics tracked in addition to the loss.
tracked_metrics = [
    keras.metrics.BinaryAccuracy(),
    keras.metrics.Precision(),
    keras.metrics.Recall(),
    keras.metrics.AUC(),
]

model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=tracked_metrics,
)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 256, 256, 8)       224       
                                                                 
 batch_normalization (BatchN  (None, 256, 256, 8)      32        
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 128, 128, 8)      0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 128, 128, 8)       1608      
                                                                 
 batch_normalization_1 (Batc  (None, 128, 128, 8)      32        
 hNormalization)                                                 
                                                        

## Frame Extraction

In [None]:
def dataset_extract_frames(source_path, dest_path, vid, tag, count):
  """Save the frame(s) of one source file as vid_<tag><count>_fr_<n>.jpg.

  A source file that is itself a JPEG image is re-saved as frame 0.
  Anything else is opened with PyAV and every decoded video frame is saved.

  Args:
    source_path: directory containing the source file.
    dest_path: directory that receives the frames (created if missing).
    vid: file name of the video (or JPEG) inside source_path.
    tag: short dataset tag embedded in the output file names (e.g. 'cr').
    count: video index embedded in the output file names.
  """
  file_path = os.path.join(source_path, vid)
  os.makedirs(dest_path, exist_ok=True)

  if imghdr.what(file_path) == 'jpeg':
    # Context manager releases the file handle once the copy is written.
    with Image.open(file_path) as image:
      image.save(f'{dest_path}/vid_{tag}{count}_fr_{0}.jpg')
    return

  # Close the container when done; the original leaked one open container
  # per processed video.
  with av.open(file_path) as container:
    # Decode the first video stream only: a bare decode() also yields audio
    # frames, which have no to_image().
    for frame_count, frame in enumerate(container.decode(video=0)):
      image = frame.to_image()
      image.save(f'{dest_path}/vid_{tag}{count}_fr_{frame_count}.jpg')

In [None]:
# Extract frames from the Celeb-real face-cropped data into the real-class
# training folder. sorted() keeps the video numbering reproducible, because
# os.listdir returns entries in arbitrary order.
source_path = DS_FACE + CELEB_REAL
for vid_count, video in enumerate(sorted(os.listdir(source_path)), start=1):
  dataset_extract_frames(source_path, MS_TRAIN + REAL_VIDS, video, 'cr', vid_count)

In [None]:
# Extract frames from the YouTube-real face-cropped data into the real-class
# training folder. sorted() keeps the video numbering reproducible, because
# os.listdir returns entries in arbitrary order.
source_path = DS_FACE + YT_REAL
for vid_count, video in enumerate(sorted(os.listdir(source_path)), start=1):
  dataset_extract_frames(source_path, MS_TRAIN + REAL_VIDS, video, 'yr', vid_count)

In [None]:
# Extract frames from the Celeb-synthesis face-cropped data into the
# fake-class training folder. sorted() keeps the video numbering reproducible,
# because os.listdir returns entries in arbitrary order.
source_path = DS_FACE + CELEB_FAKE
for vid_count, video in enumerate(sorted(os.listdir(source_path)), start=1):
  dataset_extract_frames(source_path, MS_TRAIN + FAKE_VIDS, video, 'cf', vid_count)

In [None]:
#extracting test data

def extract_test_data(source_path, test_fraction=0.2, seed=None):
  """Randomly pick a share of the entries in source_path to hold out as test data.

  Args:
    source_path: directory whose entries are the candidates.
    test_fraction: share of the entries to sample; defaults to 0.2, the value
      that used to be hard-coded.
    seed: optional seed. When given, the same directory content always yields
      the same sample; when None the module-level random generator is used.

  Returns:
    A list with int(test_fraction * number_of_entries) paths, each built by
    joining source_path with an entry name.
  """
  # Sort first: os.listdir order is arbitrary, which would defeat the seed.
  frame_list = [os.path.join(source_path, frame)
                for frame in sorted(os.listdir(source_path))]

  size = int(test_fraction * len(frame_list))
  rng = random if seed is None else random.Random(seed)
  return rng.sample(frame_list, size)

In [None]:
# Hold out a random sample of the real training frames by moving them into the
# test folder.
# NOTE(review): the sample is drawn per frame, so frames of the same video can
# end up in both train and test - consider splitting per video instead.
source_path = MS_TRAIN + REAL_VIDS
test_path = MS_TEST + REAL_VIDS
os.makedirs(test_path, exist_ok=True)

# Guard against re-running the cell: each run would otherwise move a further
# share of whatever is still left in the training folder.
if os.listdir(test_path):
  print(f'{test_path} already contains files; skipping the split.')
else:
  sampled_list = extract_test_data(source_path)
  for frame_path in sampled_list:
    shutil.move(frame_path, os.path.join(test_path, os.path.basename(frame_path)))

In [None]:
# Hold out a random sample of the fake training frames by moving them into the
# test folder.
# NOTE(review): the sample is drawn per frame, so frames of the same video can
# end up in both train and test - consider splitting per video instead.
source_path = MS_TRAIN + FAKE_VIDS
test_path = MS_TEST + FAKE_VIDS
os.makedirs(test_path, exist_ok=True)

# Guard against re-running the cell: each run would otherwise move a further
# share of whatever is still left in the training folder.
if os.listdir(test_path):
  print(f'{test_path} already contains files; skipping the split.')
else:
  sampled_list = extract_test_data(source_path)
  for frame_path in sampled_list:
    shutil.move(frame_path, os.path.join(test_path, os.path.basename(frame_path)))

## Dataset Creation

In [None]:
#creating dataset from folders
def create_dataset(dir_path, subset='validation', validation_split=0.2, batch_size=32, seed=1):
  """Load a labelled RGB image dataset from a directory of class sub-folders.

  The defaults reproduce the previous hard-coded behaviour, which returned
  only the 20% validation subset.

  Args:
    dir_path: directory containing one sub-folder per class.
    subset: which part of the split to return, passed through to Keras
      ('training', 'validation' or 'both'); 'both' yields a
      (training, validation) pair.
    validation_split: fraction of the files reserved for validation.
    batch_size: number of images per batch.
    seed: shuffling/split seed; must be identical across calls so the
      training and validation subsets do not overlap.

  Returns:
    Whatever keras.utils.image_dataset_from_directory returns for `subset`.
  """
  ds = keras.utils.image_dataset_from_directory(
      directory = dir_path,
      labels = 'inferred',
      label_mode = 'binary',
      batch_size = batch_size,
      color_mode = 'rgb',
      shuffle = True,
      validation_split = validation_split,
      subset = subset,
      seed = seed
  )
  return ds

In [None]:
# Split the training folder 80/20 into training and validation datasets,
# using a fixed seed so the split is repeatable.
split_options = dict(
    labels='inferred',
    label_mode='binary',
    batch_size=32,
    color_mode='rgb',
    shuffle=True,
    validation_split=0.2,
    subset='both',
    seed=1,
)
train_ds, val_ds = keras.utils.image_dataset_from_directory(directory=MS_TRAIN, **split_options)

# Sanity check: show the shape of the first training batch.
first_train_batch = train_ds.take(1)
for data, labels in first_train_batch:
  print(data.shape)

Found 22627 files belonging to 2 classes.
Using 18102 files for training.
Using 4525 files for validation.
(32, 256, 256, 3)


In [None]:
# Load the held-out test frames; class labels come from the sub-folder names.
test_options = dict(labels='inferred', label_mode='binary')
test_ds = keras.utils.image_dataset_from_directory(directory=MS_TEST, **test_options)

# Sanity check: show the shape of the first test batch.
first_test_batch = test_ds.take(1)
for data, labels in first_test_batch:
  print(data.shape)

Found 5655 files belonging to 2 classes.
(32, 256, 256, 3)


## Model Training

In [None]:
# Load from checkpoint (if exists); otherwise keep the freshly compiled model.
try:
    saved_model = keras.models.load_model(MS_MODEL)
    model = saved_model
    print(f'Resuming from checkpoint at {MS_MODEL!r}.')

except OSError as err:  # IOError is an alias of OSError
    # Still best-effort, but say so instead of failing silently, so it is
    # clear whether training resumes or starts from scratch.
    print(f'No checkpoint loaded from {MS_MODEL!r} ({err}); training from scratch.')

In [None]:
# Train for max_epochs epochs, checkpointing the model to MS_MODEL.
max_epochs = 20

# Keras documents `callbacks` as a list of callback instances.
checkpoint_cb = keras.callbacks.ModelCheckpoint(MS_MODEL)

# Keep the History object so the per-epoch metrics can be inspected or
# plotted later, instead of leaving it as a bare cell output.
history = model.fit(train_ds,
                    epochs=max_epochs,
                    validation_data=val_ds,
                    callbacks=[checkpoint_cb],
                    verbose=1)

Epoch 1/5




566/566 - 47s - loss: 0.6395 - binary_accuracy: 0.6655 - precision: 0.6805 - recall: 0.6807 - val_loss: 0.5309 - val_binary_accuracy: 0.7399 - val_precision: 0.7502 - val_recall: 0.7389 - 47s/epoch - 83ms/step
Epoch 2/5




566/566 - 48s - loss: 0.6152 - binary_accuracy: 0.6768 - precision: 0.6918 - recall: 0.6903 - val_loss: 0.5222 - val_binary_accuracy: 0.7503 - val_precision: 0.7593 - val_recall: 0.7514 - 48s/epoch - 84ms/step
Epoch 3/5




566/566 - 50s - loss: 0.5874 - binary_accuracy: 0.6945 - precision: 0.7065 - recall: 0.7126 - val_loss: 0.5095 - val_binary_accuracy: 0.7531 - val_precision: 0.7324 - val_recall: 0.8173 - 50s/epoch - 87ms/step
Epoch 4/5




566/566 - 47s - loss: 0.5720 - binary_accuracy: 0.7061 - precision: 0.7208 - recall: 0.7161 - val_loss: 0.5015 - val_binary_accuracy: 0.7717 - val_precision: 0.7788 - val_recall: 0.7751 - 47s/epoch - 82ms/step
Epoch 5/5




566/566 - 47s - loss: 0.5564 - binary_accuracy: 0.7177 - precision: 0.7335 - recall: 0.7240 - val_loss: 0.4791 - val_binary_accuracy: 0.7839 - val_precision: 0.8246 - val_recall: 0.7350 - 47s/epoch - 84ms/step


<keras.callbacks.History at 0x7f9f031ad850>

## Test dataset metrics

In [None]:
# evaluate() returns the loss followed by the metrics in the order they were
# passed to compile(): binary accuracy, precision, recall, AUC.
loss, acc, prec, rec, auc = model.evaluate(test_ds)

print(f'Loss:\t{loss:.4f}')
print(f'Accuracy:\t{acc:.4f}')
print(f'Precision:\t{prec:.4f}')
print(f'Recall:\t{rec:.4f}')
# Fixed: this line used to print the recall value under the AUC label.
print(f'AUC:\t{auc:.4f}')

## Testing Logic

In [None]:
# Reload the model from MS_MODEL, the path the training checkpoint callback
# writes to, to check that it can be restored.
new_model = keras.models.load_model(MS_MODEL)
new_model

<keras.engine.sequential.Sequential at 0x7f9f032ded00>

In [None]:
# Continue training the reloaded model for 5 more epochs, checkpointing to
# MS_MODEL. `callbacks` is passed as a list, as documented by Keras, and the
# History is kept for later inspection.
new_history = new_model.fit(train_ds,
                            epochs=5,
                            validation_data=val_ds,
                            callbacks=[keras.callbacks.ModelCheckpoint(MS_MODEL)],
                            verbose=2)

Epoch 1/5




566/566 - 52s - loss: 0.5269 - binary_accuracy: 0.7397 - precision: 0.7585 - recall: 0.7378 - val_loss: 0.4590 - val_binary_accuracy: 0.8020 - val_precision: 0.8401 - val_recall: 0.7583 - 52s/epoch - 92ms/step
Epoch 2/5




566/566 - 48s - loss: 0.5091 - binary_accuracy: 0.7522 - precision: 0.7731 - recall: 0.7458 - val_loss: 0.4488 - val_binary_accuracy: 0.8082 - val_precision: 0.8208 - val_recall: 0.8009 - 48s/epoch - 85ms/step
Epoch 3/5




566/566 - 46s - loss: 0.4856 - binary_accuracy: 0.7678 - precision: 0.7877 - recall: 0.7618 - val_loss: 0.4322 - val_binary_accuracy: 0.8188 - val_precision: 0.8515 - val_recall: 0.7833 - 46s/epoch - 82ms/step
Epoch 4/5




566/566 - 47s - loss: 0.4790 - binary_accuracy: 0.7704 - precision: 0.7915 - recall: 0.7623 - val_loss: 0.4223 - val_binary_accuracy: 0.8197 - val_precision: 0.8352 - val_recall: 0.8078 - 47s/epoch - 83ms/step
Epoch 5/5




566/566 - 45s - loss: 0.4649 - binary_accuracy: 0.7814 - precision: 0.8076 - recall: 0.7646 - val_loss: 0.4134 - val_binary_accuracy: 0.8219 - val_precision: 0.8568 - val_recall: 0.7837 - 45s/epoch - 80ms/step


<keras.callbacks.History at 0x7f9f033974f0>

In [None]:
# evaluate() returns the loss followed by one value per compiled metric.
# The model compiled in this notebook also tracks AUC, so unpacking exactly
# four values raised ValueError; any extra metrics are collected in `extra`.
loss, acc, prec, rec, *extra = new_model.evaluate(test_ds)

print(f'Loss: {loss:.4f}')
print(f'Accuracy: {acc:.4f}')
print(f'Precision: {prec:.4f}')
print(f'Recall: {rec:.4f}')
if extra:
  # AUC is the fourth metric in this notebook's compile() call.
  print(f'AUC: {extra[0]:.4f}')

Loss: 0.4123
Accuracy: 0.8212
Precision: 0.8613
Recall: 0.7833
