In [1]:
import tensorflow as tf
import keras
from tensorflow.keras import datasets, models, layers

from keras.layers import Dense, Conv2D,  MaxPool2D, Flatten, GlobalAveragePooling2D,  BatchNormalization, Layer, Add
from keras.models import Sequential
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
from keras.preprocessing import image

import os
import av
import shutil
import imghdr
from PIL import Image
import pickle
import numpy as np

from scipy.special import softmax

2023-04-06 07:15:18.901581: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-04-06 07:15:58.342726: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-04-06 07:15:58.343055: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-04-06 07:17:55.140906: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-

In [2]:
DS_CDFV1 = 'celeb_df_v1/'
DS_CDFV2 = 'celeb_df_v2/'

DS_ORGINAL = 'dataset_original/'
DS_SPLIT = 'dataset_split/'
DS_IFRAMES = 'dataset_iframes/'
DS_FACE = 'dataset_face/'
DS_FACE_IMG = 'dataset_face_img/'
DS_SRM_SNIPPETS = 'dataset_srm_snippets_5/'
DS_SEGMENTS = 'dataset_segments/'
DS_RAW = 'dataset_raw/'
DS_RESIDUALS = 'dataset_residuals/'
DS_TEMPORAL = 'dataset_temporal/'

MODELS = 'models/'


SEG_1 = 'seg_1/'
SEG_2 = 'seg_2/'
SEG_3 = 'seg_3/'
SEG_4 = 'seg_4/'
SEG_5 = 'seg_5/'

SEG = ['seg_1_', 'seg_2_', 'seg_3_', 'seg_4_', 'seg_5_']

DS_TRAIN = 'train_dataset/'
DS_TEST = 'test_dataset/'
DS_VAL = 'val_dataset/'

CLASS_FAKE = 'fake/'
CLASS_REAL = 'real/'


TOP_LEVEL_1 = [DS_SPLIT, DS_IFRAMES, DS_FACE, DS_FACE_IMG, DS_SRM_SNIPPETS]
TOP_LEVEL_2 = [DS_SEGMENTS, DS_RAW, DS_RESIDUALS]
SEGMENTS = [SEG_1, SEG_2, SEG_3, SEG_4, SEG_5]
SPLIT = [DS_TRAIN, DS_TEST, DS_VAL]
CLASS = [CLASS_REAL, CLASS_FAKE]

DATASET = [DS_CDFV1, DS_CDFV2]

In [None]:
os.makedirs(MODELS, exist_ok=True)

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%pip install av

In [14]:
%cd drive/MyDrive/FYP

/content/drive/MyDrive/FYP


In [15]:
%pwd

'/content/drive/MyDrive/FYP'

# Frame Extraction

In [None]:
for dataset in DATASET:
    for split in SPLIT:
        if split is DS_TEST:
            for segment in SEGMENTS:
                for label_class in CLASS:
                    dir = dataset + DS_TEMPORAL + split + segment + label_class
                    os.makedirs(dir, exist_ok=True) 
        else:
            for label_class in CLASS:
                dir = dataset + DS_TEMPORAL + split + label_class
                os.makedirs(dir, exist_ok=True)

In [None]:
# List of testing videos
for src_dir in DATASET:
    for label, class_label in enumerate(CLASS):
        with open(src_dir + DS_TEMPORAL + 'testing_videos.txt', 'a+') as f:
            for file in os.listdir(src_dir + DS_SPLIT + DS_TEST + class_label):
                f.write(f'{file} {label}\n')

### Test Dataset

In [None]:
def extract_frames_test(ds, dest_dir, segment, split, label, file):
    src_dir = ds + DS_RESIDUALS + segment + split + label

    for index, curr_segment in enumerate(SEGMENTS):
        if curr_segment > segment:
            break

        residual_file = src_dir + SEG[index] + file
        os.makedirs(dest_dir + SEGMENTS[index], exist_ok=True)

        vid = av.open(residual_file)

        for count, frame in enumerate(vid.decode()):
            image = frame.to_image()
            image.save(f'{dest_dir}{SEGMENTS[index]}frame_{count}.jpg')

In [None]:
def extract_segment_frames(ds, segment):
    file = open(ds + DS_TEMPORAL + 'testing_videos.txt', 'r')
    videos = file.readlines()

    for video in videos:
        [file, label] = video.split(' ')
        label = CLASS_REAL if int(label) == 0 else CLASS_FAKE
        
        dest_dir = ds + DS_TEMPORAL + DS_TEST + segment + label + '/' + file +'/'
        os.makedirs(dest_dir, exist_ok=True)

        extract_frames_test(ds, dest_dir, segment, DS_TEST, label, file)    

In [None]:
extract_segment_frames(DS_CDFV1, SEG_2)

### Training and Validation Dataset

In [None]:
def extract_frames_train_val(ds):
    for split in SPLIT:
        if split == DS_TEST:
            continue
        else:
            for label_class in CLASS:
                src_dir = ds + DS_RESIDUALS + SEG_1 + split + label_class
                dest_dir = ds + DS_TEMPORAL + split + label_class
            
                for id, video in enumerate(os.listdir(src_dir)):
                    vid = av.open(src_dir + video)

                    for count, frame in enumerate(vid.decode()):
                        image = frame.to_image()
                        image.save(f'{dest_dir}frame_{count}_vid_{id}.jpg')

In [None]:
extract_frames_train_val(DS_CDFV1)

# ResNet-18 Model

### Testing logic

In [3]:
class ResnetBlock(Model):

    def __init__(self, channels, input_size):
        super().__init__()

        self.conv_1 = Conv2D(input_shape=input_size, filters=channels, kernel_size=3, padding="same")
        
        self.bn_1 = BatchNormalization()

        self.conv_2 = Conv2D(input_shape=input_size, filters=channels, kernel_size=3, padding="same")
        
        self.bn_2 = BatchNormalization()

    def call(self, inputs):
        out = self.conv_1()
        out = self.bn_1(out)
        out = tf.nn.relu(out)
        out = self.conv_2(out)
        out = self.bn_2(out)
        out = tf.nn.relu(out)

        return out

In [4]:
class ResNet18(Model):

    def __init__(self, num_classes, input_size, **kwargs):
        super().__init__(**kwargs)

        self.conv_1 = Conv2D(input_shape=input_size, filters=64, kernel_size=(7, 7), strides=2,
                             padding="same", kernel_initializer="he_normal")
        
        self.init_bn = BatchNormalization()

        self.pool_2 = MaxPool2D(pool_size=(3, 3), strides=2, padding="same")

        self.res_1_1 = ResnetBlock(64, (56, 56, 64))
        self.res_1_2 = ResnetBlock(64, (56, 56, 64))

        self.res_2_1 = ResnetBlock(128, (28, 28, 128))
        self.res_2_2 = ResnetBlock(128, (28, 28, 128))

        self.res_3_1 = ResnetBlock(256, (14, 14, 256))
        self.res_3_2 = ResnetBlock(256, (14, 14, 256))

        self.res_4_1 = ResnetBlock(512, (7, 7, 512))
        self.res_4_2 = ResnetBlock(512, (7, 7, 512))

        self.avg_pool = GlobalAveragePooling2D()
        self.flat = Flatten()
        self.fc = Dense(num_classes, activation="softmax")

    def call(self, input_size):
        out = self.conv_1()
        out = self.init_bn(out)
        out = tf.nn.relu(out)
        out = self.pool_2(out)

        for res_block in [self.res_1_1, self.res_1_2, self.res_2_1, self.res_2_2, self.res_3_1, self.res_3_2, self.res_4_1, self.res_4_2]:
            out = res_block(out)

        out = self.avg_pool(out)
        out = self.flat(out)
        out = self.fc(out)
        
        return out

### Resnet-18 architecture

In [27]:
def resnet_18(input_size, num_classes):
    model = keras.Sequential()

    # layer 1
    model.add(Conv2D(input_shape=input_size, filters=64, kernel_size=7, strides=2, padding="same")) 
    model.add(BatchNormalization())
    model.add(MaxPool2D(pool_size=(3, 3), strides=2, padding="same"))
    
    # Resblock 1
    model.add(Conv2D(input_shape=(112, 112, 64), filters=64, kernel_size=3, padding="same"))
    model.add(BatchNormalization()) 
    model.add(Conv2D(input_shape=(112, 112, 64), filters=64, kernel_size=3, padding="same"))
    model.add(BatchNormalization())

    # Resblock 2
    model.add(Conv2D(input_shape=(56, 56, 64), filters=128, kernel_size=3, padding="same"))
    model.add(BatchNormalization())  
    model.add(Conv2D(input_shape=(56, 56, 64), filters=128, kernel_size=1, strides=2, padding="same")) # downsample
    model.add(Conv2D(input_shape=(56, 56, 64), filters=128, kernel_size=3, padding="same"))
    model.add(BatchNormalization())

    # Resblock 3
    model.add(Conv2D(input_shape=(28, 28, 128), filters=256, kernel_size=3, padding="same"))
    model.add(BatchNormalization())  
    model.add(Conv2D(input_shape=(28, 28, 128), filters=256, kernel_size=1, strides=2, padding="same")) # downsample
    model.add(Conv2D(input_shape=(28, 28, 128), filters=256, kernel_size=3, padding="same"))
    model.add(BatchNormalization())

    # Resblock 4
    model.add(Conv2D(input_shape=(14, 14, 256), filters=512, kernel_size=3, padding="same"))
    model.add(BatchNormalization())  
    model.add(Conv2D(input_shape=(14, 14, 256), filters=512, kernel_size=1, strides=2, padding="same")) # downsample
    model.add(Conv2D(input_shape=(14, 14, 256), filters=512, kernel_size=3, padding="same"))
    model.add(BatchNormalization())

    # Fully connected layer
    model.add(GlobalAveragePooling2D())
    model.add(Flatten())
    model.add(Dense(num_classes, activation="softmax"))

    return model

In [29]:
input_size = (224, 224, 3)
num_classes = 2

model = resnet_18(input_size, num_classes)

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps = 10,
    decay_rate = 0.1,
    staircase = False)

model.compile(optimizer = Adam(learning_rate = lr_schedule), 
              loss = 'categorical_crossentropy', 
              metrics = [keras.metrics.CategoricalAccuracy(), 
                         keras.metrics.Precision(), 
                         keras.metrics.Recall(),
                         keras.metrics.AUC()])
model.summary()

Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_158 (Conv2D)         (None, 112, 112, 64)      9472      
                                                                 
 batch_normalization_141 (Ba  (None, 112, 112, 64)     256       
 tchNormalization)                                               
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 56, 56, 64)       0         
 g2D)                                                            
                                                                 
 conv2d_159 (Conv2D)         (None, 56, 56, 64)        36928     
                                                                 
 batch_normalization_142 (Ba  (None, 56, 56, 64)       256       
 tchNormalization)                                               
                                                     

# Training

In [30]:
def get_train_val_dataset(train_src, val_src):

  train_ds = keras.utils.image_dataset_from_directory(
        directory = train_src,
        labels = 'inferred',
        label_mode = 'categorical',
        batch_size = 32,
        image_size = (224, 224),
        color_mode = 'rgb',
        shuffle = True,
        seed = 1
    )

  for data, labels in train_ds.take(1):
    print(data.shape)

  val_ds = keras.utils.image_dataset_from_directory(
        directory = val_src,
        labels = 'inferred',
        label_mode = 'categorical',
        batch_size = 32,
        image_size = (224, 224),
        color_mode = 'rgb',
        shuffle = True,
        seed = 1
    )

  for data, labels in val_ds.take(1):
    print(data.shape)

  return train_ds, val_ds

In [31]:
train_src = DS_CDFV1 + DS_TEMPORAL + DS_TRAIN
val_src = DS_CDFV1 + DS_TEMPORAL + DS_VAL

In [32]:
train_ds, val_ds = get_train_val_dataset(train_src, val_src)

Found 359405 files belonging to 2 classes.
(32, 224, 224, 3)
Found 89167 files belonging to 2 classes.
(32, 224, 224, 3)


In [None]:
# Load from checkpoint (if exists)
try:
    saved_model = keras.models.load_model(MODELS + 'temporal_stream')
    model = saved_model

except IOError:
    pass

In [33]:
max_epochs = 1

In [None]:
history = model.fit(train_ds, 
          epochs = max_epochs, 
          validation_data = val_ds,
          callbacks = keras.callbacks.ModelCheckpoint(MODELS + 'temporal_stream'),
          verbose = 1)

In [None]:
with open(MODELS + '/history', 'wb') as f:
    pickle.dump(history, f)

In [None]:
with open(MODELS + '/history', 'rb') as f:
    history = pickle.load(f)

# Testing

In [None]:
# Load from checkpoint (if exists)
try:
    saved_model = keras.models.load_model(MODELS + 'temporal_stream')
    model = saved_model

except IOError:
    pass

In [38]:
def predict_class(img_path):
    img = image.load_img(img_path, target_size=(224, 224))
    img_tensor = image.img_to_array(img)                   
    img_tensor = np.expand_dims(img_tensor, axis=0)         
    prediction = model.predict(img_tensor)
    return prediction                                      

In [39]:
def testing(ds, main_segment):
    actual = []
    predicted = []
    
    for class_label in CLASS:
        final_actual = [1, 0] if class_label == CLASS_REAL else [0, 1]

        src_dir = ds + DS_TEMPORAL + DS_TEST + main_segment + class_label

        for vid_dir in os.listdir(src_dir):
            all_segment_softmax = []

            for child_segment in os.listdir(f'{src_dir}{vid_dir}'):
                segment_sum = [0, 0]
                frames_in_segment = 0

                # obtain individual image prediction
                for image in os.listdir(f'{src_dir}{vid_dir}/{child_segment}'):
                    pred_class = predict_class(f'{src_dir}{vid_dir}/{child_segment}/{image}')
                    
                    # compute segment sum
                    segment_sum[0] += pred_class[0]
                    segment_sum[1] += pred_class[1]

                    frames_in_segment += 1
                
                #compute segment average
                segment_sum[0] /= frames_in_segment
                segment_sum[1] /= frames_in_segment

                #compute softmax of segment
                segment_sum = softmax(np.array(segment_sum))
                all_segment_softmax.append(segment_sum)

            # compute segment wise sum
            final_pred = [0, 0]
            for pred in all_segment_softmax:
                final_pred[0] += pred[0]
                final_pred[1] += pred[1]

            #compute overall softmax
            final_pred = softmax(np.array(final_pred))

            predicted.append(final_pred)
            actual.append(final_actual)

    return actual, predicted
    

In [None]:
actual, predicted = testing(DS_CDFV1, SEG_1)

In [40]:
def metrics(actual, predicted):
    m = keras.metrics.CategoricalAccuracy()
    m.update_state(actual, predicted)
    print(f'Categorical Accuracy - {m.result().numpy()}')
    
    m = keras.metrics.Precision()
    m.update_state(actual, predicted)
    print(f'Precision - {m.result().numpy()}')

    m = keras.metrics.Recall()
    m.update_state(actual, predicted)
    print(f'Recall - {m.result().numpy()}')

    m = keras.metrics.AUC()
    m.update_state(actual, predicted)
    print(f'AUC - {m.result().numpy()}')

In [None]:
metrics(actual, predicted)