In [21]:
import json
import os
import pandas as pd
import numpy as np
import cv2
import tensorflow_io as tfio

# read the metadata.json file
with open('metadata.json') as f:
    metadata = json.load(f)

# create a dataframe with the metadata
df = pd.DataFrame(metadata).T

# remove the split column
df = df.drop(columns=['split'])

# create 1 and 0 labels from the label column
df['label'] = df.label.apply(lambda x: 1 if x == 'FAKE' else 0)

y = df.label.values

df.head()

Unnamed: 0,label,original
owxbbpjpch.mp4,1,wynotylpnm.mp4
vpmyeepbep.mp4,0,
fzvpbrzssi.mp4,0,
htorvhbcae.mp4,1,wclvkepakb.mp4
fckxaqjbxk.mp4,1,vpmyeepbep.mp4


In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras.layers import Dense, Activation, Input, Input, Conv1D, Conv2D, MaxPooling1D
from tensorflow.python.keras.layers import MaxPooling2D, Dense, Dropout, Activation, Flatten, InputLayer
from tensorflow.python.keras.layers import concatenate, Reshape, Lambda, Add, Multiply, Average, Subtract
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K

# print Tensorflow and CUDA information
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))
print("Num CPUs Available: ", len(tf.config.experimental.list_physical_devices('CPU')))
print(f"Tensorflow version: {tf.__version__}")
print(f"Keras version: {keras.__version__}")

if tf.test.gpu_device_name():
    gpu_devices = tf.config.list_physical_devices('GPU')
    details = tf.config.experimental.get_device_details(gpu_devices[0])
    name = details.get('device_name', 'Unknown GPU')
    
    print(f"Using {name}")
else:
    print("No GPU found")

2023-03-09 04:48:13.312200: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-03-09 04:48:13.313382: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2023-03-09 04:48:13.383441: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:16:00.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2023-03-09 04:48:13.383475: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-03-09 04:48:13.386903: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-03-09 04:48:13.386950: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2

Num GPUs Available:  1
Num CPUs Available:  1
Tensorflow version: 2.4.1
Keras version: 2.4.0
Using Tesla V100-SXM2-32GB


2023-03-09 04:48:13.404402: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-09 04:48:13.404539: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-03-09 04:48:13.406133: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:16:00.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2023-03-09 04:48:13.406164: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-03-09 04:48:13.406180: I tensorflow/stream_executor/platfor

In [3]:
# print the number of real and fake videos
print('Number of real videos: ', len(df[df.label == 'REAL']))
print('Number of fake videos: ', len(df[df.label == 'FAKE']))

Number of real videos:  0
Number of fake videos:  0


In [4]:
path = 'data/'

# get a list of all mp4 files
filenames = [f for f in os.listdir(path) if f.endswith('.mp4')]

# remove the .mp4 extension from the filenames
filenames = [f[:-4] for f in filenames]

In [5]:
PROCESS_AUDIO = False

if PROCESS_AUDIO:
    import librosa
    import moviepy.editor as mp
    from multiprocessing import Pool

    def save_audio_files():
        for i, file in enumerate(filenames):
            clip = mp.VideoFileClip(path + file + '.mp4')
            clip.audio.write_audiofile(path + file + '.wav', verbose=False, logger=None)

            # print the progress as a percentage
            print(f'{i / len(filenames) * 100:.2f}%', end='\r')
        print('Audio files saved')


    def save_audio_feature(file):
        y, sr = librosa.load(path + file + '.wav')

        frame_length = y.shape[0] // 300
        
        # discard trailing frames
        y = y[:(frame_length * 300) - 1]

        # this removes about half a frame of audio
        # in the model this lost time will be shared between the frames
        # this results in the error in each frame being very small

        # calculate the mfcc with 32 features
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=32, n_fft=frame_length, hop_length=frame_length)

        # save the mfcc
        np.save(path + file + '.npy', mfcc)

    def save_audio_feature_parallel():
        with Pool(16) as p:
            p.map(save_audio_feature, filenames)

        print('Audio features saved')

    save_audio_files()
    save_audio_feature_parallel()

# print the shape of a sample audio file
print(np.load(path + filenames[0] + '.npy').shape)

(32, 300)


In [6]:
# create a tensorflow dataset
dataset = tf.data.Dataset.list_files(path + '*.mp4')

def get_video_asarray(file):
    frames = []
    cap = cv2.VideoCapture(file.numpy().decode("utf-8"))
    read = True
    while read:
        read, img = cap.read()
        if read:
            img = preprocess_frame(img)
            frames.append(img)
    return np.stack(frames, axis=0)

def preprocess_frame(frame):
    # video is a numpy array of shape (height, width, 3)
    # convert the video to grayscale
    # frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # resize the video to 128x128
    frame = cv2.resize(frame, (80, 45))
    
    frame = np.transpose(frame, (1, 0, 2))

    # normalize the video
    frame = frame / 255

    return frame

def get_audio_asarray(file):
    file = file.numpy().decode("utf-8")
    
    return np.load(file[:-4] + '.npy').T

def preprocess_input(file):
    # load the video
    video = tf.py_function(get_video_asarray, [file], Tout=[tf.float32])
    audio = tf.py_function(get_audio_asarray, [file], Tout=[tf.float32])
    
    audio = tf.expand_dims(audio, axis=-1)

    return video, audio

# Use map to apply the preprocess_input function to each file in the dataset
dataset = dataset.map(preprocess_input)

# Split the dataset into two separate datasets for video and audio
video_dataset = dataset.map(lambda x, y: x)
audio_dataset = dataset.map(lambda x, y: y)

zipped_dataset = tf.data.Dataset.zip((video_dataset, audio_dataset))

2023-03-09 04:48:14.071658: I tensorflow/compiler/jit/xla_gpu_device.cc:99] Not creating XLA devices, tf_xla_enable_xla_devices not set
2023-03-09 04:48:14.073111: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:16:00.0 name: Tesla V100-SXM2-32GB computeCapability: 7.0
coreClock: 1.53GHz coreCount: 80 deviceMemorySize: 31.75GiB deviceMemoryBandwidth: 836.37GiB/s
2023-03-09 04:48:14.073153: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2023-03-09 04:48:14.073174: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2023-03-09 04:48:14.073186: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.so.10
2023-03-09 04:48:14.073197: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcufft.so.10

In [7]:
# preprocess the video and audio for the first video
for video, audio in zipped_dataset.take(1):
    print(f'Video shape: {video.shape}')
    print(f'Audio shape: {audio.shape}')

2023-03-09 04:48:14.586375: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2023-03-09 04:48:14.587005: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3000000000 Hz


Video shape: (1, 300, 80, 45, 3)
Audio shape: (1, 300, 32, 1)


In [9]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv3D, Conv2D, Conv1D, Flatten, Dense, concatenate, MaxPooling3D, MaxPooling2D

# Define the text input shape
video_input = Input(shape=(300, 80, 45, 3), name='video_input')

# Define the image input shape
audio_input = Input(shape=(300, 32, 1), name='audio_input')

# 3D convolutional layers for video input
x = Conv3D(16, kernel_size=(10,4,3), activation='relu')(video_input)
x = MaxPooling3D(pool_size=(5,3,3))(x)
x = Conv3D(32, kernel_size=(10,4,3), activation='relu')(x)
x = MaxPooling3D(pool_size=(3,4,3))(x)
x = Conv3D(64, kernel_size=(10,4,3), activation='relu')(x)
x = Flatten()(x)

# 2D convolutional layers for audio input
y = Conv2D(16, kernel_size=(3,3), activation='relu')(audio_input)
y = MaxPooling2D(pool_size=(2,2))(y)
y = Conv2D(32, kernel_size=(3,3), activation='relu')(y)
y = MaxPooling2D(pool_size=(2,2))(y)
y = Conv2D(64, kernel_size=(3,3), activation='relu')(y)
y = Flatten()(y)

# 1D convolutional layers for both inputs
z = Conv1D(16, kernel_size=4, activation='relu')(audio_input)
z = Conv1D(32, kernel_size=8, activation='relu')(z)
z = Conv1D(64, kernel_size=12, activation='relu')(z)
z = Flatten()(z)

# Concatenate the outputs from all three convolutional layers
merged = concatenate([x, y, z])

# Dense layers for classification
merged = Dense(64, activation='relu')(merged)
merged = Dense(32, activation='relu')(merged)
output = Dense(1, activation='sigmoid')(merged)

# Create the model
model = Model(inputs=[video_input, audio_input], outputs=output)

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
video_input (InputLayer)        [(None, 300, 80, 45, 0                                            
__________________________________________________________________________________________________
audio_input (InputLayer)        [(None, 300, 32, 1)] 0                                            
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 291, 77, 43,  5776        video_input[0][0]                
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 298, 30, 16)  160         audio_input[0][0]                
______________________________________________________________________________________________

In [22]:
y = tf.data.Dataset.from_tensor_slices(y.tolist())
dataset_with_labels = tf.data.Dataset.zip((zipped_dataset, y))

model.fit(dataset_with_labels, epochs=10, batch_size=32)

Epoch 1/10


ValueError: in user code:

    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/engine/training.py:756 train_step
        y, y_pred, sample_weight, regularization_losses=self.losses)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/engine/compile_utils.py:203 __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/losses.py:152 __call__
        losses = call_fn(y_true, y_pred)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/losses.py:256 call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/losses.py:1608 binary_crossentropy
        K.binary_crossentropy(y_true, y_pred, from_logits=from_logits), axis=-1)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:4979 binary_crossentropy
        return nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/util/dispatch.py:201 wrapper
        return target(*args, **kwargs)
    /users/mbrubaker/.conda/envs/jupyter_37/lib/python3.7/site-packages/tensorflow/python/ops/nn_impl.py:174 sigmoid_cross_entropy_with_logits
        (logits.get_shape(), labels.get_shape()))

    ValueError: logits and labels must have the same shape ((None, 1) vs ())
