In [1]:
import json
import os
import cv2
from multiprocessing import Pool, cpu_count
from concurrent.futures import ThreadPoolExecutor
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
print(tf.config.list_physical_devices('GPU'))
from mtcnn.mtcnn import MTCNN

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


I0000 00:00:1725158060.989705 2411579 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1725158061.015924 2411579 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
I0000 00:00:1725158061.016079 2411579 cuda_executor.cc:1015] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355


In [2]:
# Dataset locations; everything else is derived from data_dir.
data_dir = './data/dfdc_train_part_0'
metadata_path = os.path.join(data_dir, 'metadata.json')
frames_dir  = os.path.join(data_dir, 'frames')
faces_dir = os.path.join(data_dir, 'cropped_faces')

# Load metadata.
# The encoding is pinned to UTF-8 so the result does not depend on the
# machine's locale default.
with open(metadata_path, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

In [13]:
# Preprocessing - Extract frames from videos

def extract_frames_single_video(args):
    """Write every ``fps``-th frame of one video to disk as a JPEG.

    Args:
        args: Tuple ``(video_path, output_dir, fps)``. The arguments are
            packed into one tuple so the function can be handed directly to
            ``Pool.map``. Despite its name, ``fps`` acts as a frame step:
            a frame is saved whenever the running frame index is divisible
            by it.

    Frames are written to ``<output_dir>/<video name>/frame_<k>.jpg`` where
    ``k`` is the frame index divided by ``fps``.
    """
    video_path, output_dir, fps = args
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    output_video_dir = os.path.join(output_dir, video_name)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_video_dir, exist_ok=True)

    vidcap = cv2.VideoCapture(video_path)
    try:
        success, image = vidcap.read()
        count = 0
        while success:
            if count % fps == 0:
                frame_id = int(count / fps)
                cv2.imwrite(os.path.join(output_video_dir, f"frame_{frame_id}.jpg"), image)
            success, image = vidcap.read()
            count += 1
    finally:
        # Pool workers are reused for many videos, so the capture handle must
        # be released explicitly instead of waiting for garbage collection.
        vidcap.release()

def extract_frames_parallel(video_dir, output_dir, fps=1):
    """Extract frames from every ``.mp4`` in ``video_dir`` using a process pool.

    Args:
        video_dir: Directory that is listed for files ending in ``.mp4``.
        output_dir: Root directory passed on to the per-video worker.
        fps: Frame-step value passed on to the per-video worker.
    """
    jobs = []
    for entry in os.listdir(video_dir):
        if entry.endswith('.mp4'):
            # One argument tuple per video, in the shape the worker unpacks.
            jobs.append((os.path.join(video_dir, entry), output_dir, fps))

    # One worker process per CPU core.
    with Pool(cpu_count()) as workers:
        workers.map(extract_frames_single_video, jobs)

# Videos are read from data_dir and frames are written under frames_dir.
# extract_frames_single_video uses `fps` as a frame step (count % fps == 0),
# so fps=10 keeps every 10th frame rather than 10 frames per second.
extract_frames_parallel(video_dir=data_dir, output_dir=frames_dir, fps=10)

In [14]:
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
import shutil

In [None]:
def organize_frames(metadata, frames_dir, output_dir):
    """Copy extracted frames into ``real/`` and ``fake/`` class folders.

    Args:
        metadata: Mapping of video file name to a dict with a ``'label'``
            key; ``'REAL'`` selects the ``real`` folder, any other value
            selects ``fake``.
        frames_dir: Directory holding one sub-folder of frames per video,
            named after the video file without its extension.
        output_dir: Destination root; ``real`` and ``fake`` sub-folders are
            created inside it if missing.

    Each copied frame is renamed to ``<video name>_<frame file name>``.
    Frame files share the same names across videos, so copying them
    unprefixed into one folder would make later videos overwrite earlier ones.
    """
    real_dir = os.path.join(output_dir, 'real')
    fake_dir = os.path.join(output_dir, 'fake')

    # makedirs creates output_dir as well, so no separate step is needed.
    for class_dir in (real_dir, fake_dir):
        os.makedirs(class_dir, exist_ok=True)

    for video_filename, details in metadata.items():
        label = details['label']
        video_name = os.path.splitext(video_filename)[0]
        video_frames_dir = os.path.join(frames_dir, video_name)

        if not os.path.isdir(video_frames_dir):
            continue  # Skip videos whose frames were never extracted

        target_dir = real_dir if label == 'REAL' else fake_dir

        for frame in os.listdir(video_frames_dir):
            frame_path = os.path.join(video_frames_dir, frame)
            # Prefix with the video name to keep file names unique.
            shutil.copy(frame_path, os.path.join(target_dir, f"{video_name}_{frame}"))

# Call the function to organize frames.
# `output_dir` exists only as a parameter name inside organize_frames, so the
# destination folder has to be defined here before the call.
# NOTE(review): the folder name below is a placeholder choice - adjust it to
# wherever the class-sorted frames should live.
organized_dir = os.path.join(data_dir, 'organized_frames')
organize_frames(metadata, frames_dir, organized_dir)

In [None]:
# Closes any open OpenCV GUI windows.
# NOTE(review): no visible cell opens a window (there is no cv2.imshow call),
# so this cell looks like a leftover - confirm and remove if unused.
cv2.destroyAllWindows()

In [2]:
# Peek at the first metadata entry only: its name without extension, then its details.
for key in list(metadata)[:1]:
    value = metadata[key]
    print(os.path.splitext(key)[0])
    print(value)

NameError: name 'metadata' is not defined

In [3]:
# Parallel lists in metadata order: video file names and binary labels
# (1 for 'FAKE', 0 for anything else).
file_names = list(metadata)
labels = [int(entry['label'] == 'FAKE') for entry in metadata.values()]

In [None]:
def load_frames_from_video(video_dir, num_frames=10, img_size=(224, 224)):
    """Load the first ``num_frames`` frames of one video as a stacked tensor.

    Args:
        video_dir: Directory containing the video's ``.jpg`` / ``.png`` frames.
        num_frames: Maximum number of frames to load, taken in frame order.
        img_size: ``(height, width)`` each frame is resized to on load.

    Returns:
        A tensor stacking the loaded frames along a new leading axis. If the
        directory holds fewer than ``num_frames`` images, fewer frames are
        returned.
    """
    import re  # local import: only needed for the natural-sort key below

    def _natural_key(path):
        # re.split with a capturing group alternates text and digit chunks
        # (text first), so odd positions are numbers. Comparing them as ints
        # makes "frame_2.jpg" sort before "frame_10.jpg", which a plain
        # string sort gets wrong.
        chunks = re.split(r'(\d+)', os.path.basename(path))
        return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]

    frame_files = sorted(
        (
            os.path.join(video_dir, img)
            for img in os.listdir(video_dir)
            if img.endswith(('.jpg', '.png'))
        ),
        key=_natural_key,
    )

    frames = []
    for img_path in frame_files[:num_frames]:
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=img_size)
        img = tf.keras.preprocessing.image.img_to_array(img)
        # The original called tf.keras.applications.vit(img), which is not a
        # preprocessing function and fails at runtime. Scale pixels from
        # [0, 255] to [-1, 1] instead.
        # NOTE(review): [-1, 1] is an assumed input range for the ViT
        # backbone - confirm against the weights/model actually used.
        img = img / 127.5 - 1.0
        frames.append(img)

    return tf.stack(frames)

def create_dataset(file_names, labels, base_path, num_frames=10, img_size=(224, 224)):
    """Build a ``tf.data.Dataset`` of (frame stack, label) pairs.

    Args:
        file_names: Video file names; the frame folder for each is looked up
            as ``<base_path>/<file name without extension>``.
        labels: Labels aligned with ``file_names``.
        base_path: Directory containing one frame folder per video.
        num_frames: Number of frames loaded per video.
        img_size: ``(height, width)`` frames are resized to.

    Returns:
        A dataset sliced from one tensor of all videos and one tensor of
        their labels. Videos without a frame folder, or with a frame count
        other than ``num_frames``, are left out.

    Note:
        All frames are loaded eagerly, so memory use grows with the number
        of videos.
    """
    videos, labels_out = [], []

    for file_name, label in zip(file_names, labels):
        # splitext matches how the frame folders were named at extraction
        # time; str.replace would also strip '.mp4' from the middle of a name.
        video_dir = os.path.join(base_path, os.path.splitext(file_name)[0])
        if not os.path.isdir(video_dir):
            continue

        frames = load_frames_from_video(video_dir, num_frames, img_size)
        # Every video must contribute the same number of frames, otherwise
        # the stacks cannot be combined into a single tensor below.
        if frames.shape[0] != num_frames:
            print(f"Skipping {file_name}: expected {num_frames} frames, found {frames.shape[0]}")
            continue

        videos.append(frames)
        labels_out.append(label)

    return tf.data.Dataset.from_tensor_slices((tf.convert_to_tensor(videos), tf.convert_to_tensor(labels_out)))

# Define paths
# NOTE(review): this is a placeholder string; create_dataset looks for
# <base_path>/<video name>, which matches the layout written under
# frames_dir - presumably base_path should be frames_dir. Confirm.
base_path = 'path_to_frames_folder/frames'

# Create the datasets
# NOTE(review): train_files, train_labels, val_files and val_labels are not
# defined in any visible cell (only file_names and labels are), so this cell
# raises NameError on a fresh kernel unless a train/validation split is added
# beforehand.
train_dataset = create_dataset(train_files, train_labels, base_path)
val_dataset = create_dataset(val_files, val_labels, base_path)

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top.
import tensorflow_models as tfm
# Instantiate a Vision Transformer backbone for 3-channel images of
# unspecified batch size, height and width. The result is not assigned to a
# name, so it is only displayed as the cell output and cannot be reused later.
# The sizes below presumably correspond to ViT-Base/16 - confirm.
tfm.vision.backbones.VisionTransformer(
    input_specs=tf.keras.layers.InputSpec(shape=[None, None, None, 3]),
    patch_size=16,   # side length of each square image patch, in pixels
    hidden_size=768,
    num_layers=12,
    num_heads=12,
)