In [3]:
# !mkdir -p ~/.kaggle
# !cp kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json

# !kaggle competitions download -c deepfake-detection-challenge

# !unzip deepfake-detection-challenge.zip

deepfake-detection-challenge.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  deepfake-detection-challenge.zip
replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace test_videos/aassnaulhq.mp4? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace test_videos/aayfryxljh.mp4? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [None]:
import os
import json
import cv2
import numpy as np
from scipy.signal import butter, filtfilt
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.model_selection import train_test_split

# Load the pre-trained Haar cascade for frontal faces that ships with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# CascadeClassifier does not raise when the XML cannot be read; it stays empty
# and only fails later inside detectMultiScale. Fail here with a clear message.
if face_cascade.empty():
    raise IOError('Failed to load haarcascade_frontalface_default.xml from ' + cv2.data.haarcascades)

def extract_frames_and_detect_faces(video_path):
    """Read a video and return one cropped face image per frame with a detection.

    Each decoded frame is converted to grayscale and passed to the module-level
    ``face_cascade``. When several boxes are detected in the same frame only the
    largest one (by area) is kept, so the returned list holds at most one crop
    per frame and can be treated as a time series by the caller.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        list: BGR face crops (slices of the decoded frames) in frame order.
        Frames without any detection contribute nothing.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure.
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)
            if len(faces) == 0:
                continue
            # Keep a single detection per frame: appending every box would
            # interleave samples from different faces / false positives.
            x, y, w, h = max(faces, key=lambda box: box[2] * box[3])
            frames.append(frame[y:y+h, x:x+w])
    finally:
        # Release the capture even if detection raises.
        cap.release()
    return frames

def extract_green_channel_signal(frames):
    """Return the mean green value of the top third of every face crop.

    Args:
        frames: Iterable of 3-D image arrays (rows, columns, channels); channel
            index 1 is read as green.

    Returns:
        list: One mean value per input crop, in input order.
    """
    def _top_third_green_mean(face):
        # Unpacking three values keeps the requirement that crops are 3-D.
        rows, _cols, _channels = face.shape
        return np.mean(face[:rows // 3, :, 1])

    return [_top_third_green_mean(face) for face in frames]

def bandpass_filter(signal, lowcut, highcut, fs, order=5):
    """Apply a zero-phase Butterworth band-pass filter to a signal.

    Args:
        signal: Sequence of samples to filter.
        lowcut: Lower cut-off frequency, in the same units as ``fs``.
        highcut: Upper cut-off frequency, in the same units as ``fs``.
        fs: Sampling rate of ``signal``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        numpy.ndarray: Filtered signal with the same length as ``signal``.

    Raises:
        ValueError: If ``signal`` is empty, or if ``butter`` rejects the
            normalised cut-off frequencies.
    """
    samples = np.asarray(signal, dtype=float)
    if samples.size == 0:
        raise ValueError('Cannot band-pass filter an empty signal')

    nyquist = 0.5 * fs
    b, a = butter(order, [lowcut / nyquist, highcut / nyquist], btype='band')

    # filtfilt pads both ends with 3 * max(len(a), len(b)) samples by default
    # and raises when the input is not longer than that. Clamp the padding so
    # short signals are still filtered; long signals keep the default padding.
    padlen = min(3 * max(len(a), len(b)), samples.shape[-1] - 1)
    return filtfilt(b, a, samples, padlen=padlen)

def create_spatiotemporal_map(signal, width, height):
    """Tile a 1-D signal across columns to form a ``(height, width)`` map.

    Row ``i`` is filled with ``signal[i]``. Rows beyond the end of the signal
    stay zero, and samples beyond ``height`` are dropped.

    Args:
        signal: 1-D sequence of numeric samples.
        width: Number of columns in the map.
        height: Number of rows in the map.

    Returns:
        numpy.ndarray: Float array of shape ``(height, width)``.
    """
    spatiotemporal_map = np.zeros((height, width))
    values = np.asarray(signal, dtype=float)[:height]
    # Broadcast each sample across its row instead of looping over every cell.
    spatiotemporal_map[:len(values), :] = values[:, np.newaxis]
    return spatiotemporal_map

def process_video(video_path, map_width=50, lowcut=0.8, highcut=2.5, fs=30, map_height=None):
    """Turn one video into a spatiotemporal map of its filtered green signal.

    Pipeline: face crops -> mean green value per crop -> band-pass filter ->
    signal tiled across ``map_width`` columns.

    Args:
        video_path: Path of the video to process.
        map_width: Number of columns in the returned map.
        lowcut: Lower cut-off passed to ``bandpass_filter``.
        highcut: Upper cut-off passed to ``bandpass_filter``.
        fs: Sampling rate passed to ``bandpass_filter``. It is not read from
            the video, so the default assumes 30 samples per second.
        map_height: Number of rows in the returned map. ``None`` (default)
            keeps the previous behaviour of one row per filtered sample. Pass a
            fixed value to get maps of identical shape for every video; shorter
            signals are zero-padded and longer ones truncated.

    Returns:
        numpy.ndarray: Array of shape ``(map_height, map_width)``.

    Raises:
        ValueError: If no face was detected in the video.
    """
    frames = extract_frames_and_detect_faces(video_path)
    if not frames:
        # Previously this surfaced as a padding error from inside the filter.
        raise ValueError(f'No faces detected in {video_path!r}; cannot build a signal')
    green_channel_signal = extract_green_channel_signal(frames)
    filtered_signal = bandpass_filter(green_channel_signal, lowcut, highcut, fs)
    if map_height is None:
        map_height = len(filtered_signal)
    return create_spatiotemporal_map(filtered_signal, map_width, map_height)

def process_videos_in_folder(folder_path, metadata_path, map_height=None):
    """Build a stacked array of spatiotemporal maps and labels for a folder.

    Only ``.mp4`` files that have an entry in the metadata JSON are processed.
    Videos for which ``process_video`` raises ``ValueError`` (for example when
    no usable signal can be extracted) or returns an empty map are reported and
    skipped instead of aborting the whole run.

    Maps from different videos usually have different heights, which cannot be
    stacked into one array, so every map is zero-padded or truncated to a
    common height before stacking.

    Args:
        folder_path: Directory containing the videos.
        metadata_path: JSON file mapping file names to dicts with a ``'label'``
            key.
        map_height: Common number of rows for every map. ``None`` (default)
            uses the tallest map found in the folder.

    Returns:
        tuple: ``(maps, labels)`` where ``maps`` has shape
        ``(n_videos, height, width)`` and ``labels`` holds 1 for ``'REAL'`` and
        0 otherwise. Both arrays are empty when no video could be processed.
    """
    # Load metadata
    with open(metadata_path, 'r') as f:
        metadata = json.load(f)

    spatiotemporal_maps = []
    labels = []
    # Sorted so the sample order does not depend on the file system.
    for filename in sorted(os.listdir(folder_path)):
        if not (filename.endswith('.mp4') and filename in metadata):
            continue
        video_path = os.path.join(folder_path, filename)
        try:
            spatiotemporal_map = process_video(video_path)
        except ValueError as exc:
            print(f'Skipping {filename}: {exc}')
            continue
        if spatiotemporal_map.shape[0] == 0:
            print(f'Skipping {filename}: empty spatiotemporal map')
            continue
        spatiotemporal_maps.append(spatiotemporal_map)
        labels.append(1 if metadata[filename]['label'] == 'REAL' else 0)

    if not spatiotemporal_maps:
        return np.array(spatiotemporal_maps), np.array(labels)

    if map_height is None:
        map_height = max(m.shape[0] for m in spatiotemporal_maps)
    stacked = np.zeros((len(spatiotemporal_maps), map_height, spatiotemporal_maps[0].shape[1]))
    for index, video_map in enumerate(spatiotemporal_maps):
        rows = min(map_height, video_map.shape[0])
        stacked[index, :rows, :] = video_map[:rows]
    return stacked, np.array(labels)

# Example folder path and metadata path
# The folder is given relative to the current working directory, and the
# metadata file is looked up inside that same folder.
folder_path = 'train_sample_videos'
metadata_path = os.path.join(folder_path, 'metadata.json')
# Processes every labelled .mp4 in the folder; later cells read both names.
spatiotemporal_maps, labels = process_videos_in_folder(folder_path, metadata_path)


In [None]:
# Add channel dimension for grayscale images.
# Guarded so that re-running this cell does not keep appending axes.
if spatiotemporal_maps.ndim == 3:
    spatiotemporal_maps = np.expand_dims(spatiotemporal_maps, axis=-1)

# Shuffle with a fixed seed. An unseeded shuffle here would make the split
# below differ between runs even though train_test_split has random_state set.
# The permutation is applied to copies so the source arrays keep their order
# and the cell gives the same result every time it is run.
indices = np.random.default_rng(42).permutation(len(labels))
shuffled_maps = spatiotemporal_maps[indices]
shuffled_labels = labels[indices]

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(shuffled_maps, shuffled_labels, test_size=0.2, random_state=42)


In [None]:
def build_cnn_model(input_shape):
    """Create and compile the binary-classification CNN.

    Architecture: two Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, relu) and a single sigmoid output.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.

    Returns:
        A Sequential model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    cnn = Sequential(layer_stack)
    cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return cnn

# One sample is (rows, columns) taken from the stacked maps plus one channel.
input_shape = spatiotemporal_maps.shape[1:3] + (1,)
model = build_cnn_model(input_shape)

# Train for 10 epochs in batches of 32, validating on the held-out split.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_val, y_val),
)


In [None]:
# Evaluate the model
# evaluate() is unpacked into two values here because the model was compiled
# with a single metric (accuracy) in addition to the loss.
loss, accuracy = model.evaluate(X_val, y_val)
# Accuracy is a fraction; show it as a percentage with two decimals.
print(f'Validation Accuracy: {accuracy * 100:.2f}%')

# Predict on new videos
def predict_video(video_path):
    """Classify a single video as ``'Real'`` or ``'Fake'`` with the global ``model``.

    The video's spatiotemporal map has one row per detected face sample, so its
    height generally differs from the fixed height the model expects. The map
    is zero-padded or truncated to the model's input height before prediction.

    Args:
        video_path: Path of the video to classify.

    Returns:
        str: ``'Real'`` when the sigmoid output exceeds 0.5, otherwise ``'Fake'``.
    """
    spatiotemporal_map = process_video(video_path)

    # model.input_shape is (batch, rows, columns, channels).
    expected_rows = model.input_shape[1]
    if expected_rows is not None and spatiotemporal_map.shape[0] != expected_rows:
        fitted = np.zeros((expected_rows, spatiotemporal_map.shape[1]), dtype=spatiotemporal_map.dtype)
        rows = min(expected_rows, spatiotemporal_map.shape[0])
        fitted[:rows] = spatiotemporal_map[:rows]
        spatiotemporal_map = fitted

    # Add the batch axis in front and the channel axis at the end.
    spatiotemporal_map = np.expand_dims(spatiotemporal_map, axis=0)
    spatiotemporal_map = np.expand_dims(spatiotemporal_map, axis=-1)
    prediction = model.predict(spatiotemporal_map)
    return 'Real' if prediction[0][0] > 0.5 else 'Fake'

# Example prediction
# Relative to the working directory, like the training folder, instead of a
# hard-coded absolute Colab path.
test_video_path = os.path.join('test_videos', 'aassnaulhq.mp4')
print(predict_video(test_video_path))

In [None]:
# Define the file path to save the entire model
# The loading cell reads this same file name, so keep the two in sync.
model_file_path = 'model.h5'

# Save the entire model
# NOTE(review): the .h5 suffix presumably selects the HDF5 format — confirm
# against the installed Keras version.
model.save(model_file_path)


In [None]:
# Define the file path to save the model weights
# NOTE(review): Keras 3 reportedly requires weight files to end in
# `.weights.h5` — confirm against the installed version. The loading cell reads
# this same file name, so change both together.
weights_file_path = 'model_weights.h5'

# Save the model weights
model.save_weights(weights_file_path)


In [None]:
import os
import json
import cv2
import numpy as np
from scipy.signal import butter, filtfilt
import tensorflow as tf
from tensorflow.keras.models import load_model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Load the pre-trained Haar cascade for frontal faces that ships with OpenCV.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# CascadeClassifier does not raise when the XML cannot be read; it stays empty
# and only fails later inside detectMultiScale. Fail here with a clear message.
if face_cascade.empty():
    raise IOError('Failed to load haarcascade_frontalface_default.xml from ' + cv2.data.haarcascades)

def extract_frames_and_detect_faces(video_path):
    """Read a video and return one cropped face image per frame with a detection.

    Each decoded frame is converted to grayscale and passed to the module-level
    ``face_cascade``. When several boxes are detected in the same frame only the
    largest one (by area) is kept, so the returned list holds at most one crop
    per frame and can be treated as a time series by the caller.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.

    Returns:
        list: BGR face crops (slices of the decoded frames) in frame order.
        Frames without any detection contribute nothing.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream or decode failure.
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, 1.1, 4)
            if len(faces) == 0:
                continue
            # Keep a single detection per frame: appending every box would
            # interleave samples from different faces / false positives.
            x, y, w, h = max(faces, key=lambda box: box[2] * box[3])
            frames.append(frame[y:y+h, x:x+w])
    finally:
        # Release the capture even if detection raises.
        cap.release()
    return frames

def extract_green_channel_signal(frames):
    """Return the mean green value of the top third of every face crop.

    Args:
        frames: Iterable of 3-D image arrays (rows, columns, channels); channel
            index 1 is read as green.

    Returns:
        list: One mean value per input crop, in input order.
    """
    def _top_third_green_mean(face):
        # Unpacking three values keeps the requirement that crops are 3-D.
        rows, _cols, _channels = face.shape
        return np.mean(face[:rows // 3, :, 1])

    return [_top_third_green_mean(face) for face in frames]

def bandpass_filter(signal, lowcut, highcut, fs, order=5):
    """Apply a zero-phase Butterworth band-pass filter to a signal.

    Args:
        signal: Sequence of samples to filter.
        lowcut: Lower cut-off frequency, in the same units as ``fs``.
        highcut: Upper cut-off frequency, in the same units as ``fs``.
        fs: Sampling rate of ``signal``.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        numpy.ndarray: Filtered signal with the same length as ``signal``.

    Raises:
        ValueError: If ``signal`` is empty, or if ``butter`` rejects the
            normalised cut-off frequencies.
    """
    samples = np.asarray(signal, dtype=float)
    if samples.size == 0:
        raise ValueError('Cannot band-pass filter an empty signal')

    nyquist = 0.5 * fs
    b, a = butter(order, [lowcut / nyquist, highcut / nyquist], btype='band')

    # filtfilt pads both ends with 3 * max(len(a), len(b)) samples by default
    # and raises when the input is not longer than that. Clamp the padding so
    # short signals are still filtered; long signals keep the default padding.
    padlen = min(3 * max(len(a), len(b)), samples.shape[-1] - 1)
    return filtfilt(b, a, samples, padlen=padlen)

def create_spatiotemporal_map(signal, width, height):
    """Tile a 1-D signal across columns to form a ``(height, width)`` map.

    Row ``i`` is filled with ``signal[i]``. Rows beyond the end of the signal
    stay zero, and samples beyond ``height`` are dropped.

    Args:
        signal: 1-D sequence of numeric samples.
        width: Number of columns in the map.
        height: Number of rows in the map.

    Returns:
        numpy.ndarray: Float array of shape ``(height, width)``.
    """
    spatiotemporal_map = np.zeros((height, width))
    values = np.asarray(signal, dtype=float)[:height]
    # Broadcast each sample across its row instead of looping over every cell.
    spatiotemporal_map[:len(values), :] = values[:, np.newaxis]
    return spatiotemporal_map

def process_video(video_path, map_width=50, lowcut=0.8, highcut=2.5, fs=30, map_height=None):
    """Turn one video into a spatiotemporal map of its filtered green signal.

    Pipeline: face crops -> mean green value per crop -> band-pass filter ->
    signal tiled across ``map_width`` columns.

    Args:
        video_path: Path of the video to process.
        map_width: Number of columns in the returned map.
        lowcut: Lower cut-off passed to ``bandpass_filter``.
        highcut: Upper cut-off passed to ``bandpass_filter``.
        fs: Sampling rate passed to ``bandpass_filter``. It is not read from
            the video, so the default assumes 30 samples per second.
        map_height: Number of rows in the returned map. ``None`` (default)
            keeps the previous behaviour of one row per filtered sample. Pass a
            fixed value to get maps of identical shape for every video; shorter
            signals are zero-padded and longer ones truncated.

    Returns:
        numpy.ndarray: Array of shape ``(map_height, map_width)``.

    Raises:
        ValueError: If no face was detected in the video.
    """
    frames = extract_frames_and_detect_faces(video_path)
    if not frames:
        # Previously this surfaced as a padding error from inside the filter.
        raise ValueError(f'No faces detected in {video_path!r}; cannot build a signal')
    green_channel_signal = extract_green_channel_signal(frames)
    filtered_signal = bandpass_filter(green_channel_signal, lowcut, highcut, fs)
    if map_height is None:
        map_height = len(filtered_signal)
    return create_spatiotemporal_map(filtered_signal, map_width, map_height)

# If you saved the entire model
# Same file name that the saving cell writes with model.save().
model_file_path = 'model.h5'
# Binds the restored model to the global `model` read by predict_video below.
model = load_model(model_file_path)

# If you saved just the model weights
def build_cnn_model(input_shape):
    """Create and compile the binary-classification CNN.

    Architecture: two Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32 and
    64 filters, then Flatten, Dense(128, relu) and a single sigmoid output.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.

    Returns:
        A Sequential model compiled with the Adam optimizer, binary
        cross-entropy loss and the accuracy metric.
    """
    layer_stack = [
        Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        MaxPooling2D((2, 2)),
        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    cnn = Sequential(layer_stack)
    cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return cnn

# The Flatten -> Dense head needs a fully defined input size, so a `None`
# height cannot be used here and the rebuilt network must match the shape the
# weights were trained with. Reuse the per-sample shape of the model loaded
# from model.h5 above (input_shape without its leading batch axis).
input_shape = model.input_shape[1:]
model = build_cnn_model(input_shape)
weights_file_path = 'model_weights.h5'
model.load_weights(weights_file_path)

# Predict on new videos
def predict_video(video_path):
    """Classify a single video as ``'Real'`` or ``'Fake'`` with the global ``model``.

    The video's spatiotemporal map has one row per detected face sample, so its
    height generally differs from the fixed height the model expects. The map
    is zero-padded or truncated to the model's input height before prediction.

    Args:
        video_path: Path of the video to classify.

    Returns:
        str: ``'Real'`` when the sigmoid output exceeds 0.5, otherwise ``'Fake'``.
    """
    spatiotemporal_map = process_video(video_path)

    # model.input_shape is (batch, rows, columns, channels).
    expected_rows = model.input_shape[1]
    if expected_rows is not None and spatiotemporal_map.shape[0] != expected_rows:
        fitted = np.zeros((expected_rows, spatiotemporal_map.shape[1]), dtype=spatiotemporal_map.dtype)
        rows = min(expected_rows, spatiotemporal_map.shape[0])
        fitted[:rows] = spatiotemporal_map[:rows]
        spatiotemporal_map = fitted

    # Add the batch axis in front and the channel axis at the end.
    spatiotemporal_map = np.expand_dims(spatiotemporal_map, axis=0)
    spatiotemporal_map = np.expand_dims(spatiotemporal_map, axis=-1)
    prediction = model.predict(spatiotemporal_map)
    return 'Real' if prediction[0][0] > 0.5 else 'Fake'

# Example prediction
# Relative to the working directory, like the training folder, instead of a
# hard-coded absolute Colab path.
test_video_path = os.path.join('test_videos', 'aassnaulhq.mp4')
print(predict_video(test_video_path))
