In [10]:
!pip install tensorflow



In [None]:
# !pip install opencv-contrib-python
# !pip install imageio

from tensorflow import keras
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np 
import imageio
import cv2
import os
from IPython.display import HTML
from base64 import b64encode
from sklearn.model_selection import train_test_split

In [None]:
# Root of the local DeepFake dataset. The sub-folders are derived from it so that
# relocating the data only requires editing this one line.
DATA_FOLDER = r'C:\Users\HP\Downloads\DeepFake\dataset'
TRAIN_SAMPLE_FOLDER = os.path.join(DATA_FOLDER, 'train_sample_videos')
TEST_FOLDER = os.path.join(DATA_FOLDER, 'test_videos')

# Both folders are already full paths, so they are listed directly instead of being
# joined onto DATA_FOLDER a second time.
# NOTE: these are raw directory-entry counts; the training folder also holds
# metadata.json (read in a later cell), so it is included in the first number.
print(f"train samples: {len(os.listdir(TRAIN_SAMPLE_FOLDER))}")
print(f"test samples: {len(os.listdir(TEST_FOLDER))}")




In [None]:
# Load the label metadata that sits next to the training clips. The JSON is
# transposed so that each row corresponds to one video file name (the index).
metadata_path = os.path.join(TRAIN_SAMPLE_FOLDER, 'metadata.json')
train_sample_metadata = pd.read_json(metadata_path).T
train_sample_metadata.head()

In [None]:
# Bar chart showing how many training videos carry each label.
label_counts = train_sample_metadata.groupby('label')['label'].count()
label_counts.plot(
    kind='bar',
    figsize=(5, 5),
    title='The Label in the Training Set',
)
plt.show()

In [None]:
# (rows, columns) of the metadata table; each row is one video file.
train_sample_metadata.shape

In [None]:
# Video file names (the metadata index) split by label: FAKE first, then REAL.
f_videos, r_videos = (
    list(train_sample_metadata.loc[train_sample_metadata["label"] == wanted_label].index)
    for wanted_label in ("FAKE", "REAL")
)

def play_video(video_file, subset='train_sample_videos'):
    """Return an inline HTML5 player for one dataset video.

    The file ``DATA_FOLDER/<subset>/<video_file>`` is read in full and embedded in
    the returned markup as a base64 ``data:`` URL.

    Args:
        video_file: File name of the video inside the subset folder.
        subset: Sub-folder of ``DATA_FOLDER`` that contains the file.

    Returns:
        The ``HTML`` display object wrapping a ``<video>`` element.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    video_path = os.path.join(DATA_FOLDER, subset, video_file)
    # Context manager so the file handle is closed as soon as the bytes are read.
    with open(video_path, 'rb') as video_handle:
        video_bytes = video_handle.read()
    data_url = "data:video/mp4;base64," + b64encode(video_bytes).decode()
    return HTML(f"""<video width=500 controls><source src="{data_url}" type="video/mp4"></video>""")


In [None]:
img_size = 224  # side length (pixels) of the square frames produced by load_video and fed to the feature extractor
max_seq_length = 20  # number of frame slots per video in the feature / mask arrays
num_features = 2048  # width of one per-frame feature vector; must equal the feature extractor's output size (presumably InceptionV3's pooled width - confirm)

def crop_center_square(frame):
    """Return the largest centred square region of ``frame``.

    Args:
        frame: Array whose first two axes are (height, width); any further axes
            are kept untouched.

    Returns:
        A slice of ``frame`` whose height and width both equal the shorter of the
        two original sides, centred along the longer side.
    """
    height, width = frame.shape[0], frame.shape[1]
    side = height if height < width else width
    top = (height - side) // 2
    left = (width - side) // 2
    return frame[top:top + side, left:left + side]

def load_video(path, max_frames=0, resize=(img_size, img_size)):
    """Decode a video file into an array of square RGB frames.

    Each decoded frame is centre-cropped to a square, resized to ``resize`` and
    has its channel order reversed from BGR to RGB.

    Args:
        path: Location of the video file handed to ``cv2.VideoCapture``.
        max_frames: Stop after this many frames have been collected. The default
            of 0 never matches the collected count, so the whole video is read.
        resize: Target size passed to ``cv2.resize``.

    Returns:
        ``np.array`` built from the list of collected frames (an empty array when
        no frame could be read).
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        ok, image = capture.read()
        while ok:
            square = crop_center_square(image)
            scaled = cv2.resize(square, resize)
            collected.append(scaled[:, :, [2, 1, 0]])  # BGR to RGB
            if len(collected) == max_frames:
                break
            ok, image = capture.read()
    finally:
        # Always hand the capture back, even if cropping or resizing raised.
        capture.release()
    return np.array(collected)


In [None]:
def pretrain_feature_extractor():
    """Build the per-frame feature extractor.

    The model wraps an InceptionV3 backbone (ImageNet weights, no classification
    head, global average pooling) and applies the matching
    ``inception_v3.preprocess_input`` to its inputs first.

    Returns:
        A ``keras.Model`` named ``"feature_extractor"`` that takes frames of shape
        ``(img_size, img_size, 3)`` and outputs the backbone's pooled features.
    """
    frame_shape = (img_size, img_size, 3)
    backbone = keras.applications.InceptionV3(
        include_top=False,
        weights="imagenet",
        pooling="avg",
        input_shape=frame_shape,
    )

    raw_frames = keras.Input(frame_shape)
    scaled_frames = keras.applications.inception_v3.preprocess_input(raw_frames)
    pooled_features = backbone(scaled_frames)
    return keras.Model(raw_frames, pooled_features, name="feature_extractor")

# Build the extractor once at notebook level; the feature-preparation functions
# defined in later cells call it through this global name.
feature_extractor = pretrain_feature_extractor()


In [None]:
def prepare_all_videos(df, root_dir):
    """Extract padded per-frame features, masks and labels for every video in ``df``.

    Args:
        df: Metadata frame whose index holds the video file names and whose
            ``label`` column marks fake videos with ``'FAKE'``.
        root_dir: Sub-folder of ``DATA_FOLDER`` that contains the video files.

    Returns:
        ``((frame_features, frame_masks), labels)`` where

        * ``frame_features`` is a float32 array of shape
          ``(len(df), max_seq_length, num_features)``, zero-padded after the last
          available frame;
        * ``frame_masks`` is a bool array of shape ``(len(df), max_seq_length)``
          that is True exactly where ``frame_features`` holds a real frame;
        * ``labels`` is an int array with 1 for ``'FAKE'`` and 0 otherwise.
    """
    num_samples = len(df)
    video_paths = list(df.index)
    labels = np.array(df["label"] == 'FAKE').astype(int)

    frame_masks = np.zeros((num_samples, max_seq_length), dtype="bool")
    frame_features = np.zeros((num_samples, max_seq_length, num_features), dtype="float32")

    for idx, path in enumerate(video_paths):
        # Only the first max_seq_length frames are ever used, so stop decoding there
        # instead of loading the entire video into memory.
        frames = load_video(os.path.join(DATA_FOLDER, root_dir, path), max_frames=max_seq_length)
        length = min(max_seq_length, len(frames))
        if length == 0:
            # Unreadable or empty video: keep its row zero-filled and fully masked out.
            continue

        # One batched forward pass per video rather than one predict() call per frame;
        # verbose=0 keeps the notebook output free of a progress bar per call.
        frame_features[idx, :length] = feature_extractor.predict(frames[:length], verbose=0)
        frame_masks[idx, :length] = True

    return (frame_features, frame_masks), labels


In [None]:
# Hold out 10% of the labelled sample videos, stratified on the label column so
# both parts keep the same FAKE/REAL proportions.
Train_set, Test_set = train_test_split(
    train_sample_metadata,
    stratify=train_sample_metadata['label'],
    random_state=42,
    test_size=0.1,
)
print("Train shape:", Train_set.shape)
print("Test shape:", Test_set.shape)

# Extract features for the training part first, then for the held-out part.
(train_data, train_labels), (test_data, test_labels) = (
    prepare_all_videos(split_frame, 'train_sample_videos')
    for split_frame in (Train_set, Test_set)
)

print(f"Train frame features shape: {train_data[0].shape}")
print(f"Train frame masks shape: {train_data[1].shape}")


In [None]:
# Sequence classifier over the pre-extracted per-frame features.
# Inputs: one feature vector per frame slot, plus a boolean mask marking which
# slots hold real frames (as opposed to zero padding).
frame_features_input = keras.Input((max_seq_length, num_features))
mask_input = keras.Input((max_seq_length,), dtype="bool")

# The mask is passed to the first recurrent layer so padded slots are skipped.
x = keras.layers.GRU(16, return_sequences=True)(frame_features_input, mask=mask_input)
x = keras.layers.GRU(8)(x)
x = keras.layers.Dropout(0.4)(x)
x = keras.layers.Dense(8, activation="relu")(x)
# Single sigmoid unit: the training labels encode FAKE as 1.
output = keras.layers.Dense(1, activation="sigmoid")(x)

model = keras.Model([frame_features_input, mask_input], output)
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.summary()

# Keep only the weights of the best epoch (ModelCheckpoint's default monitor).
checkpoint_path = './best_model.weights.h5'
checkpoint = keras.callbacks.ModelCheckpoint(
    checkpoint_path, save_weights_only=True, save_best_only=True
)

history = model.fit(
    [train_data[0], train_data[1]],
    train_labels,
    validation_data=([test_data[0], test_data[1]], test_labels),
    callbacks=[checkpoint],
    epochs=70,
    batch_size=8
)

# fit() leaves the model holding the weights of the final epoch. Restore the
# checkpointed best weights so every later prediction uses them.
model.load_weights(checkpoint_path)


In [None]:
def prepare_single_video(frames):
    """Turn the frames of one video into model-ready features and mask.

    Args:
        frames: Sequence of frames for a single video, indexed by frame on the
            first axis (for example the array returned by ``load_video``). Only the
            first ``max_seq_length`` frames are used.

    Returns:
        ``(frame_features, frame_mask)`` where ``frame_features`` is a float32 array
        of shape ``(1, max_seq_length, num_features)``, zero-padded after the last
        available frame, and ``frame_mask`` is a bool array of shape
        ``(1, max_seq_length)`` that is True exactly where a real frame is present.
    """
    frame_mask = np.zeros((1, max_seq_length), dtype="bool")
    frame_features = np.zeros((1, max_seq_length, num_features), dtype="float32")

    length = min(max_seq_length, len(frames))
    if length > 0:
        # One batched forward pass for all used frames rather than one predict()
        # call per frame; an empty input skips the extractor entirely.
        frame_features[0, :length] = feature_extractor.predict(frames[:length], verbose=0)
        frame_mask[0, :length] = True

    return frame_features, frame_mask

def sequence_prediction(path):
    """Run the trained sequence model on one video from ``TEST_FOLDER``.

    Args:
        path: File name of the video inside ``TEST_FOLDER``.

    Returns:
        The first row of ``model.predict`` for this video, i.e. the model output
        for the single video in the batch.
    """
    # Feature preparation only uses the first max_seq_length frames, so there is
    # no need to decode the rest of the video.
    frames = load_video(os.path.join(TEST_FOLDER, path), max_frames=max_seq_length)
    frame_features, frame_mask = prepare_single_video(frames)
    return model.predict([frame_features, frame_mask])[0]


In [None]:
# Pick one file at random from the test folder.
test_videos = pd.DataFrame(os.listdir(TEST_FOLDER), columns=["video"])
test_video = np.random.choice(test_videos["video"])
print(f"Test video: {test_video}")

# Classify it: a model output of at least 0.5 is reported as FAKE, anything else as REAL.
prediction = sequence_prediction(test_video)
print("Predicted class: FAKE" if prediction >= 0.5 else "Predicted class: REAL")

# Last expression of the cell, so the embedded player is rendered as its output.
play_video(test_video, 'test_videos')
