# In [None]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv3D, MaxPooling3D, LSTM, Dense, Dropout, BatchNormalization, TimeDistributed, Flatten, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix, f1_score, roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np
import glob
import cv2
import os
import matplotlib.pyplot as plt

# Check for GPU availability and enable memory growth before any other
# TensorFlow operation initializes the devices.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    for gpu_device in physical_devices:
        try:
            # Let TensorFlow grow its GPU allocation on demand instead of
            # reserving the whole device memory up front.
            tf.config.experimental.set_memory_growth(gpu_device, True)
        except RuntimeError as err:
            # Memory growth cannot be changed once the GPU has been initialized
            # (e.g. when this cell is re-run in a live notebook kernel).
            print(f"Could not enable memory growth on {gpu_device}: {err}")
    print(f"Using GPU: {physical_devices}")

# Constants: input geometry and training hyperparameters used below
IMG_SIZE = (128, 128)    # size every extracted frame is resized to
MAX_FRAMES = 30          # number of frames kept (or zero-padded to) per video
BATCH_SIZE = 16          # batch size passed to model.fit
EPOCHS = 30              # upper bound on training epochs
LEARNING_RATE = 5e-4     # initial learning rate given to Adam
WEIGHT_DECAY = 1e-4      # L2 factor applied to the conv and dense kernels

# Data Augmentation
# Training-time generator: rescales pixels to [0, 1] and applies random shear,
# zoom and horizontal flips; 20% of its data is reserved as a validation subset.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    validation_split=0.2,
    horizontal_flip=True,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1 / 255,
)

# Evaluation-time generator: rescaling only, no augmentation.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1 / 255)

# Paths: directories searched for the real and the fake *.mp4 videos
real_videos = "/kaggle/input/dataset/data/real"
fake_videos = "/kaggle/input/dataset/data/fake"

# Function to extract frames from videos
def extract_frames(video_path, img_size=IMG_SIZE, max_frames=MAX_FRAMES):
    """Read up to ``max_frames`` resized frames from the start of a video.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        img_size: Target size passed to ``cv2.resize`` as ``dsize``; OpenCV
            interprets it as ``(width, height)``.
        max_frames: Number of frames the returned clip must contain.

    Returns:
        A NumPy array stacking exactly ``max_frames`` frames. Videos with
        fewer frames are padded at the end with all-zero frames. If no frame
        can be read at all, an all-zero clip is returned instead of raising
        ``IndexError`` on the empty frame list.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # Stop as soon as enough frames are collected so no extra frame is decoded.
        while len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, img_size))
    finally:
        # Release the capture handle even if decoding or resizing fails.
        cap.release()

    # Padding with zeros if there are fewer frames than max_frames
    missing = max_frames - len(frames)
    if missing > 0:
        if frames:
            blank = np.zeros_like(frames[0])
        else:
            # Nothing was decoded, so derive the blank frame from img_size.
            # cv2.resize output is (height, width, channels); assumes the
            # 3-channel 8-bit frames VideoCapture produces by default.
            blank = np.zeros((img_size[1], img_size[0], 3), dtype=np.uint8)
        frames.extend([blank] * missing)
    return np.array(frames)

# Load and preprocess dataset
def load_dataset(real_videos, fake_videos, img_size=IMG_SIZE, max_frames=MAX_FRAMES):
    """Load every ``*.mp4`` clip from the two directories with its label.

    Args:
        real_videos: Directory containing the real videos (label 0).
        fake_videos: Directory containing the fake videos (label 1).
        img_size: Frame size forwarded to ``extract_frames``.
        max_frames: Frames per clip forwarded to ``extract_frames``.

    Returns:
        A ``(data, labels)`` tuple of NumPy arrays: the stacked clips, real
        videos first and then fake ones, and the matching 0/1 labels.
    """
    data, labels = [], []

    # Real video is labeled as 0, fake video is labeled as 1
    for directory, label in ((real_videos, 0), (fake_videos, 1)):
        # glob returns paths in an arbitrary, filesystem-dependent order;
        # sorting keeps the sample order (and therefore any seeded split of
        # the returned arrays) reproducible across runs and machines.
        for video_path in sorted(glob.glob(os.path.join(directory, '*.mp4'))):
            data.append(extract_frames(video_path, img_size, max_frames))
            labels.append(label)

    return np.array(data), np.array(labels)

# Load dataset
data, labels = load_dataset(real_videos, fake_videos)

# Split into train, test, validation (64% / 20% / 16% of the videos).
# Both splits are stratified on the label so each subset keeps the real/fake
# ratio; an unstratified split can leave a subset with a single class, which
# breaks the two-class classification report computed on the test set.
train_data, test_data, train_labels, test_labels = train_test_split(
    data, labels, test_size=0.2, random_state=42, stratify=labels
)
train_data, val_data, train_labels, val_labels = train_test_split(
    train_data, train_labels, test_size=0.2, random_state=42, stratify=train_labels
)

# Model Building: CNN + LSTM
# Reset Keras' global state before any layer is created, so that models left
# over from earlier runs of this code in the same process can be released.
tf.keras.backend.clear_session()

input_shape = (MAX_FRAMES, IMG_SIZE[0], IMG_SIZE[1], 3)

input_flow = Input(shape=input_shape)

# The extracted frames are raw 0-255 pixel values; scale them to [0, 1] inside
# the model so training, evaluation and the saved model share the same
# preprocessing.
x = tf.keras.layers.Rescaling(1.0 / 255.0)(input_flow)

# Four Conv3D -> MaxPooling3D -> BatchNormalization stages. Only the first two
# stages pool along the time axis: pooling time in all four stages shrinks the
# 30 frames to 30 -> 15 -> 7 -> 3 -> 1 and would hand the LSTMs a one-step
# sequence. With spatial-only pooling in the last two stages the LSTMs receive
# 7 timesteps while the spatial size still goes 128 -> 64 -> 32 -> 16 -> 8.
for n_filters, pool_size in (
    (32, (2, 2, 2)),
    (64, (2, 2, 2)),
    (128, (1, 2, 2)),
    (256, (1, 2, 2)),
):
    x = Conv3D(
        n_filters,
        (3, 3, 3),
        activation='relu',
        padding='same',
        kernel_regularizer=tf.keras.regularizers.l2(WEIGHT_DECAY),
    )(x)
    x = MaxPooling3D(pool_size)(x)
    x = BatchNormalization()(x)

# Flatten each remaining timestep into a feature vector and model the sequence.
x = TimeDistributed(Flatten())(x)
x = LSTM(128, return_sequences=True)(x)
x = LSTM(64)(x)

# Classification head: a single sigmoid unit for the binary real(0)/fake(1) label.
x = Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(WEIGHT_DECAY))(x)
x = Dropout(0.5)(x)
output = Dense(1, activation='sigmoid')(x)

model = Model(inputs=input_flow, outputs=output)
model.compile(optimizer=Adam(learning_rate=LEARNING_RATE), loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks
# Stop after 5 epochs without val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Halve the learning rate after 3 stagnant epochs, never going below 1e-6.
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

# NOTE: tf.keras.backend.clear_session() is deliberately not called at this
# point. The model built above is still referenced, so clearing the session
# here cannot free its memory and would only reset the global Keras state the
# live model was created in.

# Training the Model
history = model.fit(
    train_data, train_labels,
    validation_data=(val_data, val_labels),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=[early_stopping, lr_scheduler],
    verbose=1
)

# Save the Model
model.save('video_classification_model.h5')

# Plot training and validation graphs
def plot_training_history(history, save_path_loss='plot_loss.png', save_path_accuracy='plot_accuracy.png'):
    """Plot, save and display the accuracy and loss curves of a training run.

    Each metric is drawn on its own figure. Drawing both on one shared
    two-panel figure and clearing it between the saves leaves every saved
    image half empty and drops the accuracy curve from the displayed figure.

    Args:
        history: Object whose ``history`` attribute maps ``'accuracy'``,
            ``'val_accuracy'``, ``'loss'`` and ``'val_loss'`` to per-epoch
            value sequences (such as the value returned by ``model.fit``).
        save_path_loss: File the loss plot is written to.
        save_path_accuracy: File the accuracy plot is written to.
    """
    curves = (
        ('accuracy', 'Model Accuracy', 'Accuracy', save_path_accuracy),
        ('loss', 'Model Loss', 'Loss', save_path_loss),
    )
    for metric, title, y_label, save_path in curves:
        plt.figure(figsize=(6, 4))
        plt.plot(history.history[metric])
        plt.plot(history.history[f'val_{metric}'])
        plt.title(title)
        plt.ylabel(y_label)
        plt.xlabel('Epoch')
        plt.legend(['Train', 'Validation'], loc='upper left')
        plt.savefig(save_path)

    # Display both figures once they have been written to disk.
    plt.show()

# Save the training and validation graphs
# The paths are passed by keyword because the function's positional order is
# (loss path, accuracy path); passing the accuracy file name first would write
# each curve to the other one's file.
plot_training_history(
    history,
    save_path_loss='training_loss_plot.png',
    save_path_accuracy='training_accuracy_plot.png',
)

# Evaluation: loss and accuracy on the held-out test split
test_loss, test_accuracy = model.evaluate(x=test_data, y=test_labels)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

# Predictions and metrics: threshold the sigmoid outputs at 0.5 to obtain
# hard 0/1 class labels
preds_proba = model.predict(x=test_data)
preds = (preds_proba > 0.5).astype(np.int32)

# Classification Report, requested as a nested dict keyed by class name
report = classification_report(
    test_labels,
    preds,
    target_names=['Real', 'Fake'],
    output_dict=True,
)

# Extract metrics for Fake
precision_fake, recall_fake, f1_score_fake = (
    report['Fake'][key] for key in ('precision', 'recall', 'f1-score')
)

# Extract metrics for Real
precision_real, recall_real, f1_score_real = (
    report['Real'][key] for key in ('precision', 'recall', 'f1-score')
)

# Overall Accuracy
overall_accuracy = report['accuracy']

# Print Metrics, one per line, rounded to two decimals
print(
    f"Precision for Fake: {precision_fake:.2f}",
    f"Recall for Fake: {recall_fake:.2f}",
    f"F1-Score for Fake: {f1_score_fake:.2f}",
    f"Precision for Real: {precision_real:.2f}",
    f"Recall for Real: {recall_real:.2f}",
    f"F1-Score for Real: {f1_score_real:.2f}",
    f"Overall Accuracy: {overall_accuracy:.2f}",
    sep="\n",
)

# Display full classification report (text form)
print("\nFull Classification Report:")
print(
    classification_report(
        y_true=test_labels,
        y_pred=preds,
        target_names=['Real', 'Fake'],
    )
)

# Confusion Matrix: rows are the true classes, columns the predicted classes
cm = confusion_matrix(y_true=test_labels, y_pred=preds)
print('Confusion Matrix:', cm, sep='\n')

# Model Summary: layer-by-layer overview of the trained network
model.summary()
