In [None]:
# Importing basic libraries
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Deep learning libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed
from tensorflow.keras.layers import Bidirectional, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.utils import to_categorical

# Additional libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix


In [None]:
# Mount Google Drive at /content/drive so the dataset paths used below resolve.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Dataset folders on the mounted Drive; load_data() is called with label 0 for the
# first and label 1 for the second.
# NOTE(review): later cells reference the dataset under a different Drive folder
# ('.../UMASS_D/First Semsters/Digital forensics/Celeb-DF/...') — confirm which is current.
real_videos_path = '/content/drive/MyDrive/Celeb-DF/Celeb-real'
fake_videos_path = '/content/drive/MyDrive/Celeb-DF/Celeb-synthesis'

In [None]:
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load clips from a directory as fixed-length frame stacks with a common label.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Label recorded for every clip kept from ``data_dir``.
        max_videos: Maximum number of directory entries examined; entries that
            end up skipped still count towards this limit.
        num_frames: Number of frames a clip must provide to be kept.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per kept clip.
        Both arrays are empty when nothing could be loaded.
    """
    videos, labels = [], []
    # os.listdir() returns entries in arbitrary order; sorting makes the selected
    # subset identical from run to run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        if len(frames) == num_frames:  # every kept clip must have the same length
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)


In [None]:
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read the leading frames of a video and resize each one.

    At most ``max_frames`` frames are read. No padding is applied: if the video
    ends (or cannot be opened) earlier, a warning is printed and the returned
    array simply holds fewer frames.
    """
    capture = cv2.VideoCapture(video_path)
    collected = []
    while capture.isOpened() and len(collected) < max_frames:
        ok, image = capture.read()
        if not ok:
            # End of stream or decode failure
            break
        collected.append(cv2.resize(image, resize))
    capture.release()

    n_read = len(collected)
    if n_read < max_frames:
        print(f"Warning: {video_path} has only {n_read} frames.")
    return np.array(collected)


In [None]:
# Label convention: 0 = real clip, 1 = synthesized clip
real_videos, real_labels = load_data(real_videos_path, 0)
fake_videos, fake_labels = load_data(fake_videos_path, 1)

# Report how many clips survived loading for each class
print(f"Real videos loaded: {len(real_videos)}")
print(f"Fake videos loaded: {len(fake_videos)}")


Real videos loaded: 100
Fake videos loaded: 100


In [None]:
# Abort early when either class came back empty
if min(len(real_videos), len(fake_videos)) == 0:
    raise ValueError("No video data loaded. Check your file paths and data loading functions.")

# Stack both classes into one dataset (real clips first, then fake)
X = np.concatenate([real_videos, fake_videos])
y = np.concatenate([real_labels, fake_labels])

# Hold out 20% of the clips; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
def build_cnn_lstm_model():
    """Build the per-frame CNN + LSTM binary classifier (uncompiled).

    Input is a clip of 30 frames of 224x224x3. Two Conv/MaxPool stages run on
    every frame via TimeDistributed, each frame is flattened, an LSTM summarises
    the sequence, and a dense head ends in a single sigmoid unit.
    """
    return Sequential([
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
        LSTM(64),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),  # single probability for binary classification
    ])

In [None]:
# Instantiate the CNN-LSTM once as `m`.
# NOTE(review): within the visible notebook `m` is compiled but never fitted;
# training below uses a separately built `model`.
m=build_cnn_lstm_model()

In [None]:
# Binary cross-entropy matches the single sigmoid output of the model.
m.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])


In [None]:

# Reload both classes from Drive (label 0 = real, label 1 = fake). This repeats
# the loading done in an earlier cell.
real_videos, real_labels = load_data(real_videos_path, 0)
fake_videos, fake_labels = load_data(fake_videos_path, 1)


print(f"Real videos loaded: {real_videos.shape[0]}")
print(f"Fake videos loaded: {fake_videos.shape[0]}")


# Fail fast if either class is empty.
if len(real_videos) == 0 or len(fake_videos) == 0:
    raise ValueError("No video data loaded. Check your file paths and data loading functions.")

# Single dataset: real clips first, then fake clips.
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))


print(f"y shape before split: {y.shape}")
# 80/20 split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# The printed shapes show the labels are already 1-D, so flatten() leaves them unchanged.
y_train = y_train.flatten()
y_test = y_test.flatten()

def build_cnn_lstm_model():
    """Return an uncompiled CNN-LSTM classifier for 30-frame 224x224 RGB-shaped clips.

    Convolution and pooling are applied frame by frame (TimeDistributed), the
    flattened frame features feed an LSTM, and the output is one sigmoid unit.
    """
    layer_stack = (
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
        LSTM(64),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    )
    network = Sequential()
    for layer in layer_stack:
        network.add(layer)
    return network

# Build a fresh CNN-LSTM and compile it for binary classification.
model = build_cnn_lstm_model()
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='binary_crossentropy',
              metrics=['accuracy'])

# The held-out split doubles as the validation set, so the "val_*" metrics in
# `history` are measured on X_test / y_test.
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))


Real videos loaded: 100
Fake videos loaded: 100
y shape before split: (200,)
X_train shape: (160, 30, 224, 224, 3)
y_train shape: (160,)
X_test shape: (40, 30, 224, 224, 3)
y_test shape: (40,)
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50

In [None]:
# Last-epoch metrics recorded by Keras during fit()
train_accuracy, val_accuracy = (
    history.history[metric][-1] for metric in ('accuracy', 'val_accuracy')
)

# Training accuracy first, then accuracy on the held-out split
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation (Test) Accuracy: {val_accuracy * 100:.2f}%")


In [None]:
# Accuracy per epoch. `history.history` is a dict of metric name -> list of
# per-epoch values, so it must be indexed with [...], not called.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title("Model Accuracy")
plt.ylabel("Accuracy")
plt.xlabel("Epochs")
# "upper" alone is not a valid legend location; a corner must be named.
plt.legend(["Train", "Test"], loc="upper left")
plt.show()

# Loss per epoch
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title("Model Loss")
plt.ylabel("Loss")
plt.xlabel("Epochs")
plt.legend(["Train", "Test"], loc="upper left")
plt.show()

In [None]:
# Make sure the label vectors are 1-D before fitting (a no-op when they already are).
y_train = y_train.flatten()
y_test = y_test.flatten()


In [None]:
# Train the model and capture the history.
# NOTE(review): `model` is the same object that was fitted in an earlier cell, so
# this call continues from its current weights rather than starting fresh.
history = model.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_test, y_test))

# Print the final training accuracy (last recorded epoch)
train_accuracy = history.history['accuracy'][-1]
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")

# Print the final validation accuracy; the validation data is the held-out test split
val_accuracy = history.history['val_accuracy'][-1]
print(f"Final Validation (Test) Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Final Training Accuracy: 54.37%
Final Validation (Test) Accuracy: 52.50%


In [None]:
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Extract the leading frames of a video, resized and padded to a fixed count.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of frames the returned stack should contain.
        resize: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of frames. If fewer than ``max_frames`` frames can be read
        (including none at all, e.g. when the path cannot be opened), a warning
        is printed and black frames matching ``resize`` are appended.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture even if resizing a frame raises.
        cap.release()

    # Ensure we have the required number of frames by padding if necessary
    if len(frames) < max_frames:
        print(f"Warning: {video_path} has only {len(frames)} frames. Padding with black frames.")
        # cv2.resize takes (width, height) while arrays are (height, width, channels);
        # derive the blank frame from `resize` so padding always matches real frames.
        blank = np.zeros((resize[1], resize[0], 3), dtype=np.uint8)
        frames.extend([blank] * (max_frames - len(frames)))

    return np.array(frames)

def predict_video(video_path):
    """Classify one video with the global ``model`` and return "Fake" or "Real".

    The frame stack from ``extract_frames`` gets a leading batch axis; a sigmoid
    score above 0.5 is reported as "Fake".
    """
    batch = np.expand_dims(extract_frames(video_path), axis=0)  # batch of one clip
    score = model.predict(batch)[0][0]
    if score > 0.5:
        return "Fake"
    return "Real"

# Spot-check the trained model on one clip from the Celeb-synthesis folder.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-synthesis/id0_id16_0000.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


The uploaded video is predicted to be: Real


In [None]:
# Spot-check the trained model on one clip from the Celeb-real folder.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


The uploaded video is predicted to be: Fake


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from sklearn.model_selection import train_test_split

# Model 1: CNN-LSTM
def build_cnn_lstm_model():
    """Model 1: frame-wise CNN followed by an LSTM; returns the uncompiled model.

    Expects clips shaped (30, 224, 224, 3) and outputs one sigmoid probability.
    """
    frame_encoder = [
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
    ]
    sequence_head = [
        LSTM(64),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(frame_encoder + sequence_head)

# Model 2: Xception-based CNN-LSTM
def build_xception_lstm_model():
    """Model 2: frozen ImageNet Xception applied per frame, followed by an LSTM.

    Clips are expected as (30, 224, 224, 3) arrays of raw 0-255 pixel values.
    A Rescaling layer maps them to [-1, 1] inside the model, which is the input
    range the pretrained Xception weights expect; without it the frozen
    backbone is fed values far outside that range.

    Returns:
        An uncompiled Sequential model with a single sigmoid output.
    """
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # backbone is used as a fixed feature extractor
    model = Sequential()
    model.add(tf.keras.Input(shape=(30, 224, 224, 3)))
    # Same scaling as keras.applications.xception.preprocess_input: x / 127.5 - 1
    model.add(tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0))
    model.add(TimeDistributed(base_model))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Model 3: Bidirectional-LSTM
def build_bidirectional_lstm_model():
    """Model 3: frame-wise CNN whose features are read by a bidirectional LSTM.

    Same convolutional front end as the CNN-LSTM model; only the recurrent layer
    differs. Returns the uncompiled model with one sigmoid output.
    """
    return Sequential([
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
        Bidirectional(LSTM(64)),  # reads the frame sequence in both directions
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ])

# Build and compile models
model1 = build_cnn_lstm_model()
model2 = build_xception_lstm_model()
model3 = build_bidirectional_lstm_model()

models = [model1, model2, model3]

# The loop variable is deliberately not named `model`: at notebook top level that
# would rebind the global `model` that predict_video() reads.
for ensemble_member in models:
    ensemble_member.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Load pre-trained weights if available
# for i, ensemble_member in enumerate(models):
#     ensemble_member.load_weights(f'model_{i}_weights.h5')

# Frame extraction function
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Extract the leading frames of a video, resized and padded to a fixed count.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of frames the returned stack should contain.
        resize: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of frames. If fewer than ``max_frames`` frames can be read
        (including none at all, e.g. when the path cannot be opened), a warning
        is printed and black frames matching ``resize`` are appended.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture even if resizing a frame raises.
        cap.release()

    # Ensure we have the required number of frames by padding if necessary
    if len(frames) < max_frames:
        print(f"Warning: {video_path} has only {len(frames)} frames. Padding with black frames.")
        # cv2.resize takes (width, height) while arrays are (height, width, channels);
        # derive the blank frame from `resize` so padding always matches real frames.
        blank = np.zeros((resize[1], resize[0], 3), dtype=np.uint8)
        frames.extend([blank] * (max_frames - len(frames)))

    return np.array(frames)

# Hold out 20% of the already-loaded clips (X, y) for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train each model for `epochs` epochs
epochs = 10
# The loop variable is not named `model` so the global `model` used by
# predict_video() is not silently replaced by the last ensemble member.
for i, ensemble_member in enumerate(models):
    print(f"Training Model {i+1}")
    history = ensemble_member.fit(X_train, y_train, epochs=epochs, batch_size=8, validation_data=(X_val, y_val))

    # Optionally, save weights after training
    ensemble_member.save_weights(f'model_{i}_weights.h5')

    # Print final validation accuracy for each model
    val_accuracy = history.history['val_accuracy'][-1]
    print(f"Final Validation Accuracy for Model {i+1}: {val_accuracy * 100:.2f}%\n")

# Ensemble prediction function
def predict_video_ensemble(video_path):
    """Classify one video by averaging the sigmoid scores of every model in ``models``.

    Returns "Fake" when the mean score exceeds 0.5, otherwise "Real".
    """
    batch = np.expand_dims(extract_frames(video_path), axis=0)  # batch of one clip

    # One scalar score per ensemble member
    scores = []
    for member in models:
        scores.append(member.predict(batch)[0][0])

    # Soft voting: threshold the average score
    mean_score = np.mean(scores)
    if mean_score > 0.5:
        return "Fake"
    return "Real"

# Spot-check the ensemble on one clip from the Celeb-real folder.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video_ensemble(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


Training Model 1
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Validation Accuracy for Model 1: 47.50%

Training Model 2
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Validation Accuracy for Model 2: 55.00%

Training Model 3
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Validation Accuracy for Model 3: 52.50%









The uploaded video is predicted to be: Real


In [None]:
# Re-run the ensemble on the same Celeb-real clip as the previous cell.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video_ensemble(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")

The uploaded video is predicted to be: Real


In [None]:
# The path must not carry trailing whitespace: a path that cannot be opened yields
# no frames, and extract_frames() then pads the whole clip with black frames, so
# the prediction would be made on an all-black input.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id10_0004.mp4'  # Change to your uploaded video's path
result = predict_video_ensemble(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


The uploaded video is predicted to be: Fake


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Bidirectional
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception
from sklearn.model_selection import train_test_split

# Model 1: CNN-LSTM
def build_cnn_lstm_model():
    """Model 1: per-frame CNN + LSTM binary classifier (uncompiled).

    Takes clips shaped (30, 224, 224, 3); the final layer is one sigmoid unit.
    """
    network = Sequential()
    for layer in [
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
        LSTM(64),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]:
        network.add(layer)
    return network

# Model 2: Xception-based CNN-LSTM
def build_xception_lstm_model():
    """Model 2: frozen ImageNet Xception applied per frame, followed by an LSTM.

    Clips are expected as (30, 224, 224, 3) arrays of raw 0-255 pixel values.
    A Rescaling layer maps them to [-1, 1] inside the model, which is the input
    range the pretrained Xception weights expect; without it the frozen
    backbone is fed values far outside that range.

    Returns:
        An uncompiled Sequential model with a single sigmoid output.
    """
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # backbone is used as a fixed feature extractor
    model = Sequential()
    model.add(tf.keras.Input(shape=(30, 224, 224, 3)))
    # Same scaling as keras.applications.xception.preprocess_input: x / 127.5 - 1
    model.add(tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0))
    model.add(TimeDistributed(base_model))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Model 3: Bidirectional-LSTM
def build_bidirectional_lstm_model():
    """Model 3: per-frame CNN features consumed by a bidirectional LSTM (uncompiled).

    Input clips are (30, 224, 224, 3); the output is one sigmoid probability.
    """
    per_frame = [
        TimeDistributed(Conv2D(32, (3, 3), activation='relu'), input_shape=(30, 224, 224, 3)),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Conv2D(64, (3, 3), activation='relu')),
        TimeDistributed(MaxPooling2D((2, 2))),
        TimeDistributed(Flatten()),
    ]
    classifier = [
        Bidirectional(LSTM(64)),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid'),
    ]
    return Sequential(per_frame + classifier)

# Build and compile models
model1 = build_cnn_lstm_model()
model2 = build_xception_lstm_model()
model3 = build_bidirectional_lstm_model()

models = [model1, model2, model3]

# The loop variable is deliberately not named `model`: at notebook top level that
# would rebind the global `model` that predict_video() reads.
for ensemble_member in models:
    ensemble_member.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Load pre-trained weights if available
# for i, ensemble_member in enumerate(models):
#     ensemble_member.load_weights(f'model_{i}_weights.h5')

# Frame extraction function
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Extract the leading frames of a video, resized and padded to a fixed count.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of frames the returned stack should contain.
        resize: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of frames. If fewer than ``max_frames`` frames can be read
        (including none at all, e.g. when the path cannot be opened), a warning
        is printed and black frames matching ``resize`` are appended.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture even if resizing a frame raises.
        cap.release()

    # Ensure we have the required number of frames by padding if necessary
    if len(frames) < max_frames:
        print(f"Warning: {video_path} has only {len(frames)} frames. Padding with black frames.")
        # cv2.resize takes (width, height) while arrays are (height, width, channels);
        # derive the blank frame from `resize` so padding always matches real frames.
        blank = np.zeros((resize[1], resize[0], 3), dtype=np.uint8)
        frames.extend([blank] * (max_frames - len(frames)))

    return np.array(frames)

# Hold out 20% of the already-loaded clips (X, y) for validation
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train each model for `epochs` epochs
epochs = 50
# The loop variable is not named `model` so the global `model` used by
# predict_video() is not silently replaced by the last ensemble member.
for i, ensemble_member in enumerate(models):
    print(f"Training Model {i+1}")
    history = ensemble_member.fit(X_train, y_train, epochs=epochs, batch_size=8, validation_data=(X_val, y_val))

    # Optionally, save weights after training
    ensemble_member.save_weights(f'model_{i}_weights.h5')

    # Print final validation accuracy for each model
    val_accuracy = history.history['val_accuracy'][-1]
    print(f"Final Validation Accuracy for Model {i+1}: {val_accuracy * 100:.2f}%\n")

# Ensemble prediction function
def predict_video_ensemble(video_path):
    """Return "Fake" or "Real" for a video using the mean score of all ``models``.

    Each model scores the same single-clip batch; the scores are averaged and
    compared against 0.5.
    """
    clip = extract_frames(video_path)
    batch = clip[None, ...]  # leading batch axis

    # Collect one sigmoid score per model
    scores = [member.predict(batch)[0][0] for member in models]

    # Average the scores and threshold
    is_fake = np.mean(scores) > 0.5
    return "Fake" if is_fake else "Real"

# Spot-check the retrained ensemble on one clip from the Celeb-real folder.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video_ensemble(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


Training Model 1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Final Validation Accuracy for Model 1: 47.50%

Training Model 2
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
E

In [None]:
# Combine the three different models into one network and train it in a single run
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Bidirectional, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception

# Frame extraction function
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Extract the leading frames of a video, resized and padded to a fixed count.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of frames the returned stack should contain.
        resize: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of frames. If fewer than ``max_frames`` frames can be read
        (including none at all, e.g. when the path cannot be opened), a warning
        is printed and black frames matching ``resize`` are appended.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture even if resizing a frame raises.
        cap.release()

    # Ensure we have the required number of frames by padding if necessary
    if len(frames) < max_frames:
        print(f"Warning: {video_path} has only {len(frames)} frames. Padding with black frames.")
        # cv2.resize takes (width, height) while arrays are (height, width, channels);
        # derive the blank frame from `resize` so padding always matches real frames.
        blank = np.zeros((resize[1], resize[0], 3), dtype=np.uint8)
        frames.extend([blank] * (max_frames - len(frames)))

    return np.array(frames)

# Combined ensemble model
def build_combined_model():
    """Build one network with three parallel branches over the same clip input.

    Branches (all fed from a shared (30, 224, 224, 3) input):
      1. per-frame CNN -> LSTM
      2. frozen ImageNet Xception per frame -> LSTM
      3. per-frame CNN -> bidirectional LSTM

    The branch outputs are concatenated and classified by a dense head ending in
    one sigmoid unit. Input pixels are expected as raw 0-255 values; only the
    Xception branch rescales them (to [-1, 1]), because that is the range the
    pretrained weights expect.

    Returns:
        An uncompiled functional ``Model``.
    """
    # Input layer for video frames
    input_layer = Input(shape=(30, 224, 224, 3))

    # Branch 1: CNN-LSTM
    cnn_lstm_branch = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(input_layer)
    cnn_lstm_branch = TimeDistributed(MaxPooling2D((2, 2)))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(MaxPooling2D((2, 2)))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(Flatten())(cnn_lstm_branch)
    cnn_lstm_branch = LSTM(64)(cnn_lstm_branch)

    # Branch 2: Xception-LSTM
    xception_base = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    xception_base.trainable = False  # fixed feature extractor
    # Same scaling as keras.applications.xception.preprocess_input: x / 127.5 - 1
    xception_input = tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0)(input_layer)
    xception_branch = TimeDistributed(xception_base)(xception_input)
    xception_branch = TimeDistributed(Flatten())(xception_branch)
    xception_branch = LSTM(64)(xception_branch)

    # Branch 3: Bidirectional-LSTM
    bidirectional_branch = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(input_layer)
    bidirectional_branch = TimeDistributed(MaxPooling2D((2, 2)))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(MaxPooling2D((2, 2)))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(Flatten())(bidirectional_branch)
    bidirectional_branch = Bidirectional(LSTM(64))(bidirectional_branch)

    # Concatenate the branches
    combined = concatenate([cnn_lstm_branch, xception_branch, bidirectional_branch])

    # Fully connected layer for classification
    dense_layer = Dense(64, activation='relu')(combined)
    dropout_layer = Dropout(0.5)(dense_layer)
    output_layer = Dense(1, activation='sigmoid')(dropout_layer)

    # Model
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Instantiate and compile the model
combined_model = build_combined_model()
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Hold out 20% of the already-loaded clips (X, y) for validation.
# train_test_split is not imported in this cell; it comes from an earlier cell's import.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model for `epochs` epochs
epochs = 50
history = combined_model.fit(X_train, y_train, epochs=epochs, batch_size=8, validation_data=(X_val, y_val))

# Print final validation accuracy (last recorded epoch)
val_accuracy = history.history['val_accuracy'][-1]
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")

# Ensemble prediction function
def predict_video(video_path):
    """Classify one video with ``combined_model``; returns "Fake" or "Real".

    A sigmoid score above 0.5 is reported as "Fake".
    """
    batch = np.expand_dims(extract_frames(video_path), axis=0)  # batch of one clip
    score = combined_model.predict(batch)[0][0]
    if score > 0.5:
        return "Fake"
    return "Real"

# Testing with a video
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")

# The path must not carry trailing whitespace: a path that cannot be opened yields
# no frames, and extract_frames() then pads the whole clip with black frames, so
# the prediction would be made on an all-black input.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id10_0004.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Validation Accuracy: 47.50%
The uploaded video is predicted to be: Fake
The uploaded video is predicted to be: Real


In [None]:
# Combine the three different models into one network and train it in a single run
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM, TimeDistributed, Bidirectional, concatenate
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import Xception

# Frame extraction function
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Extract the leading frames of a video, resized and padded to a fixed count.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        max_frames: Number of frames the returned stack should contain.
        resize: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of frames. If fewer than ``max_frames`` frames can be read
        (including none at all, e.g. when the path cannot be opened), a warning
        is printed and black frames matching ``resize`` are appended.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the capture even if resizing a frame raises.
        cap.release()

    # Ensure we have the required number of frames by padding if necessary
    if len(frames) < max_frames:
        print(f"Warning: {video_path} has only {len(frames)} frames. Padding with black frames.")
        # cv2.resize takes (width, height) while arrays are (height, width, channels);
        # derive the blank frame from `resize` so padding always matches real frames.
        blank = np.zeros((resize[1], resize[0], 3), dtype=np.uint8)
        frames.extend([blank] * (max_frames - len(frames)))

    return np.array(frames)

# Combined ensemble model
def build_combined_model():
    """Build one network with three parallel branches over the same clip input.

    Branches (all fed from a shared (30, 224, 224, 3) input):
      1. per-frame CNN -> LSTM
      2. frozen ImageNet Xception per frame -> LSTM
      3. per-frame CNN -> bidirectional LSTM

    The branch outputs are concatenated and classified by a dense head ending in
    one sigmoid unit. Input pixels are expected as raw 0-255 values; only the
    Xception branch rescales them (to [-1, 1]), because that is the range the
    pretrained weights expect.

    Returns:
        An uncompiled functional ``Model``.
    """
    # Input layer for video frames
    input_layer = Input(shape=(30, 224, 224, 3))

    # Branch 1: CNN-LSTM
    cnn_lstm_branch = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(input_layer)
    cnn_lstm_branch = TimeDistributed(MaxPooling2D((2, 2)))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(MaxPooling2D((2, 2)))(cnn_lstm_branch)
    cnn_lstm_branch = TimeDistributed(Flatten())(cnn_lstm_branch)
    cnn_lstm_branch = LSTM(64)(cnn_lstm_branch)

    # Branch 2: Xception-LSTM
    xception_base = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    xception_base.trainable = False  # fixed feature extractor
    # Same scaling as keras.applications.xception.preprocess_input: x / 127.5 - 1
    xception_input = tf.keras.layers.Rescaling(1.0 / 127.5, offset=-1.0)(input_layer)
    xception_branch = TimeDistributed(xception_base)(xception_input)
    xception_branch = TimeDistributed(Flatten())(xception_branch)
    xception_branch = LSTM(64)(xception_branch)

    # Branch 3: Bidirectional-LSTM
    bidirectional_branch = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(input_layer)
    bidirectional_branch = TimeDistributed(MaxPooling2D((2, 2)))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(MaxPooling2D((2, 2)))(bidirectional_branch)
    bidirectional_branch = TimeDistributed(Flatten())(bidirectional_branch)
    bidirectional_branch = Bidirectional(LSTM(64))(bidirectional_branch)

    # Concatenate the branches
    combined = concatenate([cnn_lstm_branch, xception_branch, bidirectional_branch])

    # Fully connected layer for classification
    dense_layer = Dense(64, activation='relu')(combined)
    dropout_layer = Dropout(0.5)(dense_layer)
    output_layer = Dense(1, activation='sigmoid')(dropout_layer)

    # Model
    model = Model(inputs=input_layer, outputs=output_layer)
    return model

# Instantiate and compile the model
combined_model = build_combined_model()
combined_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Hold out 20% of the already-loaded clips (X, y) for validation.
# train_test_split is not imported in this cell; it comes from an earlier cell's import.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model for `epochs` epochs
epochs = 50
history = combined_model.fit(X_train, y_train, epochs=epochs, batch_size=8, validation_data=(X_val, y_val))

# Print final validation accuracy (last recorded epoch)
val_accuracy = history.history['val_accuracy'][-1]
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")

# Ensemble prediction function
def predict_video(video_path):
    """Run ``combined_model`` on one video and map its score to "Fake" / "Real".

    Scores strictly above 0.5 are labelled "Fake".
    """
    clip = extract_frames(video_path)
    batch = clip[None, ...]  # leading batch axis
    score = combined_model.predict(batch)[0][0]
    label = "Fake" if score > 0.5 else "Real"
    return label

# Testing with a video
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id0_0003.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")

# The path must not carry trailing whitespace: a path that cannot be opened yields
# no frames, and extract_frames() then pads the whole clip with black frames, so
# the prediction would be made on an all-black input.
uploaded_video_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real/id10_0004.mp4'  # Change to your uploaded video's path
result = predict_video(uploaded_video_path)
print(f"The uploaded video is predicted to be: {result}")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50

In [None]:
# Transfer learning
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Paths to datasets
# Dataset folders on the mounted Drive; load_data() is called with label=0 for the
# first and label=1 for the second.
# NOTE(review): an earlier cell defines these names with a different Drive folder
# ('/content/drive/MyDrive/Celeb-DF/...') — confirm which location is current.
real_videos_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-real'
fake_videos_path = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build transfer learning model with Xception as base
def build_transfer_learning_model():
    """Build a frozen-Xception + LSTM binary classifier for 30-frame clips.

    Each 224x224x3 frame is rescaled, passed through an ImageNet-pretrained
    Xception without its classification top, average-pooled to one feature
    vector, and the 30-step sequence is summarised by an LSTM and a dense head.

    Returns:
        An uncompiled ``Sequential`` model mapping ``(30, 224, 224, 3)`` clips
        of raw 0-255 pixels to a single sigmoid output.
    """
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze the base model layers

    model = Sequential()
    model.add(tf.keras.Input(shape=(30, 224, 224, 3)))
    # Xception's ImageNet weights expect pixels scaled to [-1, 1] (what
    # xception.preprocess_input does). Rescaling inside the model lets the
    # clips stay in memory as compact raw 0-255 arrays.
    model.add(tf.keras.layers.Rescaling(scale=1.0 / 127.5, offset=-1.0))
    # NOTE(review): frames decoded with cv2 are presumably BGR while the
    # ImageNet weights were trained on RGB - confirm and convert upstream.
    model.add(TimeDistributed(base_model))
    model.add(TimeDistributed(GlobalAveragePooling2D()))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Build and compile the classifier
model = build_transfer_learning_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=10,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Training Accuracy: 58.75%
Final Validation Accuracy: 37.50%


In [None]:
# Transfer learning
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout
from tensorflow.keras.applications import Xception
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build transfer learning model with Xception as base
def build_transfer_learning_model():
    """Build a frozen-Xception + LSTM binary classifier for 30-frame clips.

    Each 224x224x3 frame is rescaled, passed through an ImageNet-pretrained
    Xception without its classification top, average-pooled to one feature
    vector, and the 30-step sequence is summarised by an LSTM and a dense head.

    Returns:
        An uncompiled ``Sequential`` model mapping ``(30, 224, 224, 3)`` clips
        of raw 0-255 pixels to a single sigmoid output.
    """
    base_model = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False  # Freeze the base model layers

    model = Sequential()
    model.add(tf.keras.Input(shape=(30, 224, 224, 3)))
    # Xception's ImageNet weights expect pixels scaled to [-1, 1] (what
    # xception.preprocess_input does). Rescaling inside the model lets the
    # clips stay in memory as compact raw 0-255 arrays.
    model.add(tf.keras.layers.Rescaling(scale=1.0 / 127.5, offset=-1.0))
    # NOTE(review): frames decoded with cv2 are presumably BGR while the
    # ImageNet weights were trained on RGB - confirm and convert upstream.
    model.add(TimeDistributed(base_model))
    model.add(TimeDistributed(GlobalAveragePooling2D()))
    model.add(LSTM(64))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))
    return model

# Build and compile the classifier
model = build_transfer_learning_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=50,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Final Training Accuracy: 70.63%
Final Validation Accuracy: 47.50%


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout, Input, Concatenate
from tensorflow.keras.applications import Xception, VGG16
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build transfer learning model with Xception and VGG16 as base
def build_transfer_learning_model():
    """Build a two-branch (Xception + VGG16) + LSTM clip classifier.

    Both ImageNet-pretrained backbones are frozen and applied to every frame;
    their average-pooled features are concatenated per frame and the 30-step
    sequence is summarised by an LSTM and a dense head.

    Returns:
        An uncompiled ``Model`` mapping ``(30, 224, 224, 3)`` clips of raw
        0-255 pixels to a single sigmoid output.
    """
    # Define a single input layer shared by both branches
    input_layer = Input(shape=(30, 224, 224, 3))

    # Xception branch. Its ImageNet weights expect pixels scaled to [-1, 1]
    # (what xception.preprocess_input does), so rescale the raw frames first.
    # NOTE(review): frames decoded with cv2 are presumably BGR while Xception
    # was trained on RGB - confirm and convert upstream.
    xception_base = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    xception_base.trainable = False
    xception_input = tf.keras.layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(input_layer)
    xception_branch = TimeDistributed(xception_base)(xception_input)
    xception_branch = TimeDistributed(GlobalAveragePooling2D())(xception_branch)

    # VGG16 branch. Its ImageNet weights expect unscaled pixels zero-centred
    # with the ImageNet channel means (what vgg16.preprocess_input does).
    # The means below are in B, G, R order - assumes cv2-style BGR frames;
    # TODO confirm against the frame loader.
    vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    vgg_base.trainable = False
    vgg_input = tf.keras.layers.Rescaling(
        scale=1.0, offset=[-103.939, -116.779, -123.68]
    )(input_layer)
    vgg_branch = TimeDistributed(vgg_base)(vgg_input)
    vgg_branch = TimeDistributed(GlobalAveragePooling2D())(vgg_branch)

    # Concatenate the per-frame features of both branches
    concatenated = Concatenate()([xception_branch, vgg_branch])

    # LSTM and Dense layers
    x = LSTM(64)(concatenated)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    # Create the model with input_layer as the input
    model = Model(inputs=input_layer, outputs=outputs)
    return model

# Build and compile the classifier
model = build_transfer_learning_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=10,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Training Accuracy: 58.75%
Final Validation Accuracy: 40.00%


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout, Input, Concatenate
from tensorflow.keras.applications import Xception, VGG16
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build transfer learning model with Xception and VGG16 as base
def build_transfer_learning_model():
    """Build a two-branch (Xception + VGG16) + LSTM clip classifier.

    Both ImageNet-pretrained backbones are frozen and applied to every frame;
    their average-pooled features are concatenated per frame and the 30-step
    sequence is summarised by an LSTM and a dense head.

    Returns:
        An uncompiled ``Model`` mapping ``(30, 224, 224, 3)`` clips of raw
        0-255 pixels to a single sigmoid output.
    """
    # Define a single input layer shared by both branches
    input_layer = Input(shape=(30, 224, 224, 3))

    # Xception branch. Its ImageNet weights expect pixels scaled to [-1, 1]
    # (what xception.preprocess_input does), so rescale the raw frames first.
    # NOTE(review): frames decoded with cv2 are presumably BGR while Xception
    # was trained on RGB - confirm and convert upstream.
    xception_base = Xception(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    xception_base.trainable = False
    xception_input = tf.keras.layers.Rescaling(scale=1.0 / 127.5, offset=-1.0)(input_layer)
    xception_branch = TimeDistributed(xception_base)(xception_input)
    xception_branch = TimeDistributed(GlobalAveragePooling2D())(xception_branch)

    # VGG16 branch. Its ImageNet weights expect unscaled pixels zero-centred
    # with the ImageNet channel means (what vgg16.preprocess_input does).
    # The means below are in B, G, R order - assumes cv2-style BGR frames;
    # TODO confirm against the frame loader.
    vgg_base = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    vgg_base.trainable = False
    vgg_input = tf.keras.layers.Rescaling(
        scale=1.0, offset=[-103.939, -116.779, -123.68]
    )(input_layer)
    vgg_branch = TimeDistributed(vgg_base)(vgg_input)
    vgg_branch = TimeDistributed(GlobalAveragePooling2D())(vgg_branch)

    # Concatenate the per-frame features of both branches
    concatenated = Concatenate()([xception_branch, vgg_branch])

    # LSTM and Dense layers
    x = LSTM(64)(concatenated)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(1, activation='sigmoid')(x)

    # Create the model with input_layer as the input
    model = Model(inputs=input_layer, outputs=outputs)
    return model

# Build and compile the classifier
model = build_transfer_learning_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=50,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Final Training Accuracy: 76.25%
Final Validation Accuracy: 42.50%


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout, Input, Concatenate, Attention, Reshape,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build the model with CNN and Attention Mechanism
def build_attention_cnn_model():
    """Build a per-frame CNN + self-attention + LSTM clip classifier.

    Returns:
        An uncompiled ``Model`` mapping ``(30, 224, 224, 3)`` clips of raw
        0-255 pixels to a single sigmoid output.
    """
    input_layer = Input(shape=(30, 224, 224, 3))  # 30 frames, 224x224 resolution, 3 channels

    # Scale raw 0-255 pixels to [0, 1]; the convolutions below are trained
    # from scratch and are fed unnormalised inputs otherwise.
    scaled = tf.keras.layers.Rescaling(1.0 / 255)(input_layer)

    # CNN feature extractor applied to each frame
    cnn = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(scaled)
    cnn = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(cnn)
    cnn = TimeDistributed(GlobalAveragePooling2D())(cnn)  # One 64-d vector per frame

    # Self-attention across the frame sequence (query and value are the same).
    # The pooled features are already one vector per frame, so no per-frame
    # Flatten is needed before the LSTM.
    attention = Attention()([cnn, cnn])

    # LSTM captures temporal dependencies between frames
    x = LSTM(64)(attention)

    # Dense and Dropout layers for classification
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer for binary classification
    outputs = Dense(1, activation='sigmoid')(x)

    # Create the (uncompiled) model
    model = Model(inputs=input_layer, outputs=outputs)
    return model

# Build and compile the classifier
model = build_attention_cnn_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=10,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Final Training Accuracy: 48.12%
Final Validation Accuracy: 45.00%


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout, Input, Concatenate, Attention, Reshape,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build the model with CNN and Attention Mechanism
def build_attention_cnn_model():
    """Build a per-frame CNN + self-attention + LSTM clip classifier.

    Returns:
        An uncompiled ``Model`` mapping ``(30, 224, 224, 3)`` clips of raw
        0-255 pixels to a single sigmoid output.
    """
    input_layer = Input(shape=(30, 224, 224, 3))  # 30 frames, 224x224 resolution, 3 channels

    # Scale raw 0-255 pixels to [0, 1]; the convolutions below are trained
    # from scratch and are fed unnormalised inputs otherwise.
    scaled = tf.keras.layers.Rescaling(1.0 / 255)(input_layer)

    # CNN feature extractor applied to each frame
    cnn = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(scaled)
    cnn = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(cnn)
    cnn = TimeDistributed(GlobalAveragePooling2D())(cnn)  # One 64-d vector per frame

    # Self-attention across the frame sequence (query and value are the same).
    # The pooled features are already one vector per frame, so no per-frame
    # Flatten is needed before the LSTM.
    attention = Attention()([cnn, cnn])

    # LSTM captures temporal dependencies between frames
    x = LSTM(64)(attention)

    # Dense and Dropout layers for classification
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer for binary classification
    outputs = Dense(1, activation='sigmoid')(x)

    # Create the (uncompiled) model
    model = Model(inputs=input_layer, outputs=outputs)
    return model

# Build and compile the classifier
model = build_attention_cnn_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=50,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Final Training Accuracy: 50.63%
Final Validation Accuracy: 45.00%


In [None]:
import os
import numpy as np
import cv2
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Conv2D, GlobalAveragePooling2D, TimeDistributed, LSTM, Dropout, Input, Attention, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import matplotlib.pyplot as plt

# Dataset locations on the mounted Google Drive
celeb_df_root = '/content/drive/MyDrive/UMASS_D/First Semsters/Digital forensics/Celeb-DF'
real_videos_path = celeb_df_root + '/Celeb-real'
fake_videos_path = celeb_df_root + '/Celeb-synthesis'

# Function to extract frames from videos
def extract_frames(video_path, max_frames=30, resize=(224, 224)):
    """Read up to ``max_frames`` frames from a video, resized and zero-padded.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        max_frames: Number of frames in the returned clip.
        resize: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        ``np.ndarray`` of shape ``(max_frames, height, width, 3)``. Frames are
        kept as OpenCV decodes them; when the video yields fewer than
        ``max_frames`` frames (including files that cannot be opened) the
        remainder is all-zero uint8 padding.
    """
    width, height = resize
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened() and len(frames) < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frames.append(cv2.resize(frame, resize))
    finally:
        # Release the decoder handle even if resizing raises.
        cap.release()
    # Pad with black frames matching the requested size; a hard-coded 224x224
    # pad would not stack with frames resized to anything else.
    padding = np.zeros((height, width, 3), dtype=np.uint8)
    frames.extend(padding for _ in range(max_frames - len(frames)))
    return np.array(frames)

# Load data
def load_data(data_dir, label, max_videos=100, num_frames=30):
    """Load up to ``max_videos`` clips from a directory and label them.

    Args:
        data_dir: Directory whose entries are passed to ``extract_frames``.
        label: Class label attached to every clip from this directory.
        max_videos: Maximum number of directory entries to examine.
        num_frames: Frame count requested from ``extract_frames`` and required
            of every accepted clip.

    Returns:
        Tuple ``(videos, labels)`` of NumPy arrays with one entry per accepted
        clip.
    """
    videos, labels = [], []
    # os.listdir order is arbitrary; sort so the same files are picked each run.
    for i, filename in enumerate(sorted(os.listdir(data_dir))):
        if i == max_videos:
            break
        filepath = os.path.join(data_dir, filename)
        frames = extract_frames(filepath, max_frames=num_frames)
        # extract_frames zero-pads short clips, so a length check alone never
        # fails; an all-zero clip means no frame could actually be decoded.
        if len(frames) == num_frames and np.any(frames):
            videos.append(frames)
            labels.append(label)
        else:
            print(f"Skipped {filename}: Not enough frames or corrupted video.")
    return np.array(videos), np.array(labels)

# Load real and fake video datasets (label 0 = real, label 1 = fake)
real_videos, real_labels = load_data(real_videos_path, label=0)
fake_videos, fake_labels = load_data(fake_videos_path, label=1)

# Combine and shuffle the dataset
X = np.concatenate((real_videos, fake_videos))
y = np.concatenate((real_labels, fake_labels))
X, y = shuffle(X, y, random_state=42)

# Split data into training and validation sets. Stratify on the label so the
# small validation set keeps the same real/fake ratio as the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Build the model with CNN and Attention Mechanism
def build_attention_cnn_model():
    """Build a per-frame CNN + self-attention + LSTM clip classifier.

    Returns:
        An uncompiled ``Model`` mapping ``(30, 224, 224, 3)`` clips of raw
        0-255 pixels to a single sigmoid output.
    """
    input_layer = Input(shape=(30, 224, 224, 3))  # 30 frames, 224x224 resolution, 3 channels

    # Scale raw 0-255 pixels to [0, 1]; the convolutions below are trained
    # from scratch and are fed unnormalised inputs otherwise.
    scaled = tf.keras.layers.Rescaling(1.0 / 255)(input_layer)

    # CNN feature extractor applied to each frame
    cnn = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(scaled)
    cnn = TimeDistributed(Conv2D(64, (3, 3), activation='relu'))(cnn)
    cnn = TimeDistributed(GlobalAveragePooling2D())(cnn)  # One 64-d vector per frame

    # Self-attention across the frame sequence (query and value are the same).
    # The pooled features are already one vector per frame, so no per-frame
    # Flatten is needed before the LSTM.
    attention = Attention()([cnn, cnn])

    # LSTM captures temporal dependencies between frames
    x = LSTM(64)(attention)

    # Dense and Dropout layers for classification
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)

    # Output layer for binary classification
    outputs = Dense(1, activation='sigmoid')(x)

    # Create the (uncompiled) model
    model = Model(inputs=input_layer, outputs=outputs)
    return model

# Build and compile the classifier
model = build_attention_cnn_model()
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training clips, validating after every epoch
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=8,
    epochs=100,
)

# Report the metrics recorded for the last epoch
final_metrics = {name: values[-1] for name, values in history.history.items()}
train_accuracy = final_metrics['accuracy']
val_accuracy = final_metrics['val_accuracy']
print(f"Final Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")

# Plot the per-epoch training and validation accuracy curves
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(history.history['accuracy'], label='Training Accuracy')
ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.set_title('Training and Validation Accuracy')
ax.legend()
ax.grid(True)
plt.show()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100