In [None]:
!pip install tensorflow
!pip install keras
!pip install opencv-python
!pip install matplotlib



In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import Xception
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import cv2
import os

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset path below lives under this mount.
drive.mount('/content/drive')

# Set the dataset directory (metadata.json is read from this folder by a later cell)
video_dir = '/content/drive/MyDrive/deepfake_dataset/'


Mounted at /content/drive


In [None]:
import json
import cv2
import os
import numpy as np
import pickle
import time

# Locations on the mounted Drive: the dataset folder and the extraction checkpoint file.
video_dir = '/content/drive/MyDrive/deepfake_dataset/'
progress_path = '/content/drive/MyDrive/deepfake_progress.pkl'

# Parse the JSON metadata file that sits inside the dataset folder.
metadata_file = os.path.join(video_dir, 'metadata.json')
with open(metadata_file) as meta_fh:
    metadata = json.load(meta_fh)

# Function to extract frames from a video file
def extract_frames(video_path, frames_per_video=5, target_size=(299, 299)):
    """Sample up to ``frames_per_video`` evenly spaced frames from a video file.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        frames_per_video: Maximum number of frames to return (must be >= 1).
        target_size: ``(width, height)`` each frame is resized to via
            ``cv2.resize``; defaults to the 299x299 used by the rest of the
            notebook.

    Returns:
        A list of resized frames, in the order they appear in the video. The
        list is shorter than ``frames_per_video`` when the video has fewer
        frames or some reads fail, and empty when the video cannot be opened.

    Raises:
        ValueError: If ``frames_per_video`` is smaller than 1.

    NOTE(review): frames are kept in whatever channel order ``cap.read()``
    yields (OpenCV normally decodes to BGR) — confirm whether an RGB
    conversion is wanted before feeding an ImageNet-pretrained network.
    """
    if frames_per_video < 1:
        raise ValueError("frames_per_video must be >= 1")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        # An unreadable/missing file simply contributes no frames.
        if not cap.isOpened():
            return frames

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_interval = max(1, frame_count // frames_per_video)

        for i in range(0, frame_count, frame_interval):
            # Integer division can leave room for an extra sampling position
            # (e.g. 12 frames / 5 -> step 2 -> 6 positions); stop once the
            # requested number of frames has been collected.
            if len(frames) >= frames_per_video:
                break
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if ret:
                frames.append(cv2.resize(frame, target_size))
    finally:
        # Always free the decoder handle, even if a read/resize raised.
        cap.release()

    return frames

# Process all videos to extract frames
def process_videos(metadata, base_path, frames_per_video=5, start_idx=0,
                   frames=None, labels=None):
    """Extract frames and labels for every ``.mp4`` entry in ``metadata``.

    Args:
        metadata: Mapping of video file name -> info dict; ``info['label']``
            equal to ``'FAKE'`` yields label 1, anything else label 0.
        base_path: Directory the video file names are joined onto.
        frames_per_video: Passed through to ``extract_frames``.
        start_idx: Position (in ``metadata`` iteration order) of the first
            entry to process; earlier entries are skipped.
        frames: Optional frames already extracted by a previous run; new
            frames are appended to a copy of this list.
        labels: Optional labels matching ``frames``.

    Returns:
        ``(X, y)`` where ``X`` is a float32 array of all frames scaled by
        1/255 and ``y`` is the array of 0/1 labels, one per frame.

    Side effects:
        Prints progress and writes a checkpoint to ``progress_path`` every
        10th index and once more after the last processed entry.
    """
    # Copy so the caller's lists are not mutated, and so resumed runs keep
    # everything extracted before the interruption.
    frames = [] if frames is None else list(frames)
    labels = [] if labels is None else list(labels)

    start_time = time.time()
    total_videos = len(metadata)
    last_done = None

    for idx, (video_file, info) in enumerate(metadata.items()):
        if idx < start_idx:
            continue
        if video_file.endswith('.mp4'):
            video_path = os.path.join(base_path, video_file)
            video_frames = extract_frames(video_path, frames_per_video)

            if video_frames:
                label = 1 if info['label'] == 'FAKE' else 0
                frames.extend(video_frames)
                labels.extend([label] * len(video_frames))
        last_done = idx

        # Save progress and print progress
        if idx % 10 == 0:  # Adjust this value to print progress more or less frequently
            elapsed_time = time.time() - start_time
            avg_time_per_video = elapsed_time / (idx - start_idx + 1)
            remaining_videos = total_videos - idx - 1
            estimated_remaining_time = avg_time_per_video * remaining_videos

            print(f"Processed {idx + 1} videos out of {total_videos}")
            print(f"Estimated remaining time: {estimated_remaining_time / 3600:.2f} hours")

            _save_progress(frames, labels, idx)

    # Persist the tail of the run too, so a re-run does not redo the last
    # (up to nine) videos processed after the final periodic checkpoint.
    if last_done is not None and last_done % 10 != 0:
        _save_progress(frames, labels, last_done)

    # float32 is what the network consumes and halves memory versus the
    # float64 that dividing a uint8 array would produce.
    return np.asarray(frames, dtype=np.float32) / 255.0, np.asarray(labels)


def _save_progress(frames, labels, last_processed_idx):
    """Atomically write the extraction checkpoint to ``progress_path``."""
    progress = {
        'frames': frames,
        'labels': labels,
        'last_processed_idx': last_processed_idx
    }
    # Write to a sibling file first and rename: an interrupted write then
    # cannot leave a truncated, unloadable checkpoint behind.
    tmp_path = progress_path + '.tmp'
    with open(tmp_path, 'wb') as f:
        pickle.dump(progress, f)
    os.replace(tmp_path, progress_path)


# Check if there's saved progress
if os.path.exists(progress_path):
    # NOTE: pickle.load can execute arbitrary code; this is acceptable only
    # because the file is the checkpoint written by this notebook itself.
    # NOTE(review): a checkpoint written by the earlier version of this cell
    # may contain only the frames of its most recent run — delete the file to
    # rebuild the full dataset if it looks too small.
    with open(progress_path, 'rb') as f:
        progress = pickle.load(f)
    frames = progress['frames']
    labels = progress['labels']
    last_processed_idx = progress['last_processed_idx']
    # The saved index has already been processed; resume with the next one.
    start_idx = last_processed_idx + 1
else:
    frames = []
    labels = []
    last_processed_idx = 0
    start_idx = 0

# Continue processing videos from the last saved progress, carrying over the
# frames and labels recovered from the checkpoint.
X, y = process_videos(metadata, video_dir, start_idx=start_idx,
                      frames=frames, labels=labels)

if len(X) == 0:
    raise ValueError("No frames were extracted; check the dataset path and metadata.")

# Split data into training and validation sets
# NOTE(review): the split is per frame, so frames of one video can land in
# both sets — confirm whether a per-video (grouped) split is wanted.
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the size of the data
print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")


Processed 1331 videos out of 1334
Estimated remaining time: 0.01 hours
Training data shape: (16, 299, 299, 3)
Validation data shape: (4, 299, 299, 3)


In [None]:
import os
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.models import load_model
from tensorflow.keras.layers import Input, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.applications import Xception

# Define the model
def create_model():
    """Build and compile the frame classifier: an Xception backbone with a dense head.

    Returns:
        A compiled Keras ``Model`` whose input has shape (299, 299, 3) and
        whose output is a single sigmoid unit, trained with Adam and binary
        cross-entropy and reporting accuracy.
    """
    image_input = Input(shape=(299, 299, 3), name='input_data')

    # Xception initialised from ImageNet weights, without its classification top,
    # wired directly onto the input tensor above.
    backbone = Xception(weights='imagenet', include_top=False, input_tensor=image_input, name='xception_base')

    # Custom head, applied in order to the backbone's output.
    head_layers = (
        GlobalAveragePooling2D(name='global_avg_pool'),
        Dense(1024, activation='relu', name='dense1'),
        Dropout(0.5, name='dropout'),
        Dense(1, activation='sigmoid', name='predictions'),
    )
    tensor = backbone.output
    for layer in head_layers:
        tensor = layer(tensor)

    classifier = Model(inputs=image_input, outputs=tensor)
    classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return classifier

# Path on the mounted Drive where the model is stored: the training cell reloads
# the model from here when the file exists and checkpoints the best model to it.
model_path = '/content/drive/MyDrive/combined_model.keras'




In [None]:
# Load model if it exists, otherwise create a new one
if os.path.exists(model_path):
    model = load_model(model_path)
    # The file on disk is the best model of earlier runs. Measure its current
    # validation loss so this run only overwrites it with something better;
    # otherwise the checkpoint's "best" starts at infinity and the first
    # epoch would replace it unconditionally.
    best_val_loss = model.evaluate(
        X_val, y_val, batch_size=4, verbose=0, return_dict=True
    )['loss']
else:
    model = create_model()
    best_val_loss = None

# Callbacks for training
checkpoint = ModelCheckpoint(
    model_path, monitor='val_loss', save_best_only=True, mode='min',
    initial_value_threshold=best_val_loss
)
# restore_best_weights keeps the in-memory model consistent with the saved
# checkpoint instead of leaving it `patience` epochs past the best one.
early_stop = EarlyStopping(
    monitor='val_loss', patience=10, mode='min', restore_best_weights=True
)

# Requires X_train, X_val, y_train, and y_val from the data-preparation cell

# Defined up front so later cells can test `results is None` if training failed.
results = None

# Train the model
try:
    results = model.fit(
        X_train, y_train,  # Providing training data
        validation_data=(X_val, y_val),  # Providing validation data
        batch_size=4, epochs=100,
        callbacks=[checkpoint, early_stop]
    )
except RuntimeError as e:
    print(f"RuntimeError occurred: {e}")
    # Handle any additional error logging or recovery here