In [None]:

import numpy as np 
import pandas as pd 
import os
# Print the full path of every file found anywhere under the Kaggle input mount.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import cv2
# Switch matplotlib to its 'ggplot' style sheet for the figures drawn below.
plt.style.use('ggplot')
from IPython.display import Video
from IPython.display import HTML

In [None]:
# Read the sample-video metadata and transpose it so that each row describes
# one video (the row index is used as the video file name further down).
train_sample_metadata = pd.read_json(
    '../input/deepfake-detection-challenge/train_sample_videos/metadata.json'
).transpose()
train_sample_metadata.head()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import cv2
import os

# Load metadata (transposed so that each row describes one video)
train_sample_metadata = pd.read_json(
    '../input/deepfake-detection-challenge/train_sample_videos/metadata.json'
).transpose()

# Display the first few rows of the metadata
print(train_sample_metadata.head())

# Visualize the distribution of fake vs. real videos on an explicit Axes
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=train_sample_metadata, x='label', ax=ax)
ax.set_title('Distribution of Fake vs. Real Videos')
ax.set_xlabel('Label')
ax.set_ylabel('Count')
plt.show()

# Define a function to display a frame from a video
def display_video_frame(video_path):
    """Show the first frame of a video with matplotlib.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.

    Returns:
        None. If no frame can be read, a failure message is printed instead
        of a plot.
    """
    capture = cv2.VideoCapture(video_path)
    ok, bgr_frame = capture.read()
    # Only one frame is needed, so the capture is released straight away.
    capture.release()

    if not ok:
        print(f"Failed to read video: {video_path}")
        return

    # OpenCV delivers BGR; matplotlib expects RGB.
    plt.imshow(cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

# Path to the video folder
video_folder = '../input/deepfake-detection-challenge/train_sample_videos/'

# File names (the metadata index) of the videos carrying each label
fake_videos = train_sample_metadata.loc[train_sample_metadata['label'] == 'FAKE'].index
real_videos = train_sample_metadata.loc[train_sample_metadata['label'] == 'REAL'].index

# Show the first frame of the first 3 videos of each group, fake videos first
for heading, videos in (
    ("Sample frames from Fake videos:", fake_videos),
    ("Sample frames from Real videos:", real_videos),
):
    print(heading)
    for video in videos[:3]:
        print(f"Video: {video}")
        display_video_frame(os.path.join(video_folder, video))

In [None]:
!pip install mtcnn tensorflow opencv-python pandas numpy scikit-learn

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from mtcnn import MTCNN
from tensorflow.keras.utils import Sequence
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.model_selection import train_test_split

# Initialize MTCNN face detector
detector = MTCNN()

# Paths and setup
video_folder = '../input/deepfake-detection-challenge/train_sample_videos/'
metadata_path = '../input/deepfake-detection-challenge/train_sample_videos/metadata.json'

# Transposed so that each row is one video; the index is used as the file name.
train_sample_metadata = pd.read_json(metadata_path).T

# Hold out 20% of the videos for validation. Stratifying on the label keeps the
# FAKE/REAL proportions the same in both splits, so validation accuracy is not
# distorted by a split that happens to under-sample one class.
train_metadata, val_metadata = train_test_split(
    train_sample_metadata,
    test_size=0.2,
    random_state=42,
    stratify=train_sample_metadata['label'],
)

class VideoFrameGenerator(Sequence):
    """Keras ``Sequence`` producing batches of face crops cut out of videos.

    Each row of ``metadata`` describes one video: the row index is the file
    name (joined onto the module-level ``video_folder``) and the ``label``
    column is ``'FAKE'`` (encoded as 1) or anything else (encoded as 0).
    Faces are located with the module-level ``detector``, resized to
    ``target_size`` and scaled to the ``[0, 1]`` range.

    Args:
        metadata: DataFrame indexed by video file name with a ``label`` column.
        batch_size: Number of face crops returned per batch; also the number
            of metadata rows (videos) assigned to each batch.
        target_size: Size passed to ``cv2.resize`` (which takes
            ``(width, height)``).
        shuffle: Whether to reshuffle the video order at the end of each epoch.
        **kwargs: Forwarded to the base class constructor (newer Keras
            versions accept e.g. ``workers`` there).
    """

    def __init__(self, metadata, batch_size=32, target_size=(224, 224), shuffle=True, **kwargs):
        # Newer Keras versions expect the base constructor to be called.
        super().__init__(**kwargs)
        self.metadata = metadata
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.indexes = np.arange(len(self.metadata))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last one may cover fewer videos)."""
        return int(np.ceil(len(self.metadata) / self.batch_size))

    def __getitem__(self, index):
        """Return ``(X, y_labels)`` for batch number ``index``."""
        batch_indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        batch_metadata = self.metadata.iloc[batch_indexes]

        X, y_labels = self.__data_generation(batch_metadata)
        return X, y_labels

    def on_epoch_end(self):
        """Reshuffle the video order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def _collect_faces(self, video_path, max_faces):
        """Return up to ``max_faces`` preprocessed face crops from one video."""
        crops = []
        cap = cv2.VideoCapture(video_path)
        try:
            while len(crops) < max_faces and cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_h, frame_w = frame_rgb.shape[:2]

                for face in detector.detect_faces(frame_rgb):
                    x, y, width, height = face['box']
                    # Clamp the box to the frame: a negative origin would
                    # otherwise slice from the far edge and give an empty crop.
                    left, top = max(0, x), max(0, y)
                    right = min(frame_w, x + width)
                    bottom = min(frame_h, y + height)
                    if right <= left or bottom <= top:
                        continue

                    face_img = cv2.resize(frame_rgb[top:bottom, left:right], self.target_size)
                    crops.append(img_to_array(face_img) / 255.0)  # Normalize pixel values

                    if len(crops) >= max_faces:
                        break
        finally:
            # Release the capture even if detection or resizing raises.
            cap.release()
        return crops

    def __data_generation(self, batch_metadata):
        """Build one batch of exactly ``batch_size`` face crops and labels.

        The batch is spread over the videos in ``batch_metadata`` instead of
        being filled from the first video alone, so a batch can contain both
        classes. If fewer than ``batch_size`` faces are found, the collected
        samples are repeated cyclically to pad the batch.

        Raises:
            ValueError: If no face is detected in any video of the batch.
        """
        X = []
        y_labels = []

        n_videos = max(1, len(batch_metadata))
        # Ceiling division: per-video share of the batch.
        faces_per_video = -(-self.batch_size // n_videos)

        for video_name, row in batch_metadata.iterrows():
            remaining = self.batch_size - len(X)
            if remaining <= 0:
                break

            label = 1 if row['label'] == 'FAKE' else 0
            video_path = os.path.join(video_folder, video_name)
            crops = self._collect_faces(video_path, min(faces_per_video, remaining))

            X.extend(crops)
            y_labels.extend([label] * len(crops))

        if not X:
            raise ValueError(
                "No faces detected in any video of this batch; cannot build a batch."
            )

        # Pad by cycling through what was collected rather than repeating one sample.
        n_collected = len(X)
        for i in range(self.batch_size - n_collected):
            X.append(X[i % n_collected])
            y_labels.append(y_labels[i % n_collected])

        return np.array(X), np.array(y_labels)

batch_size = 32
# Training batches are reshuffled at the end of every epoch (the generator's default).
train_generator = VideoFrameGenerator(train_metadata, batch_size=batch_size)
# Validation keeps a fixed video order so that its batches, and therefore the
# reported metrics, are comparable between epochs and with model.evaluate().
val_generator = VideoFrameGenerator(val_metadata, batch_size=batch_size, shuffle=False)

# Build the model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Three Conv2D + MaxPooling2D stages (32, 64, 128 filters) followed by a dense
# head with dropout and a single sigmoid output.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Fit for two epochs, validating on the held-out generator after each one
history = model.fit(train_generator, validation_data=val_generator, epochs=2)

# Score the trained model on the validation generator
loss, accuracy = model.evaluate(val_generator)
for line in (f"Validation Loss: {loss}", f"Validation Accuracy: {accuracy}"):
    print(line)

# Persist the trained model to the Kaggle working directory
model_save_path = '/kaggle/working/deepfake_detection_model.h5'
model.save(model_save_path)
print(f"Model saved to {model_save_path}")


In [None]:
import os
import cv2
import numpy as np
from mtcnn import MTCNN
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array

# Initialize MTCNN face detector
detector = MTCNN()

# Reuse the model trained earlier in this session when it is still in memory;
# otherwise (e.g. after a kernel restart) load the copy written by the training
# cell, so this cell does not depend on leftover kernel state.
try:
    model
except NameError:
    model = load_model('/kaggle/working/deepfake_detection_model.h5')

# Function to detect and preprocess faces from a video
def extract_faces_from_video(video_path, target_size=(224, 224)):
    """Detect, crop and preprocess every face found in every frame of a video.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        target_size: Size passed to ``cv2.resize`` (which takes
            ``(width, height)``).

    Returns:
        ``np.ndarray`` stacking one resized crop per detected face, scaled to
        ``[0, 1]``. When no face is found the array is empty (length 0).
    """
    cap = cv2.VideoCapture(video_path)
    faces = []

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_h, frame_w = frame_rgb.shape[:2]

            for face in detector.detect_faces(frame_rgb):
                x, y, width, height = face['box']
                # Clamp the box to the frame: a negative origin would otherwise
                # slice from the far edge and hand cv2.resize an empty crop.
                left, top = max(0, x), max(0, y)
                right = min(frame_w, x + width)
                bottom = min(frame_h, y + height)
                if right <= left or bottom <= top:
                    continue

                face_img = cv2.resize(frame_rgb[top:bottom, left:right], target_size)
                faces.append(img_to_array(face_img) / 255.0)
    finally:
        # Release the capture even if detection or resizing raises.
        cap.release()

    return np.array(faces)

# Function to predict if the video is fake or real
def predict_video(video_path, threshold=0.5):
    """Classify a video as FAKE or REAL from the faces detected in it.

    Every face returned by ``extract_faces_from_video`` is scored with the
    module-level ``model`` and the scores are averaged.

    Args:
        video_path: Path of the video to classify.
        threshold: Mean score above which the video is reported as FAKE.

    Returns:
        The mean prediction over all detected faces, or ``None`` when no face
        was detected (a message is printed in that case).
    """
    faces = extract_faces_from_video(video_path)
    if len(faces) == 0:
        print("No faces detected in the video.")
        return None

    predictions = model.predict(faces)
    avg_prediction = np.mean(predictions)

    if avg_prediction > threshold:
        print(f"The video '{video_path}' is predicted to be FAKE.")
    else:
        print(f"The video '{video_path}' is predicted to be REAL.")

    return avg_prediction

# Run the prediction on one sample test video. As the last expression of the
# cell, the value returned by predict_video is also shown as the cell output.
video_path = '/kaggle/input/deepfake-detection-challenge/test_videos/aassnaulhq.mp4'  # Replace with the path of the video to classify
predict_video(video_path)