In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dataset1/frames/benmsfzfaz.mp4_000220.jpg
/kaggle/input/dataset1/frames/axntxmycwd.mp4_000090.jpg
/kaggle/input/dataset1/frames/akvmwkdyuv.mp4_000170.jpg
/kaggle/input/dataset1/frames/blzydqdfem.mp4_000140.jpg
/kaggle/input/dataset1/frames/bctvsmddgq.mp4_000100.jpg
/kaggle/input/dataset1/frames/axwgcsyphv.mp4_000300.jpg
/kaggle/input/dataset1/frames/ayqvfdhslr.mp4_000040.jpg
/kaggle/input/dataset1/frames/arkroixhey.mp4_000100.jpg
/kaggle/input/dataset1/frames/bhaaboftbc.mp4_000160.jpg
/kaggle/input/dataset1/frames/aorjvbyxhw.mp4_000170.jpg
/kaggle/input/dataset1/frames/bbhpvrmbse.mp4_000270.jpg
/kaggle/input/dataset1/frames/avibnnhwhp.mp4_000040.jpg
/kaggle/input/dataset1/frames/adylbeequz.mp4_000280.jpg
/kaggle/input/dataset1/frames/aipfdnwpoo.mp4_000030.jpg
/kaggle/input/dataset1/frames/agrmhtjdlk.mp4_000070.jpg
/kaggle/input/dataset1/frames/awnwkrqibf.mp4_000110.jpg
/kaggle/input/dataset1/frames/andaxzscny.mp4_000070.jpg
/kaggle/input/dataset1/frames/aybumesmpk.mp4_000

In [2]:
import json
import os
from glob import glob
import cv2
import numpy as np
import pandas as pd
from keras.preprocessing.image import img_to_array

# Load metadata
with open('/kaggle/input/dataset1/frames/metadata.json') as f:
    metadata = json.load(f)

# Create a DataFrame to map frames with their labels
data = []
frames_dir = '/kaggle/input/dataset1/frames'
for video_id, info in metadata.items():
    label = 1 if info['label'] == 'FAKE' else 0
    # Loop through each frame for that video
    frame_paths = glob(os.path.join(frames_dir, f"{video_id}_*.jpg"))
    for frame in frame_paths:
        data.append([frame, label])

df = pd.DataFrame(data, columns=['frame_path', 'label'])


In [3]:
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import load_img

IMG_SIZE = (299, 299)

def preprocess_frame(frame_path):
    img = load_img(frame_path, target_size=IMG_SIZE)
    img = img_to_array(img)
    img = preprocess_input(img)
    return img

In [12]:
from sklearn.model_selection import train_test_split
from keras.applications.xception import Xception
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam

# Convert the list of frame paths to a numpy array of images
frames = np.array([preprocess_frame(frame_path) for frame_path in df['frame_path']])
labels = np.array(df['label'])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(frames, labels, test_size=0.2, random_state=42)

# Build and compile the model
base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=16)



Epoch 1/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 174ms/step - accuracy: 0.8529 - loss: 0.3920 - val_accuracy: 0.8806 - val_loss: 0.2600
Epoch 2/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 142ms/step - accuracy: 0.8981 - loss: 0.2346 - val_accuracy: 0.8952 - val_loss: 0.2231
Epoch 3/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 138ms/step - accuracy: 0.9258 - loss: 0.1730 - val_accuracy: 0.9135 - val_loss: 0.1836
Epoch 4/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 139ms/step - accuracy: 0.9285 - loss: 0.1569 - val_accuracy: 0.9330 - val_loss: 0.1563
Epoch 5/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 141ms/step - accuracy: 0.9333 - loss: 0.1411 - val_accuracy: 0.9403 - val_loss: 0.1477


<keras.src.callbacks.history.History at 0x7e87bb490a90>

In [13]:
# Get predictions for the test set
y_pred_probs = model.predict(X_test)

# Convert probabilities to binary classification (0 or 1)
y_pred = (y_pred_probs > 0.5).astype("int32")


[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 287ms/step


In [14]:
import pandas as pd

# Create a DataFrame with true labels and predictions
results_df = pd.DataFrame({'True Label': y_test, 'Predicted Label': y_pred.flatten()})

# Save to a CSV file
results_df.to_csv('predictions.csv', index=False)


In [15]:
from sklearn.metrics import accuracy_score, classification_report

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Classification report (Precision, Recall, F1-score)
report = classification_report(y_test, y_pred, target_names=['Real', 'Fake'])
print(report)

# Save classification report to a file
with open('classification_report.txt', 'w') as f:
    f.write(f"Accuracy: {accuracy}\n")
    f.write(report)


Accuracy: 0.9403166869671132
              precision    recall  f1-score   support

        Real       0.87      0.81      0.84       157
        Fake       0.96      0.97      0.96       664

    accuracy                           0.94       821
   macro avg       0.91      0.89      0.90       821
weighted avg       0.94      0.94      0.94       821



In [16]:
# Save the model architecture and weights
model.save('deepfake_detection_model.h5')


In [1]:
import tensorflow as tf

# Check TensorFlow version
print("TensorFlow version:", tf.__version__)

# Check Keras version (usually part of TensorFlow)
print("Keras version:", tf.keras.__version__)

TensorFlow version: 2.16.1
Keras version: 3.3.3


In [1]:
import json
import os
from glob import glob
import cv2
import numpy as np
import pandas as pd
from keras.preprocessing.image import img_to_array
from sklearn.utils import class_weight
from keras.applications.xception import Xception, preprocess_input
from keras.preprocessing.image import load_img
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.optimizers import Adam

# Load metadata
with open('/kaggle/input/dataset1/frames/metadata.json') as f:
    metadata = json.load(f)

# Create a DataFrame to map frames with their labels
data = []
frames_dir = '/kaggle/input/dataset1/frames'
for video_id, info in metadata.items():
    label = 1 if info['label'] == 'FAKE' else 0
    # Loop through each frame for that video
    frame_paths = glob(os.path.join(frames_dir, f"{video_id}_*.jpg"))
    for frame in frame_paths:
        data.append([frame, label])

df = pd.DataFrame(data, columns=['frame_path', 'label'])

IMG_SIZE = (299, 299)

def preprocess_frame(frame_path):
    img = load_img(frame_path, target_size=IMG_SIZE)
    img = img_to_array(img)
    img = preprocess_input(img)
    return img

# Convert the list of frame paths to a numpy array of images
frames = np.array([preprocess_frame(frame_path) for frame_path in df['frame_path']])
labels = np.array(df['label'])

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(frames, labels, test_size=0.2, random_state=42)

# Compute class weights
class_weights = class_weight.compute_class_weight('balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = dict(enumerate(class_weights))

# Build and compile the model
base_model = Xception(weights='imagenet', include_top=False, input_shape=(299, 299, 3))
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(1, activation='sigmoid')(x)

model = Model(inputs=base_model.input, outputs=predictions)
for layer in base_model.layers:
    layer.trainable = False

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train the model with class weights
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=16, class_weight=class_weights_dict)

# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.2f}")

# Predict on test set
y_pred = model.predict(X_test)
y_pred_classes = (y_pred > 0.5).astype(int)

# Print confusion matrix and classification report
from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred_classes))
print(classification_report(y_test, y_pred_classes))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m83683744/83683744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/5


I0000 00:00:1725755278.312447     103 service.cc:145] XLA service 0x7e8a2013bb70 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1725755278.312500     103 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1725755278.312504     103 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m  2/206[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m20s[0m 101ms/step - accuracy: 0.7656 - loss: 0.6537

I0000 00:00:1725755289.236427     103 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m53s[0m 183ms/step - accuracy: 0.7020 - loss: 0.6000 - val_accuracy: 0.8295 - val_loss: 0.3321
Epoch 2/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 127ms/step - accuracy: 0.8384 - loss: 0.3400 - val_accuracy: 0.8916 - val_loss: 0.2737
Epoch 3/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 132ms/step - accuracy: 0.8767 - loss: 0.2645 - val_accuracy: 0.7966 - val_loss: 0.4236
Epoch 4/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 138ms/step - accuracy: 0.8850 - loss: 0.2241 - val_accuracy: 0.7771 - val_loss: 0.5017
Epoch 5/5
[1m206/206[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 143ms/step - accuracy: 0.9067 - loss: 0.1819 - val_accuracy: 0.8819 - val_loss: 0.2615


2024-09-08 00:30:45.630052: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng3{k11=2} for conv (f32[32,128,147,147]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,128,147,147]{3,2,1,0}, f32[128,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=128, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-09-08 00:30:45.634981: E external/local_xla/xla/service/slow_operation_alarm.cc:133] The operation took 1.005060639s
Trying algorithm eng3{k11=2} for conv (f32[32,128,147,147]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,128,147,147]{3,2,1,0}, f32[128,1,3,3]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, feature_group_count=128, custom_call_target="__cudnn$convForward", backend_config={"operation_queu

[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 772ms/step - accuracy: 0.8921 - loss: 0.2405
Test Accuracy: 0.88
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 297ms/step
[[148   9]
 [ 88 576]]
              precision    recall  f1-score   support

           0       0.63      0.94      0.75       157
           1       0.98      0.87      0.92       664

    accuracy                           0.88       821
   macro avg       0.81      0.91      0.84       821
weighted avg       0.92      0.88      0.89       821



In [2]:
# Save the model architecture and weights
model.save('deepfake.h5')