In [11]:
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout  # Add Dropout here
from sklearn.model_selection import train_test_split
import glob

# Paths
# All inputs sit under /kaggle/input/deepfake/DATASET: one JSON file and one
# image directory for each of the two classes (real and fake).
real_json_path = "/kaggle/input/deepfake/DATASET/real_cifake_preds.json"
fake_json_path = "/kaggle/input/deepfake/DATASET/fake_cifake_preds.json"
real_image_dir = "/kaggle/input/deepfake/DATASET/real_cifake_images"
fake_image_dir = "/kaggle/input/deepfake/DATASET/fake_cifake_images"

# Build a constant label list sized by a JSON file
def load_labels(json_path, label_value):
    """Return a list holding `label_value` once per top-level entry of a JSON file.

    Only the number of entries in the parsed JSON is used; the entries'
    contents are ignored.

    Args:
        json_path: Path of the JSON file to read.
        label_value: Value repeated for every entry.

    Returns:
        A list of length ``len(parsed_json)`` filled with `label_value`.
    """
    with open(json_path, "r") as handle:
        entries = json.load(handle)
    return [label_value] * len(entries)

# Label convention used for training: 0 = real, 1 = fake.
# Each list gets one label per entry found in the matching JSON file.
real_labels = load_labels(json_path=real_json_path, label_value=0)
fake_labels = load_labels(json_path=fake_json_path, label_value=1)


def load_images(image_dir, labels):
    """Load every ``*.png`` in `image_dir` and pair each with an entry of `labels`.

    Files are visited in sorted path order and the i-th file receives
    ``labels[i]``. Each image is resized to 128x128 and its pixel values are
    divided by 255.

    Args:
        image_dir: Directory searched (non-recursively) for ``*.png`` files.
        labels: Sequence supplying one label per image, by position. It may be
            longer than the number of images; surplus entries are ignored.

    Returns:
        Tuple ``(images, image_labels)`` of NumPy arrays in sorted path order.

    Raises:
        IndexError: If `labels` has fewer entries than there are images. The
            check runs before any image is decoded, so a mismatch fails
            immediately instead of part-way through loading.
    """
    image_paths = sorted(glob.glob(os.path.join(image_dir, "*.png")))

    # Fail fast: indexing labels[i] inside the loop would raise the same
    # IndexError, but only after decoding every image that precedes the gap.
    if len(image_paths) > len(labels):
        raise IndexError(
            f"{len(image_paths)} images found in {image_dir!r} "
            f"but only {len(labels)} labels were supplied"
        )

    images = [
        img_to_array(load_img(path, target_size=(128, 128))) / 255.0
        for path in image_paths
    ]
    # Positional pairing: the first len(image_paths) labels, in order.
    image_labels = list(labels[:len(image_paths)])
    return np.array(images), np.array(image_labels)


# Decode both image folders; from here on the *_labels names hold NumPy arrays
real_images, real_labels = load_images(real_image_dir, real_labels)
fake_images, fake_labels = load_images(fake_image_dir, fake_labels)

# Stack real and fake samples into one feature array and one label array
X = np.concatenate([real_images, fake_images])
y = np.concatenate([real_labels, fake_labels])

# Hold out 20% of the samples (fixed seed) for initial testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)



Configuration 3: a more complex and more powerful model

In [24]:
# Build CNN Model with more layers and powerful configuration

# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

model = Sequential([
    # Declare the input explicitly instead of passing `input_shape=` to the
    # first layer, which is the deprecated form for Sequential models.
    tf.keras.Input(shape=(128, 128, 3)),

    # Five conv + 2x2 max-pool stages; filter count doubles at each stage
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(256, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Conv2D(512, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),

    Flatten(),

    # Dense head: each hidden layer is followed by 50% dropout for regularization
    Dense(512, activation='relu'),
    Dropout(0.5),

    Dense(256, activation='relu'),
    Dropout(0.5),

    Dense(128, activation='relu'),
    Dropout(0.5),

    Dense(1, activation='sigmoid')  # bin classification real or fake
])

# Compile Model with adjusted parameters
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Lower learning rate for stability in learning
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Train Model
# NOTE: the held-out split (X_test, y_test) is used here for per-epoch
# validation and again below for the final evaluation, so the reported test
# accuracy is not measured on data that was unseen during model development.
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))

# Accuracy recorded for the last training epoch
train_accuracy = history.history['accuracy'][-1]
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")


#evaluate the accuracy on the test part of the training dataset i.e 20 percent
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")


Epoch 1/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 35ms/step - accuracy: 0.4951 - loss: 0.6913 - val_accuracy: 0.4975 - val_loss: 0.6805
Epoch 2/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.5296 - loss: 0.6646 - val_accuracy: 0.7175 - val_loss: 0.5771
Epoch 3/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7250 - loss: 0.5778 - val_accuracy: 0.7575 - val_loss: 0.5094
Epoch 4/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7613 - loss: 0.5009 - val_accuracy: 0.7700 - val_loss: 0.4847
Epoch 5/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7857 - loss: 0.4882 - val_accuracy: 0.7725 - val_loss: 0.4676
Epoch 6/20
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7913 - loss: 0.4633 - val_accuracy: 0.7750 - val_loss: 0.4791
Epoch 7/20
[1m50/50[0m [32m━━━━

Saving the model trained on the 80% training split

In [18]:
# Save the trained model
# Writes whatever `model` currently holds in memory to the Kaggle working
# directory. NOTE(review): presumably this is the model fitted on the 80%
# training split, but the cell execution counts are out of order — confirm
# by re-running the notebook top to bottom.
model.save("/kaggle/working/model.train80.keras")
print("Model saved successfully.")


Model saved successfully.


**TRAINING ON THE ENTIRE DATASET (NO SPLIT)**

In [None]:
# Train Model on full dataset leaving no part for testing as testing is already done in 20 percent of training data prior
# NOTE: `model` is not re-created in this cell, so this fit call continues
# from whatever weights `model` already holds rather than starting from a
# fresh initialisation. No validation data is passed to this run.
# `history` and `train_accuracy` from the earlier training cell are overwritten.
history = model.fit(X, y, epochs=20, batch_size=32)

# Accuracy recorded for the last epoch of this run
train_accuracy = history.history['accuracy'][-1]
print(f"Training Accuracy: {train_accuracy * 100:.2f}%")


Save the model

In [15]:
# Save the trained model
# Writes the current in-memory `model` to the Kaggle working directory; this
# is the file the prediction cell later reloads with `load_model`.
model.save("/kaggle/working/model.fullTrain.keras")
print("Model saved successfully.")


Model saved successfully.


Generating predictions for the test images and saving them to JSON

In [17]:
import os
import json
import numpy as np
import glob
from keras.models import load_model
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# dataset path
test_image_dir = "/kaggle/input/deepfake/DATASET/test"


# Load the test images
def load_test_images(image_dir):
    """Read every ``*.png`` in `image_dir` for inference.

    Files are taken in sorted path order. Each image is resized to 128x128
    and its pixel values are divided by 255.

    Args:
        image_dir: Directory searched (non-recursively) for ``*.png`` files.

    Returns:
        Tuple ``(images, filenames)``: a NumPy array of the decoded images and
        a list of the matching file base names, both in sorted path order.
    """
    png_paths = sorted(glob.glob(os.path.join(image_dir, "*.png")))  # png is assumed
    images = [
        img_to_array(load_img(path, target_size=(128, 128))) / 255.0  # resize, then normalize
        for path in png_paths
    ]
    filenames = [os.path.basename(path) for path in png_paths]
    return np.array(images), filenames


import re  # local to this cell: only needed for the natural-order sort key below

test_images, filenames = load_test_images(test_image_dir)

# load the saved model
model = load_model("/kaggle/working/model.fullTrain.keras")


predictions = model.predict(test_images)
predictions = (predictions > 0.5).astype(int)  # Convert probabilities to 0 (real) or 1 (fake)

# One scalar class per image; avoids comparing 1-element arrays in the loop.
flat_predictions = predictions.ravel()
if len(flat_predictions) != len(filenames):
    raise ValueError(
        f"expected one prediction per image, got {len(flat_predictions)} "
        f"predictions for {len(filenames)} files"
    )


def _natural_key(name):
    """Sort key that compares embedded digit runs numerically ('2.png' < '10.png')."""
    parts = re.split(r"(\d+)", name)
    # With a capture group, re.split alternates text, digits, text, ...,
    # so odd positions are always the digit runs.
    return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]


# `filenames` arrive in plain string order, where "10.png" sorts before
# "2.png". Number the records in natural filename order instead, so that
# "index" follows the files' own numbering rather than the string sort.
natural_order = sorted(range(len(filenames)), key=lambda k: _natural_key(filenames[k]))

# store in json
output = [
    {"index": rank, "prediction": "fake" if flat_predictions[k] == 1 else "real"}
    for rank, k in enumerate(natural_order, start=1)
]

# Save to json file
output_json_path = "predictions.json"
with open(output_json_path, "w") as f:
    json.dump(output, f, indent=4)

print(f"Predictions saved to {output_json_path}")


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step
Predictions saved to predictions.json
