In [1]:
import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import log_loss
from PIL import ImageFile


In [2]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

TRAIN_REAL_DIR = "train/real"   # Change this
TRAIN_FAKE_DIR = "train/fake"   # Change this
TEST_DIR = "test"               # Change this
#TEST_LABELS_PATH = "path/to/test_labels.csv"  # Ground truth (if available)

IMG_SIZE = (224, 224)  # Input size for CNN
BATCH_SIZE = 32


In [3]:
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.3,
    horizontal_flip=True,
    brightness_range=[0.5, 1.5],
    validation_split=0.2
)

train_generator = train_datagen.flow_from_directory(
    "train",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    subset="training"
)

val_generator = train_datagen.flow_from_directory(
    "train",
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="binary",
    subset="validation"
)

class_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(train_generator.classes),
    y=train_generator.classes
)
class_weights_dict = {i: class_weights[i] for i in range(len(class_weights))}

print(f"Class Weights: {class_weights_dict}")


Found 38400 images belonging to 2 classes.
Found 9600 images belonging to 2 classes.
Class Weights: {0: 1.0, 1: 1.0}


In [4]:
model = Sequential([
    Conv2D(32, (3, 3), activation="relu", input_shape=(224, 224, 3)),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation="relu", kernel_regularizer=l2(0.01)),
    Dropout(0.5),
    Dense(1, activation="sigmoid")  # Binary classification
])

model.compile(optimizer=Adam(learning_rate=0.0001),
              loss="binary_crossentropy",
              metrics=["accuracy"])

history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=10,
                    class_weight=class_weights_dict)

final_train_accuracy = history.history['accuracy'][-1]
final_val_accuracy = history.history['val_accuracy'][-1]

print(f"Final Training Accuracy: {final_train_accuracy:.4f}")
print(f"Final Validation Accuracy: {final_val_accuracy:.4f}")

model.save("fake_image_detector.h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10


  self._warn_if_super_not_called()


[1m  74/1200[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:13:00[0m 7s/step - accuracy: 0.5266 - loss: 2.4088



[1m 213/1200[0m [32m━━━[0m[37m━━━━━━━━━━━━━━━━━[0m [1m1:32:47[0m 6s/step - accuracy: 0.5578 - loss: 1.7539



[1m 433/1200[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m1:06:29[0m 5s/step - accuracy: 0.5816 - loss: 1.3819



[1m 474/1200[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m1:02:40[0m 5s/step - accuracy: 0.5849 - loss: 1.3413



[1m 488/1200[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m1:01:16[0m 5s/step - accuracy: 0.5860 - loss: 1.3285



[1m 740/1200[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m38:28[0m 5s/step - accuracy: 0.6025 - loss: 1.1639



[1m1086/1200[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m9:23[0m 5s/step - accuracy: 0.6175 - loss: 1.0405



[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5s/step - accuracy: 0.6215 - loss: 1.0123



[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7106s[0m 6s/step - accuracy: 0.6216 - loss: 1.0121 - val_accuracy: 0.6661 - val_loss: 0.6468
Epoch 2/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6710s[0m 6s/step - accuracy: 0.7165 - loss: 0.6053 - val_accuracy: 0.6875 - val_loss: 0.6313
Epoch 3/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6701s[0m 6s/step - accuracy: 0.7249 - loss: 0.5889 - val_accuracy: 0.6678 - val_loss: 0.6430
Epoch 4/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6534s[0m 5s/step - accuracy: 0.7311 - loss: 0.5788 - val_accuracy: 0.7011 - val_loss: 0.6113
Epoch 5/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6509s[0m 5s/step - accuracy: 0.7435 - loss: 0.5672 - val_accuracy: 0.7138 - val_loss: 0.5869
Epoch 6/10
[1m1200/1200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6499s[0m 5s/step - accuracy: 0.7467 - loss: 0.5651 - val_accuracy: 0.7181 - val_loss: 0.5976
Epoch 7/10
[1m



Final Training Accuracy: 0.7654
Final Validation Accuracy: 0.7302


In [None]:
test_images = sorted(os.listdir(TEST_DIR))  # Ensure consistent order
predictions = []

for img_name in test_images:
    img_path = os.path.join(TEST_DIR, img_name)
    
    try:
        img = image.load_img(img_path, target_size=IMG_SIZE)
        img_array = image.img_to_array(img) / 255.0  # Normalize
        img_array = np.expand_dims(img_array, axis=0)

        pred_prob = model.predict(img_array)[0][0]  # Get probability
        binary_label = 1 if pred_prob > 0.5 else 0  # Convert to binary

        predictions.append([img_name, binary_label, pred_prob])  
    
    except Exception as e:
        print(f"Skipping corrupted image: {img_name} | Error: {e}")

submission_df = pd.DataFrame(predictions, columns=["filename", "binary_label", "logloss_label"])
submission_df.to_csv("submission_final.csv", index=False)
print("Submission file 'submission_final.csv' created successfully!")

try:
    ground_truth = pd.read_csv(TEST_LABELS_PATH)  # CSV with 'filename' & 'actual_label'
    merged = submission_df.merge(ground_truth, on="filename")
    logloss_score = log_loss(merged["actual_label"], merged["logloss_label"])
    print(f"LogLoss Score: {logloss_score}")
except Exception as e:
    print("Ground truth labels not found. LogLoss cannot be calculated.")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31