In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os

# --- 1. Data Loading & Preparation ---

# Define data paths and parameters
data_dir = os.path.join('data')
train_dir = os.path.join(data_dir, 'train')

IMG_WIDTH = 32
IMG_HEIGHT = 32
BATCH_SIZE = 64

# The training and validation loaders must use the SAME split fraction and
# seed; otherwise the two subsets can overlap and leak validation images
# into training. Keeping them as shared constants prevents them drifting.
VALIDATION_SPLIT = 0.2  # Use 20% of training data for validation
SEED = 123

# Number of (already batched) elements held in the shuffle buffer.
SHUFFLE_BUFFER = 256

# Load the training dataset
train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=VALIDATION_SPLIT,
    subset="training",
    seed=SEED,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE)

# Load the validation dataset
val_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    validation_split=VALIDATION_SPLIT,
    subset="validation",
    seed=SEED,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    batch_size=BATCH_SIZE)

# Get the class names (should be ['FAKE', 'REAL']).
# This must happen before cache()/prefetch(): `class_names` is read from the
# dataset object returned by the loader, not from the transformed pipeline.
class_names = train_ds.class_names
print("Class names:", class_names)

# Configure datasets for performance
AUTOTUNE = tf.data.AUTOTUNE
# cache() replays elements in the order recorded during the first pass, so
# without a shuffle placed AFTER it every epoch would see the batches in the
# same order. The shuffle re-orders the cached batches on each epoch.
train_ds = (
    train_ds
    .cache()
    .shuffle(SHUFFLE_BUFFER)
    .prefetch(buffer_size=AUTOTUNE)
)
# Validation order does not affect the metrics, so no shuffle is needed.
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)


# --- 2. Function to Plot Training History ---

def plot_history(history, title):
    """Plot accuracy and loss curves for a training run, side by side.

    Args:
        history: Object whose ``.history`` attribute maps metric names to
            per-epoch value lists. ``'accuracy'`` and ``'loss'`` are
            required; ``'val_accuracy'`` and ``'val_loss'`` are plotted
            when present.
        title: Text used as the prefix of both subplot titles.

    Raises:
        KeyError: If ``'accuracy'`` or ``'loss'`` is missing from the
            history.
    """
    metrics = history.history

    # The training log numbers epochs from 1 ("Epoch 1/20"), so the x-axis
    # is numbered the same way to keep the plot and the log aligned.
    epochs = range(1, len(metrics['accuracy']) + 1)

    # (history key, human-readable label, legend position) for each panel.
    panels = (
        ('accuracy', 'Accuracy', 'lower right'),
        ('loss', 'Loss', 'upper right'),
    )

    plt.figure(figsize=(12, 5))

    for position, (key, label, legend_loc) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, metrics[key], label=f'Training {label}')

        # Validation curves are absent when training ran without
        # validation data; skip them instead of raising KeyError.
        val_values = metrics.get(f'val_{key}')
        if val_values is not None:
            plt.plot(epochs, val_values, label=f'Validation {label}')

        plt.xlabel('Epoch')
        plt.ylabel(label)
        plt.legend(loc=legend_loc)
        plt.title(f'{title} - Training and Validation {label}')

    plt.show()

# --- 3. Model 1: CNN Baseline (3-4 Conv Layers) ---

print("\n--- Training Model 1: Simple CNN Baseline ---")

# This is a binary classification (FAKE vs. REAL), so we use 1 output neuron
# with a 'sigmoid' activation. Note that despite its name, `num_classes` is
# the number of output units, not the number of classes.
num_classes = 1

model_cnn = keras.Sequential([
    # Declare the input shape with an explicit Input object. Passing
    # `input_shape=` to the first layer is deprecated for Sequential models
    # in Keras 3 and emits a warning there.
    keras.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),

    # Rescale pixel values from [0, 255] to [0, 1]
    layers.Rescaling(1./255),

    # Block 1 ('same' padding keeps the spatial size; each pool halves it)
    layers.Conv2D(32, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    # Block 2
    layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    # Block 3
    layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
    layers.MaxPooling2D((2, 2)),

    # Flatten the 3D feature maps to 1D
    layers.Flatten(),

    # Dense classifier layers
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.5),  # Dropout for regularization
    layers.Dense(num_classes, activation='sigmoid')  # Sigmoid for binary output
])

# Compile the model. 'binary_crossentropy' pairs with the single sigmoid
# output unit defined above.
model_cnn.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model_cnn.summary()

# Train the model
epochs = 20
history_cnn = model_cnn.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs
)

# Plot the results
plot_history(history_cnn, "Simple CNN")

# --- 4. Evaluate on Test Set ---
print("\n--- Evaluating on Test Set ---")
test_dir = os.path.join(data_dir, 'test')

# Read the held-out images using the same image size and batch size as the
# training pipeline, then add caching and prefetching step by step.
test_ds = tf.keras.utils.image_dataset_from_directory(
    directory=test_dir,
    batch_size=BATCH_SIZE,
    image_size=(IMG_HEIGHT, IMG_WIDTH),
)
test_ds = test_ds.cache()
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

# The evaluation result is unpacked as (loss, accuracy).
test_loss, test_acc = model_cnn.evaluate(test_ds)
print(f"Final Test Accuracy: {test_acc*100:.2f}%")

# --- 5. Save the Trained Model ---
# Write the trained network to a single file in the working directory and
# report where it went.
model_save_path = 'cifake_cnn_model.keras'
model_cnn.save(filepath=model_save_path)
print(f"\nModel saved to {model_save_path}")

Found 100000 files belonging to 2 classes.
Using 80000 files for training.
Found 100000 files belonging to 2 classes.
Using 20000 files for validation.
Class names: ['FAKE', 'REAL']

--- Training Model 1: Simple CNN Baseline ---


Epoch 1/20
 172/1250 ━━━━━━━━━━━━━━━━━━━━ 13s 12ms/step - accuracy: 0.6938 - loss: 0.5655