In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras import mixed_precision  # Correct import for mixed precision
from sklearn.metrics import classification_report, confusion_matrix

# Enable mixed precision training only when TensorFlow can see a GPU.
# On a CPU-only machine Keras warns that the mixed_float16 policy may run
# slowly, so fall back to the plain float32 policy there.
policy_name = 'mixed_float16' if tf.config.list_physical_devices('GPU') else 'float32'
policy = mixed_precision.Policy(policy_name)
mixed_precision.set_global_policy(policy)

# Dataset location: one sub-directory per split under the dataset root
base_dir = '../real_vs_fake/real-vs-fake'
train_dir, valid_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'valid', 'test')
)

# Input pipeline settings
img_size = (150, 150)  # (height, width) every image is resized to
batch_size = 32  # images per batch

# Function to preprocess the dataset
def preprocess_image(image, label):
    """Resize an image (or batch of images) and scale its pixels by 1/255.

    Args:
        image: Image tensor accepted by ``tf.image.resize``.
        label: Label associated with ``image``; passed through unchanged.

    Returns:
        A ``(image, label)`` tuple with the resized, rescaled image.
    """
    # Use the shared img_size setting instead of a second hard-coded size so
    # the resize target cannot drift from the size the loader uses.
    image = tf.image.resize(image, img_size)
    image = image / 255.0  # rescale pixel values
    return image, label

def load_dataset(directory):
    """Build a batched, preprocessed binary-label image dataset from a folder.

    Args:
        directory: Path to a directory laid out for
            ``image_dataset_from_directory`` (one sub-folder per class).

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches, with images
        resized to ``img_size`` and passed through ``preprocess_image``.
    """
    dataset = tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        image_size=img_size,
        batch_size=batch_size,
        label_mode='binary'
    )
    # Let tf.data run the preprocessing on several batches in parallel;
    # element order is unchanged because map() is deterministic by default.
    return dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)

# Load datasets
# Cache first, then shuffle: calling shuffle() before cache() stores the first
# shuffled order in the cache and replays that same order on every epoch.
# The elements here are whole batches, so shuffle(1000) reorders batches.
# NOTE(review): cache() without a filename keeps the dataset in memory --
# confirm the full training set fits in RAM on the target machine.
train_dataset = (
    load_dataset(train_dir)
    .cache()
    .shuffle(1000)
    .prefetch(tf.data.AUTOTUNE)
)
valid_dataset = load_dataset(valid_dir).cache().prefetch(tf.data.AUTOTUNE)
test_dataset = load_dataset(test_dir).cache().prefetch(tf.data.AUTOTUNE)

# Load VGG16 with ImageNet weights and without its original classifier head
base_model = tf.keras.applications.VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=img_size + (3,)  # same spatial size the data pipeline produces, 3 channels
)

# Fine-tune the whole backbone: every VGG16 layer is left trainable
# (nothing is frozen here).
base_model.trainable = True

# Stack the classification head on top of the VGG16 base model
model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
# Single sigmoid unit for binary classification; dtype is pinned to float32
# independently of the global dtype policy.
model.add(layers.Dense(1, activation='sigmoid', dtype='float32'))

# Learning-rate schedule: start at 1e-3 and multiply by 0.96 after every
# 10,000 optimizer steps (staircase, i.e. in discrete jumps).
# NOTE(review): 1e-3 is a high starting rate for updating pretrained weights;
# consider validating against a smaller initial rate.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    0.001,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True,
)

# Compile with Adam driven by the schedule above
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Early stopping: halt once validation loss has not improved for 5 epochs
# and roll the model back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss', mode='min',
    patience=5,
    restore_best_weights=True,
)

# Fit on the training set, validating after each epoch. The epoch cap is
# deliberately high; early stopping is expected to end the run.
history = model.fit(
    x=train_dataset,
    validation_data=valid_dataset,
    epochs=100,
    callbacks=[early_stopping],
)

# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metrics (accuracy).
test_loss, test_acc = model.evaluate(x=test_dataset)
print(f"Test accuracy: {test_acc:.4f}")

The dtype policy mixed_float16 may run slowly because this machine does not have a GPU. Only Nvidia GPUs with compute capability of at least 7.0 run quickly with mixed_float16.
Found 102041 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.
Found 20000 files belonging to 2 classes.




Epoch 1/100


2024-09-10 21:34:11.546046: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype string and shape [102041]
	 [[{{node Placeholder/_0}}]]
2024-09-10 21:34:11.546296: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [102041]
	 [[{{node Placeholder/_4}}]]
2024-09-10 21:34:12.582389: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2024-09-10 21:34:22.966313: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:392] Filling up shuffle buffer (this may take a while): 12 of 1000
2024-09-10 21:34:32.900749: I tensorflow/

  52/3189 [..............................] - ETA: 2:34:46 - loss: 0.7583 - accuracy: 0.4862

In [None]:
# Persist the trained model to disk; the .h5 suffix selects the HDF5 format
model.save(filepath='pretrained_trainable_VGG16.h5')

In [None]:
# Collect the metrics recorded by model.fit() into a DataFrame with one row
# per epoch (requires pandas imported as `pd`). The index is named "epoch" so
# the first column has a header when the table is exported.
history_df = pd.DataFrame(history.history).rename_axis("epoch")

In [None]:
# Display the training-history table
history_df

In [None]:
# Save the training history to CSV so it can be plotted later.
# TODO: rename the output file to match the model/layer setup being trained.
history_df.to_csv(path_or_buf="trainable_VGG16_model_history.csv", index=True)