In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
import time
import matplotlib.pyplot as plt
from matplotlib import gridspec

2024-02-19 21:15:40.905826: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Tag this run with the current Unix time (whole seconds) so that its
# TensorBoard logs are written to a run-specific directory under logs/.
NAME = f"cifakeCNN{int(time.time())}"

tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

In [3]:
# Report how many physical GPU devices TensorFlow lists on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [4]:
# Training split: labels are inferred from the directory layout and emitted in
# binary mode; images are resized to 64x64 with nearest-neighbour
# interpolation and served in shuffled batches of 32 (shuffle seed 69).
ds_train = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/train',
    # labelling
    labels='inferred',
    label_mode='binary',
    # image geometry
    image_size=(64, 64),
    interpolation='nearest',
    crop_to_aspect_ratio=False,
    # batching and ordering
    batch_size=32,
    shuffle=True,
    seed=69,
    # directory handling (no validation subset is carved out here)
    subset=None,
    follow_links=False,
)

Found 100000 files belonging to 2 classes.


In [5]:
# Test split: labels are inferred from the directory layout and emitted in
# binary mode; images are resized to 64x64 with nearest-neighbour
# interpolation and served in shuffled batches of 32 (shuffle seed 69).
ds_test = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/test',
    # labelling
    labels='inferred',
    label_mode='binary',
    # image geometry
    image_size=(64, 64),
    interpolation='nearest',
    crop_to_aspect_ratio=False,
    # batching and ordering
    batch_size=32,
    shuffle=True,
    seed=69,
    # directory handling (no validation subset is carved out here)
    subset=None,
    follow_links=False,
)

Found 20000 files belonging to 2 classes.


In [6]:
def convert_to_float(image, label):
    """Convert an image to float32 and pass its label through unchanged.

    Shaped for use with ``Dataset.map`` over ``(image, label)`` elements.

    NOTE(review): if the incoming images are already floating point,
    ``tf.image.convert_image_dtype`` presumably performs a plain cast and does
    not rescale pixel values into [0, 1] - confirm that no rescaling is
    expected from this step.
    """
    return tf.image.convert_image_dtype(image, dtype=tf.float32), label

# tf.data's AUTOTUNE constant; used as the prefetch buffer_size of the
# training and validation input pipelines.
AUTOTUNE = tf.data.experimental.AUTOTUNE


In [7]:
# Training input pipeline: run convert_to_float over every batch, cache the
# result, and prefetch with an AUTOTUNE-sized buffer.
# This rebinds ds_train, so re-running the cell stacks another
# map/cache/prefetch on top of the previous one.
ds_train = ds_train.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)


In [8]:
# Validation input pipeline, built from the test split (ds_test): run
# convert_to_float over every batch, cache the result, and prefetch with an
# AUTOTUNE-sized buffer.
ds_valid = ds_test.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

In [9]:
# Early stopping on validation loss: patience of 8 epochs, improvements
# smaller than 0.001 do not count, and the best weights seen are restored
# when training stops.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=8,
    restore_best_weights=True,
)

In [10]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Learning-rate schedule on validation loss: scale the rate by 0.2 after 5
# epochs without improvement, with a floor of 0.0008.
# NOTE(review): this callback is not included in the callbacks list passed to
# tuner.search() in this notebook - confirm whether it was meant to be used.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.0008,
)

In [11]:
from keras_tuner import RandomSearch

def _tuned_conv2d(hp, index):
    """Return a ReLU, 'same'-padded Conv2D with tunable filter count and kernel size.

    Registers ``conv_{index}_filter`` (range 2-4, step 2) and then
    ``conv_{index}_kernel`` (3 or 7) on ``hp``.
    """
    return layers.Conv2D(
        filters=hp.Int(f'conv_{index}_filter', min_value=2, max_value=4, step=2),
        kernel_size=hp.Choice(f'conv_{index}_kernel', values=[3, 7]),
        activation='relu',
        padding='same',
    )


def build_model(hp):
    """Build and compile the binary-classification CNN for one tuner trial.

    Layer order:
        input (64x64x3) -> random rotation / translation / horizontal flip
        -> BatchNorm -> Conv 1 -> optional MaxPool (``use_maxpool_first``)
        -> BatchNorm -> Conv 2 -> MaxPool
        -> BatchNorm -> Conv 3 -> Conv 4 -> MaxPool
        -> BatchNorm -> Flatten
        -> ``num_dense_layers`` (0-4) x [Dense(relu) + Dropout]
        -> Dense(1, sigmoid)

    Args:
        hp: KerasTuner hyperparameter container; every tunable value is
            registered on it by name while the model is being built.

    Returns:
        The model, compiled with Adam (tuned learning rate, epsilon=0.01),
        binary cross-entropy loss and the ``binary_accuracy`` metric.
    """
    model = Sequential([
        layers.InputLayer(input_shape=[64, 64, 3]),
        # Data augmentation. The layers are taken straight from `layers`
        # rather than from the experimental preprocessing namespace.
        layers.RandomRotation(factor=0.1488228133769420),
        layers.RandomTranslation(height_factor=0.069, width_factor=0.069),
        layers.RandomFlip(mode='horizontal'),
        layers.BatchNormalization(renorm=True),
        _tuned_conv2d(hp, 1),
    ])

    # Whether to downsample right after the first convolution is itself tuned.
    if hp.Boolean('use_maxpool_first'):
        model.add(layers.MaxPool2D())

    model.add(layers.BatchNormalization(renorm=True))
    model.add(_tuned_conv2d(hp, 2))
    model.add(layers.MaxPool2D())

    model.add(layers.BatchNormalization(renorm=True))
    model.add(_tuned_conv2d(hp, 3))
    model.add(_tuned_conv2d(hp, 4))
    model.add(layers.MaxPool2D())

    model.add(layers.BatchNormalization(renorm=True))
    model.add(layers.Flatten())

    # Tunable classifier head: zero to four Dense + Dropout pairs.
    # NOTE(review): with min_value=1, max_value=513, step=256 the unit count
    # is presumably drawn from {1, 257, 513} - confirm a 1-unit hidden layer
    # is intended.
    num_dense_layers = hp.Int('num_dense_layers', min_value=0, max_value=4)
    for position in range(1, num_dense_layers + 1):
        model.add(layers.Dense(
            hp.Int(f'dense_{position}_units', min_value=1, max_value=513, step=256),
            activation='relu'))
        model.add(layers.Dropout(
            hp.Float(f'dropout_{position}_rate', min_value=0.0, max_value=0.5, step=0.1)))

    model.add(layers.Dense(1, activation='sigmoid'))

    # KerasTuner documents the sampling modes in lower case, so 'log' is
    # spelled that way instead of relying on case-insensitive handling.
    learning_rate = hp.Float('learning_rate', min_value=0.001, max_value=0.1,
                             sampling='log')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=0.01)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    return model



In [12]:
# Random search over the hyperparameters registered by build_model:
# 15 trials, 3 executions per trial, ranked on val_binary_accuracy.
# Tuner state is kept under projects/cifake.
tuner = RandomSearch(
    build_model,
    objective='val_binary_accuracy',
    max_trials=15,
    executions_per_trial=3,
    directory='projects',
    project_name='cifake',
)

# Run the search: up to 50 epochs per training run, with early stopping and
# TensorBoard logging attached.
# NOTE(review): ds_valid is built from the test split, so the same data both
# selects the hyperparameters and scores them - confirm that is acceptable.
tuner.search(
    ds_train,
    epochs=50,
    validation_data=ds_valid,
    callbacks=[early_stopping, tensorboard],
)

# Keep the top-ranked model from the search.
best_model = tuner.get_best_models(num_models=1)[0]


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
4                 |4                 |conv_1_filter
7                 |7                 |conv_1_kernel
True              |True              |use_maxpool_first
4                 |4                 |conv_2_filter
3                 |3                 |conv_2_kernel
4                 |4                 |conv_3_filter
3                 |3                 |conv_3_kernel
2                 |2                 |conv_4_filter
3                 |3                 |conv_4_kernel
3                 |3                 |num_dense_layers
0.0099139         |0.0099139         |learning_rate

Epoch 1/50
 234/3125 [=>............................] - ETA: 3:16 - loss: 0.6933 - binary_accuracy: 0.5004

KeyboardInterrupt: 

In [None]:
# Show the layer-by-layer summary of the best model retrieved from the tuner.
best_model.summary()

In [None]:
# Show the tuner's summary of the search results.
tuner.results_summary()

In [None]:

# Plot learning curves.
#
# No earlier cell defines `history` (the return value of tuner.search() is not
# captured), so this cell used to fail with a NameError on a fresh kernel.
# The per-epoch history is produced here instead: rebuild the model from the
# best hyperparameters found by the search and train it once more with the
# same data, epoch budget and early stopping.
# This is a full training run, so expect it to take a while.
best_hp = tuner.get_best_hyperparameters(num_trials=1)[0]
final_model = build_model(best_hp)
history = final_model.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=50,
    callbacks=[early_stopping],
)

# One row per epoch, one column per logged metric.
history_frame = pd.DataFrame(history.history)

loss_ax = history_frame.loc[:, ['loss', 'val_loss']].plot(title='Loss')
loss_ax.set(xlabel='epoch', ylabel='binary cross-entropy')

accuracy_ax = history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(
    title='Binary accuracy')
accuracy_ax.set(xlabel='epoch', ylabel='accuracy');
