In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
import time
import matplotlib.pyplot as plt
from matplotlib import gridspec

In [2]:
# Run identifier: the prefix "cifakeCNN" followed by the local time at which
# this cell was executed.
NAME = f"cifakeCNN{time.strftime('%Y%m%d-%H%M%S')}"

# TensorBoard callback that logs under logs/<NAME>.
tensorboard = TensorBoard(log_dir=f"logs/{NAME}")

In [3]:
# Report how many physical GPU devices TensorFlow detects.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  0


In [4]:
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#   # Restrict TensorFlow to only allocate 1GB of memory on the first GPU
#   try:
#     tf.config.set_logical_device_configuration(
#         gpus[0],
#         [tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
#     logical_gpus = tf.config.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
#   except RuntimeError as e:
#     # Virtual devices must be set before GPUs have been initialized
#     print(e)

In [5]:
# tf.config.threading.set_intra_op_parallelism_threads(os.cpu_count()//2)
# tf.config.threading.set_inter_op_parallelism_threads(os.cpu_count()//2)

In [6]:
# Training split. Labels are inferred from the directory layout and requested
# in binary mode; images are resized to 64x64 with nearest-neighbour
# interpolation and delivered in shuffled batches of 32.
ds_train = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/train',
    labels='inferred',
    label_mode='binary',
    batch_size=32,
    image_size=(64, 64),
    interpolation='nearest',
    crop_to_aspect_ratio=False,
    shuffle=True,
    seed=69,
    subset=None,
    follow_links=False,
)

Found 100000 files belonging to 2 classes.


In [7]:
# Held-out split, loaded with exactly the same options as the training split
# (inferred binary labels, 64x64 nearest-neighbour resize, shuffled batches
# of 32, seed 69).
ds_test = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/test',
    labels='inferred',
    label_mode='binary',
    batch_size=32,
    image_size=(64, 64),
    interpolation='nearest',
    crop_to_aspect_ratio=False,
    shuffle=True,
    seed=69,
    subset=None,
    follow_links=False,
)

Found 20000 files belonging to 2 classes.


In [8]:
def convert_to_float(image, label):
    """Convert ``image`` to ``tf.float32`` and pass ``label`` through unchanged.

    The conversion is delegated to ``tf.image.convert_image_dtype``. The
    function is used as the mapping function for datasets of
    ``(image, label)`` pairs.

    Returns:
        A ``(converted_image, label)`` tuple.
    """
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float, label

# Passed as buffer_size to prefetch() below so that tf.data chooses the
# prefetch buffer size itself.
AUTOTUNE = tf.data.experimental.AUTOTUNE


In [9]:
# Input pipeline for training: float conversion -> cache -> prefetch.
# NOTE(review): this rebinds ds_train to a dataset derived from itself, so
# re-running the cell stacks another map/cache/prefetch on top of the previous
# one. Run it once per kernel, or confirm the repeat is harmless.
ds_train = ds_train.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)


In [10]:
# Validation pipeline built from the test split with the same three stages as
# the training pipeline: float conversion -> cache -> prefetch.
ds_valid = ds_test.map(convert_to_float).cache().prefetch(buffer_size=AUTOTUNE)

In [11]:
# Stop a run once val_loss has gone 8 epochs without improving by at least
# 0.001, and restore the weights from the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=8,
    restore_best_weights=True,
)

In [12]:
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Scale the learning rate by 0.2 after 5 epochs without val_loss improvement,
# with 0.0008 as the floor.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.0008,
)

In [13]:
from tensorflow.keras.callbacks import ModelCheckpoint
# The checkpoint file name carries the time at which this cell was executed.
timestamp = time.strftime("%Y%m%d-%H%M%S")

# Keep only the weights with the lowest val_loss seen so far.
model_checkpoint = ModelCheckpoint(
    filepath=f'best/best_model_{timestamp}.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

In [14]:
from keras_tuner import RandomSearch

def _tunable_conv2d(hp, index):
    """Return a same-padded ReLU ``Conv2D`` with tunable width and kernel size.

    Registers ``conv_<index>_filter`` (2 or 4) and then ``conv_<index>_kernel``
    (3 or 7) on ``hp``, in that order.
    """
    filters = hp.Int(f'conv_{index}_filter', min_value=2, max_value=4, step=2)
    kernel_size = hp.Choice(f'conv_{index}_kernel', values=[3, 7])
    return layers.Conv2D(filters=filters, kernel_size=kernel_size,
                         activation='relu', padding='same')


def build_model(hp):
    """Build and compile the tunable CNN binary classifier.

    Layer order:
      1. 64x64x3 input, random rotation / translation / horizontal flip.
      2. BatchNorm -> conv 1 -> optional max-pool (``use_maxpool_first``).
      3. BatchNorm -> conv 2 -> max-pool.
      4. BatchNorm -> conv 3 -> conv 4 -> max-pool -> BatchNorm -> flatten.
      5. ``num_dense_layers`` (0-4) blocks of Dense(relu) + Dropout.
      6. A single sigmoid output unit.

    Args:
        hp: The hyperparameter container supplied by the tuner; every
            ``hp.Int`` / ``hp.Choice`` / ``hp.Boolean`` / ``hp.Float`` call
            below registers one search dimension on it.

    Returns:
        A ``Sequential`` model compiled with Adam, binary cross-entropy loss
        and the ``binary_accuracy`` metric.
    """
    # All three augmentation layers come from the same imported
    # ``preprocessing`` module.
    model = Sequential([
        layers.InputLayer(input_shape=[64, 64, 3]),
        preprocessing.RandomRotation(factor=0.1488228133769420),
        preprocessing.RandomTranslation(height_factor=0.069, width_factor=0.069),
        preprocessing.RandomFlip(mode='horizontal'),
        layers.BatchNormalization(renorm=True),
        _tunable_conv2d(hp, 1),
    ])

    if hp.Boolean('use_maxpool_first'):
        model.add(layers.MaxPool2D())

    model.add(layers.BatchNormalization(renorm=True))
    model.add(_tunable_conv2d(hp, 2))
    model.add(layers.MaxPool2D())
    model.add(layers.BatchNormalization(renorm=True))
    model.add(_tunable_conv2d(hp, 3))
    model.add(_tunable_conv2d(hp, 4))
    model.add(layers.MaxPool2D())
    model.add(layers.BatchNormalization(renorm=True))
    model.add(layers.Flatten())

    # Optional dense head; the number of Dense + Dropout blocks is itself tuned.
    num_dense_layers = hp.Int('num_dense_layers', min_value=0, max_value=4)
    for layer_no in range(1, num_dense_layers + 1):
        # NOTE(review): min=1, max=513, step=256 yields the candidates
        # 1, 257 and 513 -- confirm a 1-unit layer is really intended.
        units = hp.Int(f'dense_{layer_no}_units', min_value=1, max_value=513, step=256)
        model.add(layers.Dense(units, activation='relu'))
        dropout_rate = hp.Float(f'dropout_{layer_no}_rate',
                                min_value=0.0, max_value=0.5, step=0.1)
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Dense(1, activation='sigmoid'))

    # 'log' is the documented lowercase spelling of the sampling mode; using
    # it avoids relying on the library to normalise the case of 'LOG'.
    learning_rate = hp.Float('learning_rate', min_value=0.001, max_value=0.1,
                             sampling='log')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate, epsilon=0.01)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    return model



In [None]:
# Random search over the space defined in build_model: up to 15 sampled
# configurations, each trained 3 times, scored on val_binary_accuracy.
tuner = RandomSearch(
    build_model,
    objective='val_binary_accuracy',
    max_trials=15,
    executions_per_trial=3,
    seed=69,  # same seed as the dataset loaders, so the sampled trials are repeatable
    directory='projects',
    project_name='cifake')

# Callbacks applied to every training run launched by the search.
# NOTE(review): model_checkpoint always writes to one fixed path, so each run
# can overwrite what an earlier (possibly better) run saved there. The file it
# leaves behind is presumably not the overall best model; use
# tuner.get_best_models() below for that. Confirm.
search_callbacks = [early_stopping, tensorboard, reduce_lr, model_checkpoint]

# Start the hyperparameter search (50 epochs max per run, early stopping enabled)
tuner.search(ds_train, epochs=50, validation_data=ds_valid, callbacks=search_callbacks)

# Get the best model
best_model = tuner.get_best_models(num_models=1)[0]


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
4                 |4                 |conv_1_filter
7                 |7                 |conv_1_kernel
False             |False             |use_maxpool_first
4                 |4                 |conv_2_filter
3                 |3                 |conv_2_kernel
2                 |2                 |conv_3_filter
7                 |7                 |conv_3_kernel
2                 |2                 |conv_4_filter
3                 |3                 |conv_4_kernel
3                 |3                 |num_dense_layers
0.0012475         |0.0012475         |learning_rate

Epoch 1/50
 275/3125 [=>............................] - ETA: 11:11 - loss: 0.6931 - binary_accuracy: 0.5057

In [None]:
# Print the layer-by-layer summary of the best model returned by the search.
best_model.summary()

In [None]:
# Print the tuner's summary of the search results.
tuner.results_summary()

In [None]:
# Winning hyperparameter set of the search.
best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Use .values, the flat {hyperparameter name: chosen value} mapping.
# get_config() returns a dict whose top-level keys are "space" and "values",
# so iterating it would embed two whole nested structures in the file name
# instead of one "name=value" pair per hyperparameter.
hyperparameters_str = "__".join(
    f"{k}={v}" for k, v in best_hyperparameters.values.items()
)

# Append the suffix only once so that re-running this cell does not keep
# growing NAME.
name_suffix = "_" + hyperparameters_str
if not NAME.endswith(name_suffix):
    NAME += name_suffix

# NOTE(review): with many hyperparameters this name can get long; check it
# stays within the file-name length limit of the target filesystem.
best_model.save(f"best/{NAME}.h5")