In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
import time
import matplotlib.pyplot as plt
from matplotlib import gridspec

In [2]:
# Unique run name: a fixed prefix followed by the current Unix time in whole
# seconds, so every execution of this cell gets its own log directory.
NAME = "cifakeCNN{}".format(int(time.time()))

# TensorBoard callback that writes its event files under logs/<NAME>.
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

In [3]:
# Report how many GPU devices TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


In [4]:
# Training split, loaded straight from the directory tree.
ds_train = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/train',
    labels='inferred',          # class labels come from the sub-directory names
    label_mode='binary',        # two classes -> one binary label per image
    image_size=[64,64],         # every image is resized to 64x64
    interpolation='nearest',    # nearest-neighbour resampling when resizing
    batch_size=32,
    shuffle=True,
)

Found 100000 files belonging to 2 classes.


In [5]:
# Held-out split, loaded with the same settings as the training split so both
# datasets yield identically shaped batches.
# NOTE(review): shuffle=True is not needed for data that is only evaluated on;
# confirm whether shuffling this split is intentional.
ds_test = image_dataset_from_directory(
    '../cifar-10-stable-diffusion-detection/test',
    labels='inferred',          # class labels come from the sub-directory names
    label_mode='binary',        # two classes -> one binary label per image
    image_size=[64,64],         # every image is resized to 64x64
    interpolation='nearest',    # nearest-neighbour resampling when resizing
    batch_size=32,
    shuffle=True,
)

Found 20000 files belonging to 2 classes.


In [6]:
def convert_to_float(image, label):
    """Return ``(image as float32, label)`` for use with ``Dataset.map``.

    The image goes through ``tf.image.convert_image_dtype``; the label is
    passed through untouched.

    NOTE(review): per the TensorFlow docs, ``convert_image_dtype`` rescales
    integer inputs to [0, 1] but leaves inputs that are already floating
    point unscaled. Which dtype the loader yields is not visible here --
    confirm the resulting value range.
    """
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float, label


# Sentinel passed as the prefetch buffer size in the dataset pipelines.
AUTOTUNE = tf.data.experimental.AUTOTUNE


In [7]:
# Training input pipeline: convert images to float32, cache the converted
# batches, and prefetch so data preparation can overlap with training.
# NOTE(review): .cache() sits downstream of the shuffle performed by the
# loader, so later epochs presumably replay the cached batches in the same
# order -- confirm, and consider adding a .shuffle(...) after .cache().
# NOTE(review): this rebinds `ds_train`; re-running the cell maps the
# conversion a second time. Re-run the loading cell first.
ds_train = (
    ds_train
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)


In [8]:
# Validation input pipeline, built from `ds_test` with the same steps as the
# training pipeline (float32 conversion, cache, prefetch).
# NOTE(review): the validation data is the test split, so anything selected
# on these metrics has already seen the test images -- confirm that no
# separate held-out evaluation is expected.
ds_valid = (
    ds_test
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [9]:
# Stop a training run once validation loss stops improving.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',           # metric watched at the end of every epoch
    patience=5,                   # epochs without improvement before stopping
    min_delta=0.001,              # smaller changes do not count as improvement
    restore_best_weights=True)    # roll back to the best epoch's weights

In [10]:
from keras_tuner import RandomSearch

def _tuned_conv(hp, index):
    """Return a ReLU, same-padded Conv2D layer whose size is tuned.

    Registers two hyperparameters on ``hp``: ``conv_<index>_filter``
    (2 or 4 filters) and ``conv_<index>_kernel`` (3x3 or 5x5 kernel).
    """
    return layers.Conv2D(
        filters=hp.Int('conv_{}_filter'.format(index), min_value=2, max_value=4, step=2),
        kernel_size=hp.Choice('conv_{}_kernel'.format(index), values=[3, 5]),
        activation='relu',
        padding='same',
    )


def build_model(hp):
    """Build and compile the binary-classification CNN for one tuner trial.

    Args:
        hp: Keras Tuner ``HyperParameters`` object. The function registers
            ``conv_{1..4}_filter``, ``conv_{1..4}_kernel``, ``dense_1_units``,
            ``dropout_rate`` and ``learning_rate`` on it.

    Returns:
        A compiled ``Sequential`` model that takes 64x64x3 images and emits a
        single sigmoid output, trained with binary cross-entropy and reporting
        ``binary_accuracy``.
    """
    model = Sequential([
        layers.InputLayer(input_shape=[64, 64, 3]),
        # Same layer as the deprecated `layers.experimental.preprocessing`
        # alias, taken from its stable location.
        layers.RandomFlip(mode='horizontal'),

        # Block 1
        layers.BatchNormalization(renorm=True),
        _tuned_conv(hp, 1),
        layers.MaxPool2D(),

        # Block 2
        layers.BatchNormalization(renorm=True),
        _tuned_conv(hp, 2),
        layers.MaxPool2D(),

        # Block 3: two stacked convolutions before pooling
        layers.BatchNormalization(renorm=True),
        _tuned_conv(hp, 3),
        _tuned_conv(hp, 4),
        layers.MaxPool2D(),

        # Classification head
        layers.BatchNormalization(renorm=True),
        layers.Flatten(),
        layers.Dense(hp.Int('dense_1_units', min_value=16, max_value=64, step=16),
                     activation='relu'),
        layers.Dropout(hp.Float('dropout_rate', min_value=0.0, max_value=0.5, step=0.1)),
        layers.Dense(1, activation='sigmoid'),
    ])

    # Keras Tuner documents the sampling modes in lowercase ("linear", "log",
    # "reverse_log"); the previous 'LOG' spelling relied on version-specific
    # case folding.
    learning_rate = hp.Float('learning_rate', min_value=0.001, max_value=0.1,
                             sampling='log')
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,
                                         epsilon=0.01)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy'])
    return model



In [11]:
# Random search over the hyperparameters registered in `build_model`.
# NOTE(review): no `seed` is passed, so the sampled trials presumably differ
# between runs; and because `directory`/`project_name` are fixed, a re-run may
# pick up earlier results stored under projects/cifake -- confirm both.
tuner = RandomSearch(
    build_model,
    objective='val_binary_accuracy',   # trials are ranked by validation accuracy
    max_trials=5,                      # number of hyperparameter combinations tried
    executions_per_trial=3,            # each combination is trained three times
    directory='projects',
    project_name='cifake')



# Start the hyperparameter search with early stopping.
# Each execution trains for at most 50 epochs; `early_stopping` may end it
# sooner and `tensorboard` logs the run.
tuner.search(ds_train, epochs=50, validation_data=ds_valid, callbacks=[early_stopping, tensorboard])

# Get the best model: the top-ranked model found by the search.
best_model = tuner.get_best_models(num_models=1)[0]


Search: Running Trial #1

Value             |Best Value So Far |Hyperparameter
2                 |2                 |conv_1_filter
3                 |3                 |conv_1_kernel
2                 |2                 |conv_2_filter
3                 |3                 |conv_2_kernel
4                 |4                 |conv_3_filter
5                 |5                 |conv_3_kernel
4                 |4                 |conv_4_filter
3                 |3                 |conv_4_kernel
32                |32                |dense_1_units
0                 |0                 |dropout_rate
0.067794          |0.067794          |learning_rate

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 19/50
Epoch 4/50
Epoch 5/50
Epoch 8/50
Epoch 9/50
Epoch 1/50
Epoch 2/50
Epoch 6/50
 111/3125 [>.............................] - ETA: 8:38 - loss: 0

KeyboardInterrupt: 

In [None]:
# Print the layer-by-layer architecture of the best model from the search.
best_model.summary()

In [None]:
# Print the tuner's summary of the trials it has run.
tuner.results_summary()

In [None]:

# Plot learning curves
#
# `tuner.search(...)` does not hand back a Keras History object, and no other
# cell defines `history`, so this cell used to fail with a NameError on a fresh
# kernel. Retrain a fresh model with the best hyperparameters found by the
# search; `fit` returns the per-epoch metrics needed for the plots.
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
final_model = tuner.hypermodel.build(best_hps)
history = final_model.fit(
    ds_train,
    validation_data=ds_valid,
    epochs=50,
    callbacks=[early_stopping],
)

# One row per epoch, one column per logged metric.
history_frame = pd.DataFrame(history.history)
history_frame.loc[:, ['loss', 'val_loss']].plot(title='Loss')
history_frame.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title='Binary accuracy');
