In [1]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.utils import image_dataset_from_directory
from tensorflow import keras
from keras import layers
from keras.activations import swish
from keras.callbacks import TensorBoard
import time
import matplotlib.pyplot as plt
from matplotlib import gridspec

2024-05-03 18:18:02.449048: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-03 18:18:02.449144: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-03 18:18:02.709557: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  2


In [3]:
input_dir = "/kaggle/input/cifar-10-fake-image-dataset/cifar-10-dataset/" #! Kaggle path
working_dir = "/kaggle/working/" #! Kaggle path

In [4]:
NAME = "cifakeCNN{}".format(time.strftime("%Y%m%d-%H%M%S"))
tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

In [5]:
# gpus = tf.config.list_physical_devices('GPU')
# if gpus:
#   # """Restrict TensorFlow to only allocate 1GB of memory on the first GPU"""
#   try:
#     tf.config.set_logical_device_configuration(
#         gpus[0],
#         [tf.config.LogicalDeviceConfiguration(memory_limit=1024)])
#     logical_gpus = tf.config.list_logical_devices('GPU')
#     print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
#   except RuntimeError as e:
#     # Virtual devices must be set before GPUs have been initialized
#     print(e)

In [6]:
#! """When running locally, limits the threads to 1 (only uses one thread of one CPU core)"""
# tf.config.threading.set_intra_op_parallelism_threads(1)
# tf.config.threading.set_inter_op_parallelism_threads(1)

In [7]:
#! """When running on Kaggle, copies the input directory cifake_hyperband to the working directory for
#! tuner continuity / cumulative session training (loading backups from the BackupAndRestore callback)."""
# from distutils.dir_util import copy_tree
# source_dir = "/kaggle/input/cifake_hyperband/"
# destination_dir = "/kaggle/working/"
# copy_tree(source_dir, destination_dir)

In [8]:
ds_train = image_dataset_from_directory(
    f'{input_dir}train',
    labels='inferred',
    label_mode='binary',
    image_size=[32,32],
    interpolation='nearest',
    batch_size=32,
    shuffle=True,
#     seed=69,
    #validation_split=None,
    subset=None,
    follow_links=False,
    crop_to_aspect_ratio=False,
)

Found 100000 files belonging to 2 classes.


In [9]:
ds_test = image_dataset_from_directory(
    f'{input_dir}test',
    labels='inferred',
    label_mode='binary',
    image_size=[32,32],
    interpolation='nearest',
    batch_size=32,
    shuffle=True,
#     seed=69,
    #validation_split=None,
    subset=None,
    follow_links=False,
    crop_to_aspect_ratio=False,
)

Found 20000 files belonging to 2 classes.


In [10]:
def convert_to_float(image, label):
    image = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return image, label

AUTOTUNE = tf.data.experimental.AUTOTUNE


In [11]:
ds_train = (
    ds_train
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)


In [12]:
ds_valid = (
    ds_test
    .map(convert_to_float)
    .cache()
    .prefetch(buffer_size=AUTOTUNE)
)

In [13]:
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=14,
    min_delta=0.001,
    restore_best_weights=True)

In [14]:
from keras.callbacks import ReduceLROnPlateau

reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=12, min_lr=0.0008)

In [15]:
from keras.callbacks import ModelCheckpoint
timestamp = time.strftime("%Y%m%d-%H%M%S")
model_checkpoint = ModelCheckpoint(
    f'{working_dir}/best/best_model_{timestamp}.keras', monitor='val_loss', save_best_only=True, mode='min'
)

In [16]:
from keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr):
    if epoch < 28:
        return lr
    else:
        return lr * 0.97 #if lr > 0.001 else lr * 2

schedule_lr = LearningRateScheduler(scheduler)

In [17]:
backup_dir = f"{working_dir}/backup/latest"
backup_callback = keras.callbacks.BackupAndRestore(
    backup_dir, save_freq="epoch", delete_checkpoint=True
)

In [18]:
def MBConv6(input_tensor, hp=None, out_channels=32, expansion=6, squeezing=4, use_se=True, dropout_rate=0.0):
    # inverted residual structure
    # pointwise convolution 1 expansion
    x = layers.Conv2D(expansion * input_tensor.shape[-1], (1,  1), padding='same', use_bias=False)(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(swish)(x)

    # Depthwise Separable Convolution 
    x = layers.DepthwiseConv2D(kernel_size=(3,  3), strides=(1,  1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(swish)(x)

    # pointwise convolution 2 bottleneck
    x = layers.Conv2D(out_channels, (1,  1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    # Dropout
    if dropout_rate >  0.0:
        x = layers.Dropout(dropout_rate)(x)

    # bottleneck
    x = layers.Conv2D(out_channels, (1,  1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    # Squeeze-and-Excitation
    if use_se:
        se_shape = (1,  1, out_channels)
        se = layers.GlobalAveragePooling2D()(x)
        se = layers.Reshape(se_shape)(se)
        se = layers.Conv2D(out_channels // squeezing, (1,  1), padding='same', use_bias=True)(se)
        se = layers.Activation(swish)(se)
        se = layers.Conv2D(out_channels, (1,  1), padding='same', use_bias=True)(se)
        se = layers.Activation('sigmoid')(se)
        x = layers.Multiply()([x, se])

    # Residual
    if input_tensor.shape[-1] == out_channels:
        shortcut = input_tensor
    else:
        shortcut = layers.Conv2D(out_channels, (1,  1), strides=(1,  1), padding='same', use_bias=False)(input_tensor)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.Add()([x, shortcut])

    return x
input_tensor = layers.Input(shape=(32,  32,  16))
output_tensor = MBConv6(input_tensor)
model = keras.models.Model(inputs=input_tensor, outputs=output_tensor)

model.summary()

In [19]:

def MBConv1(input_tensor, hp=None, expansion=4, out_channels=16, strides=(1,   1), use_se=True):

    x = layers.Conv2D(expansion, (1,   1), padding='same', use_bias=False)(input_tensor)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(swish)(x)


    x = layers.DepthwiseConv2D(kernel_size=(3,   3), strides=strides, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(swish)(x)


    x = layers.Conv2D(out_channels, (1,   1), padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)

    # Squeeze-and-Excitation
    if use_se:
        se_shape = (1,  1, out_channels)
        se = layers.GlobalAveragePooling2D()(x)
        se = layers.Reshape((1,  1, out_channels))(se)
        se = layers.Conv2D(out_channels //  2, (1,  1), padding='same', use_bias=True)(se)
        se = layers.Activation(swish)(se)
        se = layers.Conv2D(out_channels, (1,  1), padding='same', use_bias=True)(se)
        se = layers.Activation('sigmoid')(se)
        se = layers.Reshape(se_shape[1:])(se)  # Reshape to original
        x = layers.Multiply()([x, se])

    # Residual
    if input_tensor.shape[-1] == out_channels and strides == (1,   1):
        x = layers.Add()([x, input_tensor])
    else:
        input_tensor = layers.Conv2D(out_channels, (1,   1), strides=strides, padding='same', use_bias=False)(input_tensor)
        x = layers.Add()([x, input_tensor])

    return x

input_tensor = layers.Input(shape=(32,  32,  3))
output_tensor = MBConv1(input_tensor)
model = keras.models.Model(inputs=input_tensor, outputs=output_tensor)

model.summary()

In [20]:
!pip install keras-tuner
import keras_tuner
from keras_tuner import RandomSearch, Hyperband
from keras.models import Model
from keras.metrics import Accuracy, Precision, Recall, F1Score
from kerastuner import Objective



  from kerastuner import Objective


In [21]:
def build_model(hp=None):
    activation_function = 'swish'  #gelu, silu, leaky_relu etc were worse
    input_tensor = layers.Input(shape=[32,  32,  3])
    x = layers.Reshape(target_shape=(32, 32, 3))(input_tensor)

    if hp.Boolean('PRECONV_norm'):
        x = layers.BatchNormalization()(x)
    x = MBConv6(input_tensor, hp=hp, out_channels=hp.Int('out_channels_2', min_value=20, max_value=44, step=12),
                expansion=hp.Int('expansion_2', min_value=4, max_value=8, step=2),
                squeezing=hp.Choice('squeezing', values=[1, 2, 4]),
                use_se=hp.Boolean('use_se_2'),
                dropout_rate=hp.Float('conv6_dropout_rate_2', min_value=0.0, max_value=0.2, step=0.1))

    x = layers.Conv2D(filters=hp.Int('conv_filter', min_value=32, max_value=96, step=64),
                      kernel_size=hp.Choice('conv_kernel', values=[3,  7]),
                      activation=activation_function, padding='same',
                      kernel_regularizer=keras.regularizers.l2(0.003) if hp.Boolean('l2_reg') else None)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(hp.Float(f'dropout_INBCONV_rate', min_value=0.0, max_value=0.1, step=0.1))(x)
    x = layers.MaxPool2D()(x)
    x = layers.Conv2D(filters=hp.Int('conv_filter', min_value=32, max_value=96, step=64),
                      kernel_size=hp.Choice('conv_kernel', values=[3,  7]),
                      activation=activation_function, padding='same',
                      kernel_regularizer=keras.regularizers.l2(0.003) if hp.Boolean('l2_reg') else None)(x)
    if hp.Boolean('use_maxpool'):
        x = layers.MaxPool2D()(x)
    x = layers.MaxPool2D()(x)
    x = layers.BatchNormalization()(x)
    x = layers.Flatten()(x)


    for i in range(hp.Int('num_dense_layers', min_value=4, max_value=12, step=2)):
        x = layers.Dense(hp.Int(f'dense_units', min_value=256, max_value=512, step=256),
                         activation=activation_function,
                         kernel_regularizer=keras.regularizers.l2(0.003) if hp.Boolean('l2_reg') else None)(x)
        x = layers.Dropout(hp.Float(f'dropout_{i}_rate', min_value=0.0, max_value=0.6, step=0.1))(x)

    x = layers.Dense(1, activation='sigmoid')(x)
    optimizer_choice = hp.Choice('optimizer', values=['Adam', 'Adamax'])#'Adagrad', 'Adadelta'

    if optimizer_choice == 'Adamax':
        optimizer = tf.keras.optimizers.Adamax(learning_rate=hp.Float('adamax_learning_rate', min_value=0.002, max_value=0.01, sampling='LOG'))
    elif optimizer_choice == 'Adam':
        optimizer = tf.keras.optimizers.Adam(learning_rate=hp.Float('adam_learning_rate', min_value=0.0035, max_value=0.01, sampling='LOG'))


    model = Model(inputs=input_tensor, outputs=x)
    model.compile(optimizer=optimizer_choice,
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy',
                      Precision(name='precision'),
                      Recall(name='recall'),
                      F1Score(name='f1_score'),
                      tf.keras.metrics.AUC(name='auc')])
    return model


In [22]:
auc_obj = Objective('val_auc', direction='max')

In [23]:

tuner = Hyperband(
    build_model,
    objective=auc_obj,
    max_epochs=35,
    factor=3,
    directory=f'{working_dir}/projects',
    project_name='cifake_hyperband')

In [24]:
tuner.search(ds_train, epochs=35, validation_data=ds_valid, callbacks=[early_stopping,  reduce_lr, schedule_lr]) #, backup_callback,tensorboard, verbose=0 model_checkpoint
best_models = tuner.get_best_models(num_models=10)
for i in range(10):
    best_models[i].save(f"{working_dir}/{i}BEST{NAME}.keras")
best_model = best_models[0]

Trial 90 Complete [00h 13m 36s]
val_auc: 0.9915252327919006

Best val_auc So Far: 0.9923116564750671
Total elapsed time: 05h 54m 13s


In [25]:
best_model.summary()

In [26]:
tuner.results_summary()

Results summary
Results in /kaggle/working//projects/cifake_hyperband
Showing 10 best trials
Objective(name="val_auc", direction="max")

Trial 0035 summary
Hyperparameters:
PRECONV_norm: False
out_channels_2: 32
expansion_2: 8
squeezing: 2
use_se_2: False
conv6_dropout_rate_2: 0.0
conv_filter: 96
conv_kernel: 7
l2_reg: False
dropout_INBCONV_rate: 0.1
use_maxpool: True
num_dense_layers: 4
dense_units: 512
dropout_0_rate: 0.5
dropout_1_rate: 0.5
dropout_2_rate: 0.5
dropout_3_rate: 0.2
optimizer: Adamax
adam_learning_rate: 0.004625134808211431
adamax_learning_rate: 0.0028052820648125135
dropout_4_rate: 0.5
dropout_5_rate: 0.1
dropout_6_rate: 0.4
dropout_7_rate: 0.4
dropout_8_rate: 0.4
dropout_9_rate: 0.30000000000000004
dropout_10_rate: 0.1
dropout_11_rate: 0.1
tuner/epochs: 4
tuner/initial_epoch: 2
tuner/bracket: 3
tuner/round: 1
tuner/trial_id: 0021
Score: 0.9923116564750671

Trial 0048 summary
Hyperparameters:
PRECONV_norm: True
out_channels_2: 44
expansion_2: 6
squeezing: 1
use_se_2: 

In [27]:
best_model.save(f"{working_dir}/BEST{NAME}.keras")

In [28]:

best_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]

# Output the best model's metrics
best_metrics = best_model.evaluate(ds_valid)
print("Validation Loss:", best_metrics[0])
print("Validation Binary Accuracy:", best_metrics[1])
print("Validation Precision:", best_metrics[2])
print("Validation Recall:", best_metrics[3])
print("Validation F1 Score:", best_metrics[4])

[1m 15/625[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m7s[0m 12ms/step - auc: 0.9882 - binary_accuracy: 0.9519 - f1_score: 0.6691 - loss: 0.1554 - precision: 0.9437 - recall: 0.9606

W0000 00:00:1714781883.641850      77 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m625/625[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 7ms/step - auc: 0.9923 - binary_accuracy: 0.9601 - f1_score: 0.6634 - loss: 0.1133 - precision: 0.9595 - recall: 0.9601
Validation Loss: 0.11282750219106674
Validation Binary Accuracy: 0.9595999717712402
Validation Precision: 0.9597839117050171
Validation Recall: 0.9593999981880188
Validation F1 Score: 0.6673561930656433
