In [None]:
import tensorflow as tf
from keras.layers import GlobalAveragePooling2D, Dense, Conv2D, MaxPooling2D, Dropout, BatchNormalization
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
import keras_cv
import keras

In [None]:
# Dataset locations and loader settings

TRAIN_DATADIR = "../data/train_directory"
VAL_DATADIR = "../data/val_directory"
TEST_DATADIR = "../data/test_directory"
BATCH_SIZE = 128  # Lower this if you run out of memory

# Arguments that are the same for the train / validation / test loaders:
# class labels are inferred from the sub-directory names and one-hot encoded,
# images are read as RGB and resized to 224x224.
_SHARED_LOADER_KWARGS = dict(
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    color_mode="rgb",
    image_size=(224, 224),
)

# Training split: shuffled, with a fixed seed.
train_ds = keras.utils.image_dataset_from_directory(
    TRAIN_DATADIR, batch_size=BATCH_SIZE, shuffle=True, seed=0, **_SHARED_LOADER_KWARGS
)

# Validation split: kept in directory order.
val_ds = keras.utils.image_dataset_from_directory(
    VAL_DATADIR, batch_size=BATCH_SIZE, shuffle=False, **_SHARED_LOADER_KWARGS
)

# Test split: kept in directory order, one image per batch.
test_ds = keras.utils.image_dataset_from_directory(
    TEST_DATADIR, batch_size=1, shuffle=False, **_SHARED_LOADER_KWARGS
)

### Augmentations

In [None]:
# Random flip along the horizontal and/or vertical axis
random_flip = keras_cv.layers.RandomFlip(mode="horizontal_and_vertical")

# Random crop (crop_area_factor 0.8-1.0, aspect_ratio_factor 0.9-1.1) resized back to 224x224
crop_and_resize = keras_cv.layers.RandomCropAndResize(
    target_size=(224, 224), crop_area_factor=(0.8, 1.0), aspect_ratio_factor=(0.9, 1.1)
)

# RandAugment: three randomly chosen augmentations per image, for inputs scaled to [0, 1]
rand_augment = keras_cv.layers.RandAugment(
    augmentations_per_image=3, value_range=(0, 1), magnitude=0.5, magnitude_stddev=0.2, rate=1.0
)

# AugMix: blends several augmentation chains into one image.
# Stays closer to the original image than CutMix or MixUp.
aug_mix = keras_cv.layers.AugMix(
    value_range=[0, 1], severity=0.3, num_chains=3, chain_depth=[1, 3], alpha=1.0, seed=0
)

# CutMix: paste patches cut from one image onto another
cut_mix = keras_cv.layers.preprocessing.CutMix()

# MixUp: blend two images together
mix_up = keras_cv.layers.preprocessing.MixUp()

# Pick either CutMix or MixUp at random, one choice per batch
cut_mix_or_mix_up = keras_cv.layers.RandomChoice([cut_mix, mix_up], batchwise=True)

# Define the augmentation function
def augmenter_train(images, labels):
    """Training-time preprocessing for one (images, labels) element.

    Casts the images to float32, divides by 255, then applies the random
    flip followed by the random crop-and-resize layer (both in training
    mode). Labels are returned unchanged.

    The RandAugment, AugMix and CutMix/MixUp layers defined in this notebook
    are not applied here.
    """
    scaled = tf.cast(images, tf.float32) / 255.0
    augmented = crop_and_resize(random_flip(scaled, training=True), training=True)
    return augmented, labels

def augmenter_val(images, labels):
    """Evaluation-time preprocessing: cast to float32 and divide by 255.

    No random augmentation is applied; labels are returned unchanged.
    """
    as_float = tf.cast(images, tf.float32)
    return as_float / 255.0, labels

In [None]:
# Attach the preprocessing functions to the (already batched) datasets.
# The names are rebound in place, so re-running this cell without re-creating
# the datasets would apply the mapping a second time.
train_ds = (
    train_ds
    .map(augmenter_train, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

val_ds = (
    val_ds
    .map(augmenter_val, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# The test split is mapped but not prefetched.
test_ds = test_ds.map(augmenter_val, num_parallel_calls=tf.data.AUTOTUNE)

In [None]:
# Setup callbacks
checkpoint_callback = ModelCheckpoint(
    'best_model.keras',                 # Path where the model is saved (same file for every run using this callback)
    monitor='val_loss',                 # Metric to monitor
    save_best_only=True,                # Save only the best model
    save_weights_only=False,            # Save entire model not just weights
    verbose=1                           # Logging level
)

early_stopping_callback = EarlyStopping(
    monitor='val_loss',                 # Metric to monitor
    patience=10,                        # Number of epochs with no improvement after which training will be stopped
    verbose=1                           # Logging level
)

# The floor must sit below the optimizer's starting learning rate. The models in
# this notebook are compiled with optimizer='adam' (Keras default learning rate
# 0.001), so a floor of 0.001 would stop the rate from ever being reduced.
reduce_lr_callback = ReduceLROnPlateau(
    monitor='val_loss',                 # Metric to monitor
    factor=0.2,                         # Factor by which the learning rate will be reduced
    patience=5,                         # Number of epochs with no improvement after which learning rate will be reduced
    min_lr=1e-6,                        # Lower bound on the learning rate
    verbose=1                           # Logging level
)


## Looking at the Architecture of the Model First

In [None]:
IMG_HEIGHT = 224
IMG_WIDTH = 224


def _add_conv_stage(model, n_filters, kernel_size, **conv_kwargs):
    """Append Conv2D (ReLU, 'same' padding, He-normal init) followed by BatchNormalization."""
    model.add(Conv2D(n_filters, kernel_size, activation='relu', padding='same',
                     kernel_initializer='he_normal', **conv_kwargs))
    model.add(BatchNormalization())


def _add_dense_stage(model, units, dropout_rate):
    """Append Dense (ReLU, He-normal init), Dropout and BatchNormalization, in that order."""
    model.add(Dense(units, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())


def create_model(filters=(64, 128, 256), kernel_size=(7, 7),
                 add_conv_block=False, extra_dense_layers=0, use_pooling=True, img_height=IMG_HEIGHT, img_width=IMG_WIDTH,
                 num_classes=11, dropout_rate=0.5):
    """Build and compile a small CNN classifier.

    Args:
        filters: Sequence of at least three filter counts, one per conv stage.
        kernel_size: Kernel size used by every convolution.
        add_conv_block: If True, a fourth convolution (with ``filters[2]``
            filters) is appended after the third one, followed by max pooling
            when ``use_pooling`` is set.
        extra_dense_layers: Number of additional 256-unit dense stages added
            after the two base dense stages.
        use_pooling: Whether to insert 2x2 max pooling after the first two
            conv stages (and after the optional fourth convolution).
        img_height: Input image height in pixels.
        img_width: Input image width in pixels.
        num_classes: Number of units in the softmax output layer.
        dropout_rate: Dropout rate used in every dense stage.

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = Sequential()

    # First convolutional stage (carries the input shape)
    _add_conv_stage(model, filters[0], kernel_size, input_shape=(img_height, img_width, 3))
    if use_pooling:
        model.add(MaxPooling2D((2, 2)))

    # Second convolutional stage
    _add_conv_stage(model, filters[1], kernel_size)
    if use_pooling:
        model.add(MaxPooling2D((2, 2)))

    # Third convolutional stage: always present. It is only followed by
    # pooling when the optional fourth convolution below is enabled.
    _add_conv_stage(model, filters[2], kernel_size)
    if add_conv_block:
        # Optional fourth convolution, reusing the third stage's filter count
        _add_conv_stage(model, filters[2], kernel_size)
        if use_pooling:
            model.add(MaxPooling2D((2, 2)))

    # Collapse the spatial dimensions
    model.add(GlobalAveragePooling2D())

    # Two base dense stages, then the requested number of extra 256-unit stages
    _add_dense_stage(model, 512, dropout_rate)
    _add_dense_stage(model, 256, dropout_rate)
    for _ in range(extra_dense_layers):
        _add_dense_stage(model, 256, dropout_rate)

    # Output layer
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
def evaluate_models(models_to_test, train_dataset, val_dataset, epochs=10, callbacks=None):
    """Train each model and record its best validation accuracy.

    Args:
        models_to_test: Iterable of compiled models exposing ``fit``.
        train_dataset: Training data passed to ``fit``.
        val_dataset: Validation data passed to ``fit``.
        epochs: Number of epochs per model.
        callbacks: Callbacks passed to every ``fit`` call. When omitted, the
            notebook-level checkpoint, early-stopping and reduce-LR callbacks
            are used.

    Returns:
        A list of ``(model, max_val_accuracy)`` tuples, in input order.
    """
    if callbacks is None:
        # NOTE: the same callback instances are reused for every model, so the
        # checkpoint callback writes all runs to a single file path.
        callbacks = [checkpoint_callback, early_stopping_callback, reduce_lr_callback]

    results = []
    for model in models_to_test:
        # No steps_per_epoch / validation_steps here: the datasets are already
        # batched, so len(dataset) is a batch count. Dividing it by the batch
        # size again would cut each epoch to a small fraction of the data.
        # Without the overrides, each epoch runs over the whole dataset.
        history = model.fit(
            train_dataset,
            epochs=epochs,
            validation_data=val_dataset,
            callbacks=callbacks,
        )
        results.append((model, max(history.history['val_accuracy'])))
    return results


# Architecture sweep: with the (64, 128, 256) filters, every combination of
# extra dense layers (0, 1, 3, 2 - in that order) and the optional extra conv block.
configs = [
    {
        'filters': (64, 128, 256),
        'kernel_size': (7, 7),
        'add_conv_block': with_extra_conv,
        'extra_dense_layers': n_extra_dense,
        'use_pooling': True,
    }
    for n_extra_dense in (0, 1, 3, 2)
    for with_extra_conv in (False, True)
]
# Last entry: the baseline layout with half as many filters per stage.
configs.append(
    {'filters': (32, 64, 128), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 0, 'use_pooling': True}
)

# Build every model up front, then train and score them one after another.
models_to_test = [create_model(**cfg) for cfg in configs]
results = evaluate_models(models_to_test, train_ds, val_ds)

# Report the best validation accuracy next to the configuration that produced it
for config, (model, accuracy) in zip(configs, results):
    print("Model configuration:", config)
    print("Achieved maximum validation accuracy:", accuracy)

Output:

*   Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 0, 'use_pooling': True}
Achieved maximum validation accuracy: 0.6000000238418579
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': True, 'extra_dense_layers': 0, 'use_pooling': True}
Achieved maximum validation accuracy: 0.6000000238418579
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 1, 'use_pooling': True}
Achieved maximum validation accuracy: 0.4000000059604645
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': True, 'extra_dense_layers': 1, 'use_pooling': True}
Achieved maximum validation accuracy: 0.3456125855445862
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 3, 'use_pooling': True}
Achieved maximum validation accuracy: 0.2574503421783447
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': True, 'extra_dense_layers': 3, 'use_pooling': True}
Achieved maximum validation accuracy: 0.4000000059604645
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 2, 'use_pooling': True}
Achieved maximum validation accuracy: 0.4000000059604645
* Model configuration: {'filters': (64, 128, 256), 'kernel_size': (7, 7), 'add_conv_block': True, 'extra_dense_layers': 2, 'use_pooling': True}
Achieved maximum validation accuracy: 0.4000000059604645
* Model configuration: {'filters': (32, 64, 128), 'kernel_size': (7, 7), 'add_conv_block': False, 'extra_dense_layers': 0, 'use_pooling': True}
Achieved maximum validation accuracy: 0.6000000238418579

## Looking at Hyperparameters

In [None]:
import keras_tuner as kt

def build_model(hp):
    """Build and compile the three-stage CNN for one hyperparameter trial.

    Args:
        hp: Hyperparameter container providing ``Choice``. Two values are
            drawn from it: ``dropout_rate`` (0.3 / 0.5 / 0.7, shared by both
            dropout layers) and ``learning_rate`` (1e-4 / 1e-3 / 1e-2 / 1e-1).

    Returns:
        A compiled ``Sequential`` model (Adam optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    # Draw the rate once; both dropout layers share the 'dropout_rate' hyperparameter.
    dropout_rate = hp.Choice('dropout_rate', values=[0.3, 0.5, 0.7])

    model = Sequential()

    # First Convolutional Block
    # The input shape must match the images produced by the datasets
    # (IMG_HEIGHT x IMG_WIDTH, i.e. 224x224), not 256x256.
    model.add(Conv2D(64, (7, 7), activation='relu', padding='same', input_shape=(IMG_HEIGHT, IMG_WIDTH, 3), kernel_initializer='he_normal'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))

    # Second Convolutional Block
    model.add(Conv2D(128, (7, 7), activation='relu', padding='same', kernel_initializer='he_normal'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))

    # Third Convolutional Block
    model.add(Conv2D(256, (7, 7), activation='relu', padding='same', kernel_initializer='he_normal'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))

    # Global Average Pooling
    model.add(GlobalAveragePooling2D())

    # Dense Layers
    model.add(Dense(512, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())

    model.add(Dense(256, activation='relu', kernel_initializer='he_normal'))
    model.add(Dropout(dropout_rate))
    model.add(BatchNormalization())

    # Output Layer
    model.add(Dense(11, activation='softmax'))

    learning_rate = hp.Choice('learning_rate', values=[0.0001, 0.001, 0.01, 0.1])
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# Random search over the hyperparameters declared in build_model:
# ten trials, one execution each, ranked by validation accuracy.
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=1,
    directory='logs',
    project_name='hyperparam_tuning',
)

# Stop a run after three epochs without a val_loss improvement and restore the best weights
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Launch the search
tuner.search(train_ds, validation_data=val_ds, epochs=10, callbacks=[early_stopping])

# Pull out the top-ranked hyperparameter set
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(f'Best Hyperparameters: {best_hps.values}')

# Rebuild a fresh model from the winning hyperparameters and train it
model = tuner.hypermodel.build(best_hps)
history = model.fit(train_ds, validation_data=val_ds, epochs=10, callbacks=[early_stopping])

**Training with Learning Rate: 0.0001, Batch Size: 16, Dropout Rate: 0.3**


Trial 1:
Hyperparameters: {'dropout_rate': 0.3, 'learning_rate': 0.0001}
Score: 0.6480793356895447

Trial 2:
Hyperparameters: {'dropout_rate': 0.3, 'learning_rate': 0.01}
Score: 0.5555555820465088

Trial 3:
Hyperparameters: {'dropout_rate': 0.7, 'learning_rate': 0.0001}
Score: 0.5340768098831177

Trial 4:
Hyperparameters: {'dropout_rate': 0.5, 'learning_rate': 0.001}
Score: 0.47211897373199463

Trial 5:
Hyperparameters: {'dropout_rate': 0.3, 'learning_rate': 0.001}
Score: 0.45105329155921936

Trial 6:
Hyperparameters: {'dropout_rate': 0.7, 'learning_rate': 0.01}
Score: 0.4089219272136688

Trial 7:
Hyperparameters: {'dropout_rate': 0.5, 'learning_rate': 0.01}
Score: 0.3238331377506256

Trial 8:
Hyperparameters: {'dropout_rate': 0.7, 'learning_rate': 0.001}
Score: 0.29120197892189026

Trial 9:
Hyperparameters: {'dropout_rate': 0.5, 'learning_rate': 0.1}
Score: 0.26517966389656067

Trial 10:
Hyperparameters: {'dropout_rate': 0.3, 'learning_rate': 0.1}
Score: 0.2593969404697418
