In [1]:
import os
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow import keras
from keras import layers, regularizers, optimizers, losses
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.applications.mobilenet_v2 import preprocess_input as mobilenet_preprocessing
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.metrics import classification_report, confusion_matrix
from keras.utils import set_random_seed, image_dataset_from_directory
from training import prepare_dataset

In [3]:
SEED = 42
DATASET_PATH = "../../../dataset/"
EPOCHS = 150
BATCH_SIZE = 64
AUTOTUNE = tf.data.AUTOTUNE
INPUT_SHAPE = (224, 224, 3, )

# `PYTHONHASHSEED` environment variable
os.environ['PYTHONHASHSEED'] = str(SEED)

# Python built-in random, numpy(+ scikit) and tensorflow seed
set_random_seed(SEED)

In [4]:
# Load the training dataset
print("Loading train dataset...")
train_dataset = image_dataset_from_directory(
    directory=os.path.join(DATASET_PATH, "train"),
    label_mode="categorical",
    validation_split=None,
    image_size=(224, 224),
    batch_size=None,
    seed=SEED
    )
print("Train dataset loaded!")
print("Labels in the dataset: ", train_dataset.class_names)

# Load the validation dataset
print("Loading validation dataset...")
val_dataset = image_dataset_from_directory(
    directory=os.path.join(DATASET_PATH, "valid"),
    label_mode="categorical",
    validation_split=None,
    image_size=(224, 224),
    batch_size=None,
    seed=SEED
    )
print("Validation dataset loaded!")

train_ds = train_dataset.shuffle(256, seed=SEED, reshuffle_each_iteration=False).take(int(len(train_dataset)*0.25))
val_ds = val_dataset.shuffle(128, seed=SEED, reshuffle_each_iteration=False).take(int(len(val_dataset)*0.25))

train_ds = prepare_dataset(train_ds, augment=True)
val_ds = prepare_dataset(val_ds)

len(train_ds), len(val_ds)

Loading train dataset...
Found 11220 files belonging to 2 classes.
Train dataset loaded!
Labels in the dataset:  ['savory', 'unsavory']
Loading validation dataset...
Found 600 files belonging to 2 classes.
Validation dataset loaded!


(88, 5)

In [5]:
import keras_tuner as kt

def build_model(hp):
    model = keras.models.Sequential()
    model.add(keras.Input(shape=(INPUT_SHAPE)))
    
    # Search first conv
    model.add(layers.Conv2D(
        filters=hp.Choice('conv_1_filter', values=[32, 64]),
        kernel_size=hp.Choice('conv_1_kernel', values = [3,5]),
        activation='relu',
        kernel_regularizer=regularizers.l2(1e-2),
        strides=(1, 1)
    ))
    model.add(layers.MaxPooling2D(pool_size=hp.Choice('pool_1_size', values = [3,5])))
    
    # Choose how many conv layers
    for i in range(hp.Int("num_Convolutional_layers", 1, 2)):
        model.add(
            layers.Conv2D(
                filters=hp.Choice(f"conv_{i}_filters", values=[64, 128, 256]),
                kernel_size=(3, 3),
                activation="relu",
                kernel_regularizer=regularizers.l2(1e-2),
                strides=(1, 1)
            )
        )
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    
    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.BatchNormalization())

    # Choose how many classifier
    for i in range(hp.Int("num_FullyConnected_layers", 1, 2)):
        model.add(
            layers.Dense(
                # Tune number of units separately.
                units=hp.Choice(f"units_{i}", values=[64, 128, 256]),
                activation="relu",
                kernel_regularizer=regularizers.l2(1e-2)
            )
        )
        if hp.Boolean("dropout"): model.add(layers.Dropout(rate=0.25))
    
    model.add(layers.Dense(2, activation="softmax"))

    # Choose the optimizer
    hp_optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'adamax'], default = 'adamax')
    optimizer = tf.keras.optimizers.get(hp_optimizer)
    # Choose the learning rate
    optimizer.learning_rate = hp.Choice('learning_rate', values=[1e-3, 1e-2], default = 1e-3)
                                        
    model.compile(optimizer=optimizer, 
                    loss="categorical_crossentropy", 
                    metrics = ["accuracy"])

    return model

In [6]:
os.makedirs("./keras_tuner/", exist_ok=True)
tuner = kt.BayesianOptimization(build_model,
                                objective=kt.Objective('val_loss', direction="min"),
                                directory='./keras_tuner',
                                max_trials = 20, overwrite=False,
                                project_name='tuned_model')

In [7]:
# The combination of all parameters
tuner.search_space_summary()

Search space summary
Default search space size: 10
conv_1_filter (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64], 'ordered': True}
conv_1_kernel (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
pool_1_size (Choice)
{'default': 3, 'conditions': [], 'values': [3, 5], 'ordered': True}
num_Convolutional_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 2, 'step': 1, 'sampling': None}
conv_0_filters (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128, 256], 'ordered': True}
num_FullyConnected_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 2, 'step': 1, 'sampling': None}
units_0 (Choice)
{'default': 64, 'conditions': [], 'values': [64, 128, 256], 'ordered': True}
dropout (Boolean)
{'default': False, 'conditions': []}
optimizer (Choice)
{'default': 'adamax', 'conditions': [], 'values': ['adam', 'rmsprop', 'adamax'], 'ordered': False}
learning_rate (Choice)
{'default': 0.001, 'conditi

In [8]:
early_stopping = EarlyStopping(monitor="val_loss", patience=12, min_delta=0.0001, restore_best_weights=True, verbose=1)
lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=5, verbose=1)
callbacks = [early_stopping, lr_scheduler]
# Search best hyperparameter
tuner.search(train_ds, epochs=150, validation_data=val_ds, shuffle=True, callbacks=callbacks)

Trial 20 Complete [00h 00m 40s]
val_loss: 0.6577940583229065

Best val_loss So Far: 0.5978061556816101
Total elapsed time: 00h 15m 29s
INFO:tensorflow:Oracle triggered exit


In [9]:
# Get the top model
models = tuner.get_best_models(num_models=1)
best_model = models[0]
# Build the model.
# best_model.build(input_shape=INPUT_SHAPE)
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 44, 44, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 42, 42, 256)       73984     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 21, 21, 256)      0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 19, 19, 64)        147520    
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 9, 9, 64)         0

In [10]:
tuner.results_summary()

Results summary
Results in ./keras_tuner\tuned_model
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x0000020116CF8EE0>
Trial summary
Hyperparameters:
conv_1_filter: 32
conv_1_kernel: 3
pool_1_size: 5
num_Convolutional_layers: 2
conv_0_filters: 256
num_FullyConnected_layers: 2
units_0: 64
dropout: True
optimizer: adam
learning_rate: 0.001
conv_1_filters: 64
units_1: 64
Score: 0.5978061556816101
Trial summary
Hyperparameters:
conv_1_filter: 32
conv_1_kernel: 3
pool_1_size: 5
num_Convolutional_layers: 2
conv_0_filters: 64
num_FullyConnected_layers: 2
units_0: 64
dropout: True
optimizer: adam
learning_rate: 0.001
conv_1_filters: 64
units_1: 64
Score: 0.603837788105011
Trial summary
Hyperparameters:
conv_1_filter: 32
conv_1_kernel: 3
pool_1_size: 5
num_Convolutional_layers: 2
conv_0_filters: 128
num_FullyConnected_layers: 2
units_0: 256
dropout: True
optimizer: adam
learning_rate: 0.001
conv_1_filters: 64
units_1: 64
Score: 0.6052907705307007
Trial summary
Hyperpa

In [11]:
best_model.save("./keras_tuner/" + 'best_hyperparameter_tuned_model')



INFO:tensorflow:Assets written to: ./keras_tuner/best_hyperparameter_tuned_model\assets


INFO:tensorflow:Assets written to: ./keras_tuner/best_hyperparameter_tuned_model\assets


In [12]:
a = tuner.get_best_hyperparameters(num_trials=1)[0]
model = tuner.hypermodel.build(a)

In [13]:
early_stopping = EarlyStopping(monitor="val_loss", patience=12, min_delta=0.0001, restore_best_weights=True, verbose=1)
lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=5, verbose=1)
callbacks = [early_stopping, lr_scheduler]

In [14]:
train_ds = prepare_dataset(train_dataset, augment=True)
val_ds = prepare_dataset(val_dataset)

In [19]:
model.fit(train_ds, epochs=50, callbacks=callbacks, validation_data=val_ds)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 6: ReduceLROnPlateau reducing learning rate to 1.0000001111620805e-07.
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 11: ReduceLROnPlateau reducing learning rate to 1.000000082740371e-08.
Epoch 12/50
Epoch 13/50
Epoch 13: early stopping


<keras.callbacks.History at 0x20117a9cc40>

In [23]:
# Load the test dataset
print("Loading test dataset...")
test_dataset = image_dataset_from_directory(
    directory=os.path.join(DATASET_PATH, "test"),
    label_mode="categorical",
    validation_split=None,
    image_size=(224, 224),
    batch_size=None,
    seed=SEED
    )
print("Test dataset loaded!")

X_test, y_test = [], []
for image, label in test_dataset:
    image /= 255.
    X_test.append(image.numpy())
    y_test.append(label.numpy())
         
X_test = tf.convert_to_tensor(np.asarray(X_test, dtype='float32'))
y_test = np.asarray(y_test, dtype='float32')
    
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))
test_dataset = test_dataset.batch(BATCH_SIZE)

Loading test dataset...
Found 600 files belonging to 2 classes.
Test dataset loaded!


In [24]:
score = model.evaluate(test_dataset)
print(f"Test Loss: {score[0]}")
print(f"Test Accuracy: {score[1]}")
    
predictions = model.predict(X_test)   
predictions = np.argmax(predictions, axis=1)
y_test = np.argmax(y_test, axis=1)
print(classification_report(y_test, predictions))
print(confusion_matrix(y_test, predictions))

Test Loss: 0.561095654964447
Test Accuracy: 0.746666669845581
              precision    recall  f1-score   support

           0       0.68      0.94      0.79       300
           1       0.91      0.55      0.68       300

    accuracy                           0.75       600
   macro avg       0.79      0.75      0.74       600
weighted avg       0.79      0.75      0.74       600

[[283  17]
 [135 165]]
