## Imports

In [1]:
from keras import layers, callbacks, utils, metrics, Sequential, optimizers, initializers

## Constants

In [2]:
# Constantes
IS_TO_FIND_BEST_HYPERPARAMS = True
IS_TO_TRAIN = True
IS_TO_REPLICATE = True

BATCH_SIZE = 64
IMG_HEIGHT = 256
IMG_WIDTH = 256
LEARNING_RATE = 1e-5

DATASET_PATH = "../../cats_and_dogs"
DATASET_TRAIN_PATH = f"{DATASET_PATH}/train"
DATASET_VAL_PATH = f"{DATASET_PATH}/validation"

NUM_CLASSES = 2

SEED = 7654321

VAL_TEST_RATIO = 0.5

MAX_EPOCHS = 100

WEIGHTS_FILE_EXT = "weights.h5"
HYPERPARAMS_FILE_EXT = "hyperparams.json"

# callback para parar o treino caso não se verifiquem melhorias na loss
EARLY_STOPPING = callbacks.EarlyStopping(monitor="val_loss", patience=5)

INITIALIZER = initializers.RandomNormal(mean=0.0, stddev=0.05, seed=SEED)
GLOROT_UNIFORM_INITIALIZER = initializers.GlorotUniform(seed=SEED)

DEFAULT_LOSS = "binary_crossentropy"
DEFAULT_OPTIMIZER = optimizers.Adam(learning_rate=LEARNING_RATE)
DEFAULT_METRICS = ["accuracy", metrics.F1Score]

2025-03-24 14:52:54.181179: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-03-24 14:52:54.181205: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-03-24 14:52:54.181208: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1742827974.181220 1341139 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1742827974.181236 1341139 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


### Dataset load

In [3]:
train_ds = utils.image_dataset_from_directory(
    DATASET_TRAIN_PATH,
    labels="inferred",
    label_mode="binary",
    seed=SEED,
    batch_size=BATCH_SIZE,
    image_size=(IMG_WIDTH, IMG_HEIGHT),
    verbose=False,
)

val_ds, test_ds = utils.image_dataset_from_directory(
    DATASET_VAL_PATH,
    labels="inferred",
    label_mode="binary",
    validation_split=VAL_TEST_RATIO,
    subset="both",
    seed=SEED,
    image_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    verbose=False,
)

# load the datasets into memory - once loaded, the order of the batches will no longer change
train_ds = train_ds.cache()
val_ds = val_ds.cache()
test_ds = test_ds.cache()

Using 500 files for training.
Using 500 files for validation.


Challenge: Find the best architecture that can generalize very well the problem features, and classify each animal with minimal error.

* Higher number of convolutional layers will produce more feature extraction from the training dataset, it can indicate a tendency to overfit the model.
* Higher number of pooling layers will subsample the image over the original size, the model will not extract correctly the differences in each class for each feature.
* ...

rephrase these topics.

Pressupostos:
* Número de filtros em cada camada: 32 x degrau do nível hierarquico, ex: 1º nível 32, 2º nível 64, etc. (explicar definição de nível hierárquico)
* Pooling technique: Max Pooling

### Procedure to define the best model considering the above assumptions:

Identify:
1. the best CNN architecture
2. the best filter size
3. best pooling size
4. the dense layer number of neurons
5. data augmentation hyperparameters

ref: https://github.com/dnouri/nolearn/blob/master/docs/notebooks/CNN_tutorial.ipynb

In [4]:
mix_cnn_model = Sequential(
    [
        layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 3)),

        # Low level features
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(pool_size=(4, 4)),

        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(32, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(pool_size=(4, 4)),

        # Mid level features
        layers.Conv2D(64, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(64, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(64, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(),

        layers.Conv2D(64, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(64, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(),

        # High level features
        layers.Conv2D(96, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.Conv2D(96, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(),

        layers.Conv2D(96, 3, padding="same", activation="relu", kernel_initializer=GLOROT_UNIFORM_INITIALIZER),
        layers.MaxPooling2D(),

        # Hidden layer
        layers.Dense(128, activation="relu", kernel_initializer=INITIALIZER),
        layers.Dense(160, activation="relu", kernel_initializer=INITIALIZER),

        # Classifier
        layers.Flatten(),
        layers.Dense(1, activation="sigmoid", kernel_initializer=INITIALIZER)
    ], name="mix_cnn_model"
)

mix_cnn_model.compile(
    loss=DEFAULT_LOSS,
    optimizer=DEFAULT_OPTIMIZER,
    metrics=DEFAULT_METRICS
)

In [5]:
mix_cnn_model.summary()

In [None]:
mix_cnn_model_checkpoint = callbacks.ModelCheckpoint(
    filepath=f"../models/{mix_cnn_model.name}.{WEIGHTS_FILE_EXT}",
    save_weights_only=True,
    monitor="val_loss",
    mode="min",
    save_best_only=True,
)

mix_cnn_model_history = mix_cnn_model.fit(
    train_ds,
    epochs=MAX_EPOCHS,
    validation_data=val_ds,
    callbacks=[mix_cnn_model_checkpoint, EARLY_STOPPING],
)

Epoch 1/100


2025-03-24 14:52:55.372581: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 410ms/step - accuracy: 0.5145 - f1_score: 0.6532 - loss: 0.6933 - val_accuracy: 0.5020 - val_f1_score: 0.6649 - val_loss: 0.6933
Epoch 2/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 387ms/step - accuracy: 0.5145 - f1_score: 0.6532 - loss: 0.6930 - val_accuracy: 0.5020 - val_f1_score: 0.6649 - val_loss: 0.6931
Epoch 3/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 384ms/step - accuracy: 0.5145 - f1_score: 0.6532 - loss: 0.6928 - val_accuracy: 0.5020 - val_f1_score: 0.6649 - val_loss: 0.6929
Epoch 4/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 368ms/step - accuracy: 0.5140 - f1_score: 0.6532 - loss: 0.6924 - val_accuracy: 0.5060 - val_f1_score: 0.6649 - val_loss: 0.6922
Epoch 5/100
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 385ms/step - accuracy: 0.5182 - f1_score: 0.6532 - loss: 0.6915 - val_accuracy: 0.5660 - val_f1_score: 0.6649 -