## Imports

In [1]:
from keras import utils

In [2]:
# Constants
BATCH_SIZE = 64
IMG_HEIGHT = 160
IMG_WIDTH = 160
VAL_TEST_RATIO = 0.5

DATASET_PATH = "../cats_and_dogs"
SEED = 7654321  # seed for the validation/test split that gives the best class distribution


# Training split. image_size is passed explicitly so that IMG_HEIGHT / IMG_WIDTH
# actually take effect; when it is omitted the loader uses its own default size
# and the two constants above are silently ignored.
train_ds = utils.image_dataset_from_directory(
    DATASET_PATH + "/train",
    labels="inferred",
    label_mode="binary",
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    seed=SEED,
    batch_size=BATCH_SIZE,
)

# The "validation" directory is divided in two: with subset="both" the loader
# returns a pair of datasets, used here as the validation set and the test set.
# VAL_TEST_RATIO is the fraction of the files that goes to the second one.
val_ds, test_ds = utils.image_dataset_from_directory(
    DATASET_PATH + "/validation",
    labels="inferred",
    label_mode="binary",
    image_size=(IMG_HEIGHT, IMG_WIDTH),
    validation_split=VAL_TEST_RATIO,
    subset="both",
    seed=SEED,
    batch_size=BATCH_SIZE,
)

# the labels were inferred from the directory names
labels = train_ds.class_names
print("Encontradas as classes: ", labels)

# load the datasets into memory - once loaded, the order of the batches no longer changes
train_ds = train_ds.cache()
val_ds = val_ds.cache()
test_ds = test_ds.cache()

# Number of individual images (not batches) in the validation and test sets.
num_images_val_ds = sum(1 for _ in val_ds.unbatch())
num_images_test_ds = sum(1 for _ in test_ds.unbatch())

Found 2000 files belonging to 2 classes.
Found 1000 files belonging to 2 classes.
Using 500 files for training.
Using 500 files for validation.
Encontradas as classes:  ['cats', 'dogs']


2025-03-28 09:40:54.001305: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2025-03-28 09:40:54.001346: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2025-03-28 09:40:54.001350: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
I0000 00:00:1743154854.001593   23585 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1743154854.001768   23585 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
2025-03-28 09:40:54.327322: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-03-28 09:40:54.440832: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting

In [3]:
def class_distribution(dataset, class_names=None):
    """
    Count how many examples of each class a dataset contains.

    Args:
        dataset: An iterable of ``(inputs, batch_labels)`` pairs, such as a
            batched ``tf.data.Dataset``. ``batch_labels`` must expose
            ``.numpy()`` and hold one class index per example, either as a
            flat vector or as a single-column array.
        class_names (sequence of str, optional): Class names indexed by the
            integer label value. When omitted, the module-level ``labels``
            list is used, so existing calls keep working unchanged.

    Returns:
        dict: Maps every class name to the number of examples carrying that
              label. Classes that never occur are reported with a count of 0.
    """
    if class_names is None:
        # Backward-compatible default: the class names inferred at load time.
        class_names = labels

    class_counts = {name: 0 for name in class_names}
    for _, batch_labels in dataset:
        for label in batch_labels.numpy():
            # .item() accepts a scalar or a one-element array and raises on
            # anything larger, so multi-column labels fail loudly instead of
            # being miscounted.
            class_counts[class_names[int(label.item())]] += 1
    return class_counts

In [4]:
# Count the examples per class in each split (train, validation, test - in that order).
train_distribution, val_distribution, test_distribution = (
    class_distribution(split) for split in (train_ds, val_ds, test_ds)
)

# Print one summary line per split.
for heading, counts in (
    ("Distribuição das classes no conjunto de treino:", train_distribution),
    ("Distribuição das classes no conjunto de validação:", val_distribution),
    ("Distribuição das classes no conjunto de teste:", test_distribution),
):
    print(heading, counts)

Distribuição das classes no conjunto de treino: {'cats': 1000, 'dogs': 1000}
Distribuição das classes no conjunto de validação: {'cats': 251, 'dogs': 249}
Distribuição das classes no conjunto de teste: {'cats': 249, 'dogs': 251}


2025-03-28 09:40:54.906944: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
