# Image classifier

In [None]:
import numpy as np
import tensorflow as tf

from sklearn.metrics import roc_auc_score, roc_curve

# Load the data

In [None]:
import os


# Folder holding the raw training images, resolved to an absolute path.
data_dir = os.path.abspath('../data/raw/train/')

# Width and height every image is resized to when loaded.
img_width = 224
img_height = 224

# Arguments that are identical for the training and the validation call.
_split_kwargs = dict(
    validation_split=0.2,
    seed=42,
    image_size=(img_height, img_width),
    labels='inferred',
)

# train dataset
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_split_kwargs
)

# validation dataset
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_split_kwargs
)

In [None]:
# Bare expression: the notebook shows the dataset object's repr as the cell output.
train_ds

In [None]:
# Keep the dataset's class names in a variable; later cells index into it
# for plot titles and use its length as the number of classes.
class_names = train_ds.class_names
print(class_names)

Where the labels are:
- class_a - cat
- class_b - dog

# Visualize the data

In [None]:
import matplotlib.pyplot as plt

# Show the first nine images of one training batch in a 3x3 grid,
# each titled with its class name.
fig = plt.figure(figsize=(10, 10))
for batch_images, batch_labels in train_ds.take(1):
    for idx in range(9):
        axis = fig.add_subplot(3, 3, idx + 1)
        axis.imshow(batch_images[idx].numpy().astype("uint8"))
        axis.set_title(class_names[batch_labels[idx]])
        axis.axis("off")

Let's check the contents of train_ds:

In [None]:
# Print the shapes of a single (images, labels) batch.
for image_batch, labels_batch in train_ds.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

The image_batch is a tensor of shape (32, 224, 224, 3). This is a batch of 32 images of shape 224x224x3 (the last dimension refers to the RGB color channels; the images are 224x224 because that is the **image_size** we passed to the **image_dataset_from_directory** function, whose default would otherwise be 256x256). The labels_batch is a tensor of shape (32,); these are the **labels** corresponding to the 32 images.

# Configure the dataset for performance

We are going to use buffered prefetching, so we can yield data from disk without having I/O become blocking. The two important methods we should use when loading data are:

- **Dataset.cache** keeps the images in memory after they're loaded off disk during the first epoch. This will ensure the dataset does not become a bottleneck while training your model. If your dataset is too large to fit into memory, you can also use this method to create a performant on-disk cache.
- **Dataset.prefetch** overlaps data preprocessing and model execution while training.
Interested readers can learn more about both methods, as well as how to cache data to disk in the Prefetching section of the Better performance with the tf.data API guide.

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training pipeline: cache, shuffle with a 1000-element buffer, then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

# Validation pipeline: cache and prefetch only (no shuffling).
val_ds = val_ds.cache()
val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)

# Standardize the data

The RGB channel values are in the [0, 255] range. This is not ideal for a neural network; in general you should seek to make your input values small.

Now we standardize values to be in the [0, 1] range by using tf.keras.layers.Rescaling:

In [None]:
# Rescaling layer with a scale factor of 1/255 (maps [0, 255] pixel values to [0, 1]).
normalization_layer = tf.keras.layers.Rescaling(scale=1.0 / 255)

Now there are two ways to use this layer. We can apply it to the dataset by calling Dataset.map:

In [None]:
def _normalize(images, labels):
    """Apply the rescaling layer to the images and pass the labels through."""
    return normalization_layer(images), labels


# normalize all the data
normalized_train_ds = train_ds.map(_normalize)

# take the first image of the first batch to check the resulting value range
image_batch, labels_batch = next(iter(normalized_train_ds))
first_image = image_batch[0]
pixels = first_image.numpy()
print('Min and Max values:', pixels.min(), '-', pixels.max())

Alternatively, we can include this layer in the model itself. The same can be done for resizing, using the **tf.keras.layers.Resizing** layer.

# Now let's create the model (CNN)

This time we are following the model given to us:

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers

# Number of output units: one per class found in the training directory.
num_classes = len(class_names)


def _conv_block(filters, n_convs):
    """Return `n_convs` 3x3 same-padded ReLU convolutions plus a 2x2 max-pool.

    Args:
        filters: number of output filters of every convolution in the block.
        n_convs: how many convolution layers precede the pooling layer.

    Returns:
        A list of Keras layers, ready to be unpacked into a Sequential model.
    """
    convs = [
        layers.Conv2D(
            filters=filters,
            kernel_size=(3, 3),
            padding="same",
            activation="relu",
        )
        for _ in range(n_convs)
    ]
    return convs + [layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2))]


model = Sequential([
    # Rescale pixels from [0, 255] to [0, 1] inside the model: the datasets
    # passed to fit() hold raw pixel values, so without this layer the
    # network would train on unnormalized inputs. The input shape follows
    # the image size used when loading the data.
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
    # Five convolutional stages: 64-64, 128-128, 256x3, 512x3, 512x3.
    *_conv_block(64, 2),
    *_conv_block(128, 2),
    *_conv_block(256, 3),
    *_conv_block(512, 3),
    *_conv_block(512, 3),
    # Classifier head: two 4096-unit dense layers and a softmax output.
    layers.Flatten(),
    layers.Dense(units=4096, activation="relu"),
    layers.Dense(units=4096, activation="relu"),
    # Softmax output: the model emits class probabilities, not logits.
    layers.Dense(units=num_classes, activation="softmax"),
])

# Compile the model

This time we are going to use the **tf.keras.optimizers.Adam** optimizer and **tf.keras.losses.SparseCategoricalCrossentropy** loss function.

In [None]:
# The model's last layer applies softmax, so it outputs probabilities rather
# than logits; the loss must therefore be built with from_logits=False.
# With from_logits=True the loss would treat the probabilities as logits and
# apply softmax a second time.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=['accuracy']
)

# Model summary

We can see all the layers of the network using the Keras **Model.summary** method:

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

# Train the model

In [None]:
# number of epochs to train for
epochs = 10

# train on train_ds, using val_ds as validation data
history = model.fit(
    x=train_ds,
    epochs=epochs,
    validation_data=val_ds,
)

In [None]:
# Probability predictions.
# NOTE: this notebook never defines a separate test set (X_test / y_test),
# so the validation split is used for evaluation. Labels and predictions are
# collected in the same loop, batch by batch, so each score stays paired
# with its true label regardless of the order the dataset yields batches in.
y_true_batches = []
y_proba_batches = []
for images, labels in val_ds:
    batch_proba = model.predict_on_batch(images)
    y_true_batches.append(labels.numpy())
    # The softmax output has one column per class; keep the probability of
    # class index 1 (class_names[1]) as the positive-class score.
    y_proba_batches.append(batch_proba[:, 1])

y_test = np.concatenate(y_true_batches)
y_proba = np.concatenate(y_proba_batches)

# Compute the AUC metric.
auc_score = roc_auc_score(y_test, y_proba)
print(f"AUC Score: {auc_score:.4f}")

# ROC curve.
fpr, tpr, thresholds = roc_curve(y_test, y_proba)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.4f})')
# Diagonal reference line, labelled 'Random' in the legend.
plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve: Clasificador Gato vs Perro')
plt.legend()
plt.grid(True)
plt.show()