
# Clasificación de Razas de Mascotas — Oxford‑IIIT Pet (Keras/TensorFlow)

**Luis Ubaldo Balderas Sanchez A01751150** 




## 1. Introducción

**Problema:** Clasificar imágenes de perros y gatos en **37 razas** usando **deep learning**.  
**Relevancia:** Aplicaciones en catalogación automática, búsquedas por imagen y apoyo a refugios/ONGs para identificación.

**Objetivo:** Entrenar un modelo CNN (transfer learning) y **mejorarlo** mediante fine‑tuning/regularización, evaluando con métricas de validación y prueba.



## 2. Datos

**Fuente:** Oxford‑IIIT Pet (via `tensorflow_datasets`).  
- 37 clases de perros y gatos.  
- En este notebook descargamos los datos automáticamente 

**Split:** Usaremos `train/val/test` (10% de train pasa a validación).  
**Preprocesamiento:** Redimensionado a 224×224, normalización según la *preprocess* de la arquitectura base.


In [1]:

import os, math, json, itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from keras import layers, callbacks

In [2]:

import os, math, json, itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from keras import layers, callbacks

# Config reproducibilidad (opcional)
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)

DATA_ROOT = "data/processed/pets"
IMG_SIZE = (224, 224)
VAL_FRACTION = 0.10

os.makedirs("data/processed", exist_ok=True)

print(tf.__version__, keras.__version__)


2.16.1 3.3.3


In [3]:

# Descarga/lectura del dataset con TFDS y exportación a carpetas
def _save_example(img, label, class_names, split_dir, idx):
    cls = class_names[int(label)]
    class_dir = os.path.join(split_dir, cls)
    os.makedirs(class_dir, exist_ok=True)
    img = tf.image.resize(img, IMG_SIZE)
    img = tf.cast(tf.clip_by_value(img, 0, 255), tf.uint8)
    tf.io.write_file(os.path.join(class_dir, f"{cls}_{idx:06d}.jpg"), tf.io.encode_jpeg(img))

def prepare_oxford_pets():
    ds_train = tfds.load("oxford_iiit_pet", split="train", as_supervised=True, shuffle_files=True)
    ds_test  = tfds.load("oxford_iiit_pet", split="test",  as_supervised=True, shuffle_files=False)
    info = tfds.builder("oxford_iiit_pet").info
    class_names = info.features["label"].names
    
    # materializamos para split de validación reproducible
    train_list = list(tfds.as_numpy(ds_train))
    n_val = math.ceil(len(train_list) * VAL_FRACTION)
    val_list = train_list[:n_val]
    trn_list = train_list[n_val:]
    
    # crear carpetas
    for split in ["train", "val", "test"]:
        for c in class_names:
            os.makedirs(os.path.join(DATA_ROOT, split, c), exist_ok=True)
    
    i = 0
    for img, label in trn_list:
        _save_example(img, label, class_names, os.path.join(DATA_ROOT, "train"), i); i += 1
    j = 0
    for img, label in val_list:
        _save_example(img, label, class_names, os.path.join(DATA_ROOT, "val"), j); j += 1
    k = 0
    for img, label in tfds.as_numpy(ds_test):
        _save_example(img, label, class_names, os.path.join(DATA_ROOT, "test"), k); k += 1
    
    return class_names

if not os.path.exists(os.path.join(DATA_ROOT, "train")):
    class_names = prepare_oxford_pets()
else:
    # leer nombres de clase desde carpetas
    class_names = sorted([d for d in os.listdir(os.path.join(DATA_ROOT, "train")) if os.path.isdir(os.path.join(DATA_ROOT, "train", d))])

len(class_names), class_names[:5]


(37, ['Abyssinian', 'Bengal', 'Birman', 'Bombay', 'British_Shorthair'])

In [4]:

# Visualización rápida: distribución de clases (train)
def class_distribution(split="train"):
    root = os.path.join(DATA_ROOT, split)
    counts = {}
    for c in class_names:
        cdir = os.path.join(root, c)
        counts[c] = len([f for f in os.listdir(cdir) if f.lower().endswith((".jpg",".jpeg",".png"))])
    return pd.Series(counts).sort_values(ascending=False)

dist_train = class_distribution("train")
dist_train.head(10)


Birman             95
wheaten_terrier    95
chihuahua          95
samoyed            94
keeshond           94
leonberger         94
Persian            94
Sphynx             94
pomeranian         93
great_pyrenees     93
dtype: int64


## 3. Desarrollo del modelo (baseline)
Arquitectura: **MobileNetV3Small** (preentrenada en ImageNet, *include_top=False*), GAP + Dense softmax.  
**Regularización:** Dropout + L2.  
**Callbacks:** EarlyStopping, ReduceLROnPlateau, ModelCheckpoint.


In [5]:
AUTOTUNE = tf.data.AUTOTUNE

def load_ds(root, img_size=(224,224), batch_size=16, seed=42):
    ds_train = keras.utils.image_dataset_from_directory(
        os.path.join(root, "train"),
        image_size=img_size,
        batch_size=batch_size,
        label_mode="categorical",
        seed=seed
    )
    class_names_ds = ds_train.class_names

    ds_val = keras.utils.image_dataset_from_directory(
        os.path.join(root, "val"),
        image_size=img_size,
        batch_size=batch_size,
        label_mode="categorical",
        shuffle=False
    )

    # Data augmentation más agresivo
    aug = keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.12),
        layers.RandomZoom(0.15),
        layers.RandomTranslation(0.05, 0.05),
        layers.RandomContrast(0.2),
    ])

    ds_train = ds_train.map(
        lambda x, y: (aug(x, training=True), y),
        num_parallel_calls=AUTOTUNE
    )

    return ds_train.prefetch(AUTOTUNE), ds_val.prefetch(AUTOTUNE), class_names_ds


def build_model(
    num_classes,
    input_shape=(224, 224, 3),
    dropout=0.3,
    l2_reg=1e-5,
    train_backbone=False,
    backbone="mobilenet_v3_small",
):
    if backbone == "mobilenet_v3_small":
        Base = keras.applications.MobileNetV3Small
        preprocess = keras.applications.mobilenet_v3.preprocess_input
    elif backbone == "efficientnet_b0":
        Base = keras.applications.EfficientNetB0
        preprocess = keras.applications.efficientnet.preprocess_input
    else:
        raise ValueError(f"Backbone no soportado: {backbone}")

    base = Base(include_top=False, input_shape=input_shape, weights="imagenet")
    base.trainable = train_backbone

    inputs = keras.Input(shape=input_shape)
    x = preprocess(inputs)
    x = base(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(dropout)(x)
    outputs = layers.Dense(
        num_classes,
        activation="softmax",
        kernel_regularizer=keras.regularizers.l2(l2_reg),
    )(x)
    return keras.Model(inputs, outputs)


In [6]:

# Entrenamiento (baseline)
BATCH = 16
EPOCHS_BASE = 12
LR_BASE = 1e-3

ds_train, ds_val, class_names_ds = load_ds(DATA_ROOT, batch_size=BATCH)
assert class_names_ds == class_names, "El orden de clases debe coincidir con el de las carpetas."

model = build_model(num_classes=len(class_names), train_backbone=False)
model.compile(optimizer=keras.optimizers.Adam(LR_BASE),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

os.makedirs("runs", exist_ok=True)
cbs = [
    callbacks.ModelCheckpoint("runs/best.keras", monitor="val_accuracy", save_best_only=True),
    callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True),
    callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3),
    callbacks.CSVLogger("runs/train_log.csv", append=False),
]

hist_base = model.fit(ds_train, validation_data=ds_val, epochs=EPOCHS_BASE, callbacks=cbs)

# guardar nombres de clase
with open("runs/class_names.json", "w", encoding="utf-8") as f:
    json.dump(class_names, f, ensure_ascii=False, indent=2)

model.summary()


Found 3312 files belonging to 37 classes.
Found 368 files belonging to 37 classes.
Epoch 1/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 125ms/step - accuracy: 0.1702 - loss: 3.3017 - val_accuracy: 0.6848 - val_loss: 1.2651 - learning_rate: 0.0010
Epoch 2/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 111ms/step - accuracy: 0.5786 - loss: 1.4871 - val_accuracy: 0.7636 - val_loss: 0.8614 - learning_rate: 0.0010
Epoch 3/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 105ms/step - accuracy: 0.6820 - loss: 1.1045 - val_accuracy: 0.7935 - val_loss: 0.7333 - learning_rate: 0.0010
Epoch 4/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 105ms/step - accuracy: 0.7102 - loss: 0.9635 - val_accuracy: 0.8179 - val_loss: 0.6517 - learning_rate: 0.0010
Epoch 5/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 107ms/step - accuracy: 0.7276 - loss: 0.8800 - val_accuracy: 0.8234 - val_loss: 0.6

## EfficentNet 

In [7]:
# Entrenamiento con EfficientNet-B0 (Experimento arquitectura)
BATCH = 16
EPOCHS_BASE = 12
LR_BASE = 1e-3

ds_train_eff, ds_val_eff, class_names_eff = load_ds(DATA_ROOT, batch_size=BATCH)
assert class_names_eff == class_names, "El orden de clases debe coincidir"

model_eff = build_model(
    num_classes=len(class_names),
    train_backbone=False,
    backbone="efficientnet_b0"
)

model_eff.compile(
    optimizer=keras.optimizers.Adam(LR_BASE),
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

os.makedirs("runs", exist_ok=True)
cbs_eff = [
    callbacks.ModelCheckpoint("runs/best_effnet.keras", monitor="val_accuracy", save_best_only=True),
    callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True),
    callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3),
]

hist_eff = model_eff.fit(ds_train_eff, validation_data=ds_val_eff, epochs=EPOCHS_BASE, callbacks=cbs_eff)
model_eff.summary()


Found 3312 files belonging to 37 classes.
Found 368 files belonging to 37 classes.
Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5
[1m16705208/16705208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
Epoch 1/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 319ms/step - accuracy: 0.3742 - loss: 2.5049 - val_accuracy: 0.8152 - val_loss: 0.7122 - learning_rate: 0.0010
Epoch 2/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 363ms/step - accuracy: 0.7742 - loss: 0.8827 - val_accuracy: 0.8668 - val_loss: 0.4989 - learning_rate: 0.0010
Epoch 3/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m55s[0m 265ms/step - accuracy: 0.8210 - loss: 0.6624 - val_accuracy: 0.8859 - val_loss: 0.4148 - learning_rate: 0.0010
Epoch 4/12
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m54s[0m 260ms/step - accuracy: 0.8542 - loss: 0.5417 - val_accuracy: 0.8804 - val_loss: 0.3832 - le


## 4. Ajuste del modelo (fine‑tuning)
Descongelamos un **30%** final del backbone y entrenamos con **LR bajo**.


In [8]:

# Fine-tuning 
UNFREEZE_RATIO = 0.30
EPOCHS_FT = 8
LR_FT = 1e-5

model_ft = keras.models.load_model("runs/best.keras")

# identificar backbone (submodelo con muchas capas)
backbone = None
for lyr in model_ft.layers:
    if isinstance(lyr, keras.Model) and len(lyr.layers) > 10:
        backbone = lyr; break
assert backbone is not None, "No se encontró el backbone"

n = len(backbone.layers)
cut = int(n * (1 - UNFREEZE_RATIO))
for i, layer in enumerate(backbone.layers):
    layer.trainable = (i >= cut)

model_ft.compile(optimizer=keras.optimizers.Adam(LR_FT),
                 loss="categorical_crossentropy",
                 metrics=["accuracy"])

cbs_ft = [
    callbacks.ModelCheckpoint("runs/best_finetune.keras", monitor="val_accuracy", save_best_only=True),
    callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True),
    callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3),
    callbacks.CSVLogger("runs/train_log_finetune.csv", append=False),
]

hist_ft = model_ft.fit(ds_train, validation_data=ds_val, epochs=EPOCHS_FT, callbacks=cbs_ft)


Epoch 1/8
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 105ms/step - accuracy: 0.6245 - loss: 1.2144 - val_accuracy: 0.8397 - val_loss: 0.5489 - learning_rate: 1.0000e-05
Epoch 2/8
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 95ms/step - accuracy: 0.6773 - loss: 1.0760 - val_accuracy: 0.8370 - val_loss: 0.5696 - learning_rate: 1.0000e-05
Epoch 3/8
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 100ms/step - accuracy: 0.6904 - loss: 1.0047 - val_accuracy: 0.8315 - val_loss: 0.5730 - learning_rate: 1.0000e-05
Epoch 4/8
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 100ms/step - accuracy: 0.7137 - loss: 0.9460 - val_accuracy: 0.8234 - val_loss: 0.5741 - learning_rate: 1.0000e-05
Epoch 5/8
[1m207/207[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 108ms/step - accuracy: 0.7251 - loss: 0.8724 - val_accuracy: 0.8261 - val_loss: 0.5705 - learning_rate: 5.0000e-06
Epoch 6/8
[1m207/207[0m [32m━━━━━━━━━━


## 5. Resultados (test)
Calculamos **accuracy** y **F1 macro**, y graficamos la **matriz de confusión**.


In [9]:

# Carga test y evaluación
def load_all(split_dir):
    ds = keras.utils.image_dataset_from_directory(split_dir, image_size=IMG_SIZE, batch_size=32, label_mode="categorical", shuffle=False)
    Xs, Ys = [], []
    for x,y in ds:
        Xs.append(x.numpy()); Ys.append(y.numpy())
    return np.vstack(Xs), np.vstack(Ys), ds.class_names

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score

X_test, Y_test, class_names_test = load_all(os.path.join(DATA_ROOT, "test"))
m_final_path = "runs/best_finetune.keras" if os.path.exists("runs/best_finetune.keras") else "runs/best.keras"
m_final = keras.models.load_model(m_final_path)

P = m_final.predict(X_test, verbose=0)
y_true = Y_test.argmax(1); y_pred = P.argmax(1)

acc = accuracy_score(y_true, y_pred)
f1m = f1_score(y_true, y_pred, average="macro")
print("Test accuracy =", acc)
print("Test F1 macro =", f1m)

print("\nClassification report:\n", classification_report(y_true, y_pred, target_names=class_names, digits=4))

cm = confusion_matrix(y_true, y_pred)

# Plot CM
import matplotlib.pyplot as plt
import itertools, os
os.makedirs("reports/figures", exist_ok=True)
plt.figure(figsize=(8,8))
plt.imshow(cm, interpolation='nearest')
plt.title('Confusion Matrix')
plt.colorbar()
ticks = np.arange(len(class_names))
plt.xticks(ticks, class_names, rotation=90)
plt.yticks(ticks, class_names)
thresh = cm.max()/2.
for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
    plt.text(j, i, format(cm[i, j], 'd'),
             horizontalalignment="center",
             color="white" if cm[i, j] > thresh else "black", fontsize=7)
plt.tight_layout(); plt.ylabel('True'); plt.xlabel('Pred')
plt.savefig("reports/figures/confusion_matrix.png", dpi=160)
plt.close()


Found 3669 files belonging to 37 classes.
Test accuracy = 0.8233851185609158
Test F1 macro = 0.8227004193717424

Classification report:
                             precision    recall  f1-score   support

                Abyssinian     0.9000    0.7347    0.8090        98
                    Bengal     0.8533    0.6400    0.7314       100
                    Birman     0.6825    0.8600    0.7611       100
                    Bombay     0.9750    0.8864    0.9286        88
         British_Shorthair     0.7849    0.7300    0.7565       100
              Egyptian_Mau     0.9419    0.8351    0.8852        97
                Maine_Coon     0.6471    0.7700    0.7032       100
                   Persian     0.8315    0.7400    0.7831       100
                   Ragdoll     0.6095    0.6400    0.6244       100
              Russian_Blue     0.7596    0.7900    0.7745       100
                   Siamese     0.8936    0.8400    0.8660       100
                    Sphynx     0.9247    0.860


## 7. Aplicación (predicción en imágenes nuevas)
Función que recibe una **ruta a imagen** y devuelve **Top‑5** predicciones.


In [11]:

from PIL import Image

CLASS_JSON = "runs/class_names.json"
with open(CLASS_JSON, "r", encoding="utf-8") as f:
    CLASS_NAMES = json.load(f)

def predict_image(path, model_path=None, topk=5):
    model_path = model_path or ("runs/best_finetune.keras" if os.path.exists("runs/best_finetune.keras") else "runs/best.keras")
    model = keras.models.load_model(model_path)
    img = Image.open(path).convert("RGB").resize(IMG_SIZE)
    x = np.array(img, dtype="float32")[None, ...]
    x = keras.applications.mobilenet_v3.preprocess_input(x)
    probs = model.predict(x, verbose=0)[0]
    idxs = probs.argsort()[-topk:][::-1]
    return [(CLASS_NAMES[i], float(probs[i])) for i in idxs]

# Ejemplo:
preds = predict_image("C:/Users/Luis/Documents/7_semestre/data_reto_bloque2/Implementacion_modelo_DeepLearning/runs/beagle2.png")
preds


[('basset_hound', 0.498619019985199),
 ('beagle', 0.25160694122314453),
 ('shiba_inu', 0.10371589660644531),
 ('staffordshire_bull_terrier', 0.06120742857456207),
 ('chihuahua', 0.058003440499305725)]

## Conclusiones 

* El modelo generaliza bien en promedio, pero muestra confusiones entre razas pequeñas/parecidas (p. ej., miniature_pinscher / chihuahua / beagle), sobre todo con ángulos raros y variaciones de iluminación.