# 1. Importaciones y Configuración de GPU
Configuración de memoria para evitar errores en WSL/Linux con tarjetas NVIDIA.


In [1]:
import numpy as np
import os
import re
import gc
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import tensorflow as tf
import keras
from keras.utils import to_categorical
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization, LeakyReLU, Input
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

%matplotlib inline

# Preventive cleanup: reset Keras' global state and run the garbage collector
# before configuring the GPU.
tf.keras.backend.clear_session()
gc.collect()

# GPU configuration: turn on memory growth for every GPU TensorFlow can see.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"✅ GPU Detectada y Configurada: {gpus[0].name}")
    except RuntimeError as e:
        # Best effort: report the problem and keep going
        print(e)
else:
    print("⚠️ No se detectó GPU. Se usará CPU.")


I0000 00:00:1765995415.605961  159505 port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
I0000 00:00:1765995416.273595  159505 cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
I0000 00:00:1765995418.023442  159505 port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


✅ GPU Detectada y Configurada: /physical_device:GPU:0


W0000 00:00:1765995418.869099  159505 cuda_executor.cc:1839] GPU interconnect information not available: INTERNAL: NVML doesn't support extracting fabric info or NVLink is not used by the device.
W0000 00:00:1765995419.290413  159505 cuda_executor.cc:1839] GPU interconnect information not available: INTERNAL: NVML doesn't support extracting fabric info or NVLink is not used by the device.
W0000 00:00:1765995419.290917  159505 cuda_executor.cc:1839] GPU interconnect information not available: INTERNAL: NVML doesn't support extracting fabric info or NVLink is not used by the device.
W0000 00:00:1765995419.290935  159505 gpu_device.cc:2456] TensorFlow was not built with CUDA kernel binaries compatible with compute capability 12.0a. CUDA kernels will be jit-compiled from PTX, which could take 30 minutes or longer.


# 2. Carga y Procesamiento de Imágenes
Leemos las imágenes de la carpeta `./dataset`, las redimensionamos a 100x100 y las convertimos a Arrays.


In [2]:
# ==========================================
# Cell 2: load images and labels from disk
# ==========================================
# Every sub-folder of `imgpath` is treated as one class. Each readable image
# is converted to RGB, resized to IMG_SIZE x IMG_SIZE and stored together
# with the index of its class folder.

import os
import cv2
import numpy as np

# Configuration
imgpath = "./dataset"
IMG_SIZE = 100
class_names = []  # Folder names; the position in this list is the label

images = []
labels = []  # Filled in lockstep with `images`

print(f"📂 Buscando carpetas en: {imgpath}")

if not os.path.exists(imgpath):
    print(f"❌ ERROR: La ruta {imgpath} no existe.")
else:
    # 1. Sorted list of class folders, so a given class always gets the
    #    same index from run to run
    carpetas = sorted(
        d for d in os.listdir(imgpath)
        if os.path.isdir(os.path.join(imgpath, d))
    )
    class_names = carpetas

    print(f"Clases detectadas: {class_names}")

    # 2. Walk each class folder
    for indice, nombre_carpeta in enumerate(carpetas):
        ruta_carpeta = os.path.join(imgpath, nombre_carpeta)
        print(f"🔄 Procesando clase '{nombre_carpeta}' (Índice {indice})...")

        count_local = 0
        omitidas = 0

        # Sorted so the sample order (and therefore the seeded train/test
        # split done later) does not depend on the filesystem's listing order
        for archivo in sorted(os.listdir(ruta_carpeta)):
            if not archivo.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.tiff')):
                continue

            ruta_img = os.path.join(ruta_carpeta, archivo)
            image = cv2.imread(ruta_img)

            if image is None:
                # cv2 could not decode the file (e.g. corrupt image): skip it,
                # but keep count so the loss of samples is visible
                omitidas += 1
                continue

            try:
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
            except cv2.error as e:
                print(f"⚠️ Error en {archivo}: {e}")
                omitidas += 1
                continue

            # Image and label are appended together, only after processing
            # succeeded, so the two lists can never get out of step
            images.append(image)
            labels.append(indice)
            count_local += 1

        print(f"   ✅ {count_local} imágenes cargadas de {nombre_carpeta}")
        if omitidas:
            print(f"   ⚠️ {omitidas} archivos omitidos en {nombre_carpeta}")

    # Convert to NumPy arrays
    X = np.array(images, dtype=np.uint8)
    y = np.array(labels)

    print("\n================RESUMEN=================")
    print(f"Total Imágenes (X): {len(X)}")
    print(f"Total Etiquetas (y): {len(y)}")

    # Final sanity check
    if len(X) == len(y):
        print("✅ ¡Sincronización perfecta! Puedes continuar con la siguiente celda.")
    else:
        print("❌ ERROR CRÍTICO: Siguen disparejos (esto no debería pasar con este código).")

# Other cells of this notebook refer to the class list as `animal_names`;
# expose it under that name as well so they keep working.
animal_names = class_names

📂 Leyendo imágenes de: ./dataset
   Directorio: ./dataset/tortugas | Cantidad: 1
   Directorio: ./dataset/perros | Cantidad: 10860
   Directorio: ./dataset/gato | Cantidad: 10768
   Directorio: ./dataset/hormigas | Cantidad: 10528
   Directorio: ./dataset/mariquitas | Cantidad: 10008
Total im√°genes: 52723
Clases detectadas: ['tortugas', 'perros', 'gato', 'hormigas', 'mariquitas']


# 3. Preparación de Datasets (Train/Test/Val)
Normalización (0-1) y One-Hot Encoding.


In [3]:
# Initial split: hold out 20% of the samples as the test set
train_X, test_X, train_Y, test_Y = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalization: convert to float32 and scale pixel values to [0, 1].
# The division is done in place to avoid a second full-size temporary array.
train_X = train_X.astype('float32')
train_X /= 255.0
test_X = test_X.astype('float32')
test_X /= 255.0

# One-hot encoding. The width is fixed to the number of classes instead of
# being inferred from the largest label present: a class with very few
# samples can be missing from one of the splits, which would otherwise
# produce a matrix with fewer columns than the model has outputs.
num_classes = len(class_names)
train_Y_one_hot = to_categorical(train_Y, num_classes=num_classes)
test_Y_one_hot = to_categorical(test_Y, num_classes=num_classes)

# Validation split: carve 20% of the training data off for validation
train_X, valid_X, train_label, valid_label = train_test_split(
    train_X, train_Y_one_hot, test_size=0.2, random_state=13
)

print(f"Formas: Train:{train_X.shape}, Val:{valid_X.shape}, Test:{test_X.shape}")


ValueError: Found input variables with inconsistent numbers of samples: [52724, 52723]

# 4. Definición del Modelo CNN
Modelo secuencial adaptado para entrada de 100x100x3.


In [None]:
INIT_LR = 1e-3
# One output unit per class folder found when the dataset was loaded
nClasses = len(class_names)

animal_model = Sequential()
animal_model.add(Input(shape=(IMG_SIZE, IMG_SIZE, 3)))

# Convolutional blocks 1-3. Each block is
# Conv2D -> LeakyReLU -> BatchNormalization -> MaxPooling2D -> Dropout;
# only the filter count and the dropout rate differ between blocks.
# NOTE(review): some Keras versions call the LeakyReLU slope argument
# `negative_slope` instead of `alpha` - confirm against the installed version.
for n_filters, drop_rate in ((32, 0.25), (64, 0.25), (128, 0.4)):
    animal_model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='linear', padding='same'))
    animal_model.add(LeakyReLU(alpha=0.1))
    animal_model.add(BatchNormalization())
    animal_model.add(MaxPooling2D(pool_size=(2, 2)))
    animal_model.add(Dropout(drop_rate))

# Classification head
animal_model.add(Flatten())
animal_model.add(Dense(128, activation='linear'))
animal_model.add(LeakyReLU(alpha=0.1))
animal_model.add(Dropout(0.5))
animal_model.add(Dense(nClasses, activation='softmax'))

animal_model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adagrad(learning_rate=INIT_LR),
    metrics=['accuracy']
)

animal_model.summary()


# 5. Entrenamiento
Usamos EarlyStopping y Checkpoints para guardar el mejor modelo.


In [None]:
# Training hyper-parameters.
# NOTE(review): BATCH_SIZE and EPOCHS were used below without being defined
# in any cell of this notebook; the values here are placeholders to tune.
# EPOCHS is only an upper bound, since EarlyStopping may end training sooner.
BATCH_SIZE = 64
EPOCHS = 50

# Callbacks, all driven by the validation loss:
#  - stop after 10 epochs without improvement and restore the best weights
#  - halve the learning rate after 5 epochs without improvement
#  - write the best model seen so far to disk
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6, verbose=1)
checkpoint = ModelCheckpoint('animales_best.keras', monitor='val_loss', save_best_only=True, verbose=1)

print("🚀 Entrenando...")
animal_train = animal_model.fit(
    train_X, train_label,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    verbose=1,
    validation_data=(valid_X, valid_label),
    callbacks=[early_stop, reduce_lr, checkpoint]
)

# Save the model as it stands at the end of training
animal_model.save("animales_final_100x100.keras")


# 6. Evaluación de Métricas y Gráficos
Visualizamos la pérdida y precisión durante el entrenamiento.


In [None]:
# Score the trained model on the test split and print loss and accuracy
test_eval = animal_model.evaluate(test_X, test_Y_one_hot, verbose=1)
print(f'Test loss: {test_eval[0]}')
print(f'Test accuracy: {test_eval[1]}')

# Per-epoch curves recorded by fit()
history = animal_train.history
accuracy = history['accuracy']
val_accuracy = history['val_accuracy']
loss = history['loss']
val_loss = history['val_loss']
epochs_range = range(len(accuracy))

# One row, two panels: accuracy on the left, loss on the right.
# Training values are drawn as dots, validation values as a line.
panels = (
    ('Accuracy', accuracy, 'Training accuracy', val_accuracy, 'Validation accuracy'),
    ('Loss', loss, 'Training loss', val_loss, 'Validation loss'),
)

plt.figure(figsize=(12, 5))
for slot, (panel_title, train_curve, train_legend, val_curve, val_legend) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(epochs_range, train_curve, 'bo', label=train_legend)
    plt.plot(epochs_range, val_curve, 'b', label=val_legend)
    plt.title(panel_title)
    plt.legend()
plt.show()


# 7. Visualización de Resultados
Mostramos ejemplos de predicciones correctas e incorrectas del set de prueba.


In [None]:
# Predict on the test split and take the highest-scoring class per sample
predicted_classes_raw = animal_model.predict(test_X)
predicted_classes = np.argmax(predicted_classes_raw, axis=1)

# Indices of correctly and incorrectly classified test samples
correct = np.where(predicted_classes == test_Y)[0]
incorrect = np.where(predicted_classes != test_Y)[0]

print(f"✅ Correctas: {len(correct)} | ❌ Incorrectas: {len(incorrect)}")


def _plot_prediction_grid(indices, heading, color):
    """Show up to nine test images in a 3x3 grid.

    Each image is titled with its predicted and real class name, drawn in
    *color*; *heading* is used as the figure title. Nothing is drawn when
    *indices* is empty.
    """
    if len(indices) == 0:
        return
    plt.figure(figsize=(10, 10))
    for i, sample_idx in enumerate(indices[:9]):
        plt.subplot(3, 3, i + 1)
        plt.imshow(test_X[sample_idx])
        plt.title(
            f"Pred: {class_names[predicted_classes[sample_idx]]} \n Real: {class_names[test_Y[sample_idx]]}",
            color=color,
        )
        plt.axis('off')
    plt.suptitle(heading)
    plt.tight_layout()
    plt.show()


_plot_prediction_grid(correct, "Predicciones Correctas", 'green')
_plot_prediction_grid(incorrect, "Predicciones Incorrectas", 'red')

# Classification report. `labels` is passed explicitly so the report still
# lines up with `target_names` when some class has no sample in the test
# split; zero_division=0 silences the undefined-metric case for such classes.
target_names_list = [f"{name}" for name in class_names]
print(classification_report(
    test_Y,
    predicted_classes,
    labels=list(range(len(class_names))),
    target_names=target_names_list,
    zero_division=0,
))


# 8. Prueba con Imágenes Externas (Carpeta `tests`)
Prueba del modelo con imágenes nuevas que no ha visto antes.


In [None]:
# Run the model on images from a separate folder and show the top
# prediction (class name and confidence) for the first five of them.
folder_tests = './tests'
test_images_ext = []
test_filenames = []

if not os.path.exists(folder_tests):
    print(f"⚠️ La carpeta {folder_tests} no existe.")
else:
    valid_exts = ('.jpg', '.jpeg', '.png', '.bmp')
    # Sorted so the images are always processed and shown in the same order
    for archivo in sorted(os.listdir(folder_tests)):
        if not archivo.lower().endswith(valid_exts):
            continue

        path = os.path.join(folder_tests, archivo)
        img = cv2.imread(path)
        if img is None:
            # cv2 could not decode the file; say so instead of hiding it
            print(f"⚠️ No se pudo leer {archivo}; se omite.")
            continue

        try:
            # Same preprocessing as the training images: RGB, IMG_SIZE square
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        except cv2.error as e:
            print(f"⚠️ Error en {archivo}: {e}")
            continue

        test_images_ext.append(img)
        test_filenames.append(archivo)

    if test_images_ext:
        # Scale to [0, 1] floats, matching the normalization used for training
        X_ext = np.array(test_images_ext, dtype=np.uint8).astype('float32') / 255.0
        preds_ext = animal_model.predict(X_ext)

        plt.figure(figsize=(15, 5))
        for i, pred in enumerate(preds_ext[:5]):  # Show the first 5
            idx = np.argmax(pred)
            conf = pred[idx] * 100
            plt.subplot(1, 5, i + 1)
            plt.imshow(test_images_ext[i])
            plt.title(f"{class_names[idx]}\n{conf:.1f}%")
            plt.axis('off')
        plt.show()
    else:
        print("No hay imágenes en /tests")
