# 1. Montar Google Drive

In [74]:
!pip install gradio --quiet

In [75]:
from google.colab import drive
import os
import gradio as gr
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from collections import Counter,deque
import glob
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr

In [76]:
# Mount Google Drive; the dataset folders below live inside it
drive.mount('/content/drive')

# Training and test image folders in Drive
DATASET_TRAIN_PATH = "/content/drive/MyDrive/simpsons_dataset"
DATASET_TEST_PATH = "/content/drive/MyDrive/kaggle_simpson_testset"

# Report whether each dataset folder can be found
for split_name, split_path in (("Train", DATASET_TRAIN_PATH), ("Test", DATASET_TEST_PATH)):
    print(f"{split_name} OK:", os.path.exists(split_path))

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Train OK: True
Test OK: True


# 2. Cargar imágenes (entrenamiento y test)

In [77]:
# Side length (pixels) every image is resized to
IMG_SIZE = 64

# Integer label -> character name (the 18 most frequent characters).
# Labels are the positions in this list, starting at 0.
MAP_CHARACTERS = dict(enumerate([
    'abraham_grampa_simpson',
    'apu_nahasapeemapetilon',
    'bart_simpson',
    'charles_montgomery_burns',
    'chief_wiggum',
    'comic_book_guy',
    'edna_krabappel',
    'homer_simpson',
    'kent_brockman',
    'krusty_the_clown',
    'lisa_simpson',
    'marge_simpson',
    'milhouse_van_houten',
    'moe_szyslak',
    'ned_flanders',
    'nelson_muntz',
    'principal_skinner',
    'sideshow_bob',
]))

# Función para cargar imágenes de entrenamiento
def load_train_set(dirname, map_characters, verbose=True, img_size=None):
    """Load the labelled training images stored under ``dirname``.

    Every value of ``map_characters`` is treated as the name of a sub-folder of
    ``dirname``. Each readable image in that folder is resized to a square and
    labelled with the matching key.

    Args:
        dirname: Root folder holding one sub-folder per character.
        map_characters: Mapping of integer label -> sub-folder name.
        verbose: When true, print the number of image files found per
            character and the name of any character folder that is missing.
        img_size: Side length of the resized images. Falls back to the
            module-level ``IMG_SIZE`` when omitted.

    Returns:
        Tuple ``(X_train, y_train)`` of NumPy arrays holding the resized images
        and their integer labels, in matching order.
    """
    side = IMG_SIZE if img_size is None else img_size
    X_train = []
    y_train = []
    for label, character in map_characters.items():
        folder_path = os.path.join(dirname, character)
        if not os.path.isdir(folder_path):
            # A missing folder means a class with no samples: make it visible
            if verbose:
                print(f"Carpeta no encontrada, se omite: {character}")
            continue
        # lower(): do not drop files such as "pic.JPG".
        # sorted(): os.listdir order is arbitrary, which would make a seeded
        # train/validation split differ between runs.
        images = sorted(
            f for f in os.listdir(folder_path)
            if f.lower().endswith(("jpg", "jpeg", "png"))
        )
        if verbose:
            print(f"Leyendo {len(images)} imágenes de {character}")
        for image_name in images:
            image = cv2.imread(os.path.join(folder_path, image_name))
            # cv2.imread returns None for files it cannot decode; skip those
            if image is not None:
                image = cv2.resize(image, (side, side))
                X_train.append(image)
                y_train.append(label)
    return np.array(X_train), np.array(y_train)

# Función para cargar imágenes de test
def load_test_set(dirname, map_characters, verbose=True, img_size=None):
    """Load the test images stored directly inside ``dirname``.

    The label is derived from the file name: everything before the last
    underscore is taken as the character name (``homer_simpson_12.jpg`` ->
    ``homer_simpson``). Files whose derived name is not a value of
    ``map_characters`` are ignored.

    Args:
        dirname: Folder containing the test image files.
        map_characters: Mapping of integer label -> character name.
        verbose: When true, print how many test images were loaded.
        img_size: Side length of the resized images. Falls back to the
            module-level ``IMG_SIZE`` when omitted.

    Returns:
        Tuple ``(X_test, y_test)`` of NumPy arrays holding the resized images
        and their integer labels, in matching order.
    """
    side = IMG_SIZE if img_size is None else img_size
    X_test = []
    y_test = []
    reverse_dict = {v: k for k, v in map_characters.items()}
    # sorted(): glob returns files in arbitrary order; keep runs reproducible
    for filename in sorted(glob.glob(os.path.join(dirname, '*.*'))):
        # basename instead of split('/') so the parsing does not depend on the
        # path separator of the platform
        base_name = os.path.basename(filename)
        char_name = "_".join(base_name.split('_')[:-1])
        if char_name not in reverse_dict:
            continue
        image = cv2.imread(filename)
        # cv2.imread returns None for files it cannot decode; skip those
        if image is not None:
            image = cv2.resize(image, (side, side))
            X_test.append(image)
            y_test.append(reverse_dict[char_name])
    if verbose:
        print(f"Leídas {len(X_test)} imágenes de test")
    return np.array(X_test), np.array(y_test)

# 3. Cargar las imágenes en memoria

In [78]:
# Read both datasets from the Drive folders configured earlier
X, y = load_train_set(dirname=DATASET_TRAIN_PATH, map_characters=MAP_CHARACTERS)
X_t, y_t = load_test_set(dirname=DATASET_TEST_PATH, map_characters=MAP_CHARACTERS)

# Show the array shape of each split
for split_caption, split_images in (("entrenamiento", X), ("test", X_t)):
    print(f"Total imágenes de {split_caption}: {split_images.shape}")

Leyendo 913 imágenes de abraham_grampa_simpson
Leyendo 623 imágenes de apu_nahasapeemapetilon
Leyendo 1342 imágenes de bart_simpson
Leyendo 1193 imágenes de charles_montgomery_burns
Leyendo 986 imágenes de chief_wiggum
Leyendo 469 imágenes de comic_book_guy
Leyendo 457 imágenes de edna_krabappel
Leyendo 2246 imágenes de homer_simpson
Leyendo 498 imágenes de kent_brockman
Leyendo 1206 imágenes de krusty_the_clown
Leyendo 1354 imágenes de lisa_simpson
Leyendo 1291 imágenes de marge_simpson
Leyendo 1079 imágenes de milhouse_van_houten
Leyendo 1452 imágenes de moe_szyslak
Leyendo 1454 imágenes de ned_flanders
Leyendo 358 imágenes de nelson_muntz
Leyendo 1194 imágenes de principal_skinner
Leyendo 877 imágenes de sideshow_bob
Leídas 890 imágenes de test
Total imágenes de entrenamiento: (18992, 64, 64, 3)
Total imágenes de test: (890, 64, 64, 3)


# 4. Separar train y validation (80/20) + normalizar

In [79]:
# Scale pixel values to the [0, 1] range.
# The dtype guard keeps this cell idempotent: X and X_t are float32 after the
# first run, so re-executing the cell does not divide by 255 a second time.
# NOTE(review): assumes the loaders return non-float32 (presumably uint8)
# pixel arrays — confirm.
if X.dtype != np.float32:
    X = X.astype('float32') / 255.0
if X_t.dtype != np.float32:
    X_t = X_t.astype('float32') / 255.0

# 80/20 train/validation split, stratified on the labels so both parts keep
# the same class proportions; the fixed seed makes the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=42
)

print(f"Imágenes para entrenar: {X_train.shape}, Para Validar: {X_val.shape}")

Imágenes para entrenar: (15193, 64, 64, 3), Para Validar: (3799, 64, 64, 3)


# 5. Definir tu modelo CNN básico

In [80]:
# One output unit per character in MAP_CHARACTERS
num_classes = len(MAP_CHARACTERS)

model = models.Sequential([
    # Input shape follows IMG_SIZE (the size the loaders resize to) instead of
    # a second hard-coded 64, so the two cannot drift apart
    layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3)),

    # Convolution block 1
    layers.Conv2D(32, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Convolution block 2
    layers.Conv2D(64, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Convolution block 3
    layers.Conv2D(128, (3, 3), padding='same', activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D(pool_size=(2, 2)),

    # Fully connected head with dropout
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),

    # Output layer: one softmax probability per class
    layers.Dense(num_classes, activation='softmax')
])

# Labels are plain integers (not one-hot), hence the sparse loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

model.summary()

# 6. Entrenamiento del modelo con métricas y callbacks

In [81]:
# Every callback watches the same validation metric
MONITORED_METRIC = 'val_loss'

# Stop after 7 epochs without improvement and restore the best weights
early_stop = EarlyStopping(monitor=MONITORED_METRIC, patience=7, restore_best_weights=True, verbose=1)

# Write the model to disk only when the monitored metric improves
checkpoint = ModelCheckpoint('best_model.h5', monitor=MONITORED_METRIC, save_best_only=True, verbose=1)

# Halve the learning rate after 3 epochs without improvement
reduce_lr = ReduceLROnPlateau(monitor=MONITORED_METRIC, factor=0.5, patience=3, verbose=1)

#  Entrenar el modelo

In [82]:
# Callbacks applied during training
training_callbacks = [early_stop, checkpoint, reduce_lr]

# Train for up to 50 epochs, validating on the held-out split after each one
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=50,
    validation_data=(X_val, y_val),
    callbacks=training_callbacks,
)

Epoch 1/50
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - accuracy: 0.2215 - loss: 2.9090
Epoch 1: val_loss improved from inf to 3.92178, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 27ms/step - accuracy: 0.2218 - loss: 2.9069 - val_accuracy: 0.2037 - val_loss: 3.9218 - learning_rate: 0.0010
Epoch 2/50
[1m235/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.4647 - loss: 1.7689
Epoch 2: val_loss improved from 3.92178 to 1.61422, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.4650 - loss: 1.7677 - val_accuracy: 0.5080 - val_loss: 1.6142 - learning_rate: 0.0010
Epoch 3/50
[1m234/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.5875 - loss: 1.3447
Epoch 3: val_loss improved from 1.61422 to 1.10191, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.5877 - loss: 1.3436 - val_accuracy: 0.6565 - val_loss: 1.1019 - learning_rate: 0.0010
Epoch 4/50
[1m236/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.6814 - loss: 0.9975
Epoch 4: val_loss improved from 1.10191 to 0.86669, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.6815 - loss: 0.9976 - val_accuracy: 0.7470 - val_loss: 0.8667 - learning_rate: 0.0010
Epoch 5/50
[1m236/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.7419 - loss: 0.8075
Epoch 5: val_loss did not improve from 0.86669
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.7420 - loss: 0.8074 - val_accuracy: 0.7115 - val_loss: 1.0422 - learning_rate: 0.0010
Epoch 6/50
[1m236/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.7961 - loss: 0.6385
Epoch 6: val_loss improved from 0.86669 to 0.68306, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.7961 - loss: 0.6383 - val_accuracy: 0.8068 - val_loss: 0.6831 - learning_rate: 0.0010
Epoch 7/50
[1m237/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.8362 - loss: 0.5146
Epoch 7: val_loss improved from 0.68306 to 0.60858, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.8362 - loss: 0.5146 - val_accuracy: 0.8460 - val_loss: 0.6086 - learning_rate: 0.0010
Epoch 8/50
[1m237/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 13ms/step - accuracy: 0.8709 - loss: 0.3926
Epoch 8: val_loss improved from 0.60858 to 0.59135, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.8709 - loss: 0.3927 - val_accuracy: 0.8439 - val_loss: 0.5913 - learning_rate: 0.0010
Epoch 9/50
[1m236/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.8947 - loss: 0.3184
Epoch 9: val_loss improved from 0.59135 to 0.57696, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.8946 - loss: 0.3188 - val_accuracy: 0.8576 - val_loss: 0.5770 - learning_rate: 0.0010
Epoch 10/50
[1m236/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.9134 - loss: 0.2656
Epoch 10: val_loss improved from 0.57696 to 0.55727, saving model to best_model.h5




[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.9134 - loss: 0.2657 - val_accuracy: 0.8710 - val_loss: 0.5573 - learning_rate: 0.0010
Epoch 11/50
[1m233/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.9228 - loss: 0.2334
Epoch 11: val_loss did not improve from 0.55727
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 14ms/step - accuracy: 0.9228 - loss: 0.2335 - val_accuracy: 0.8460 - val_loss: 0.5968 - learning_rate: 0.0010
Epoch 12/50
[1m234/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.9316 - loss: 0.2092
Epoch 12: val_loss did not improve from 0.55727
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.9316 - loss: 0.2093 - val_accuracy: 0.8644 - val_loss: 0.5719 - learning_rate: 0.0010
Epoch 13/50
[1m235/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m



[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.9565 - loss: 0.1336 - val_accuracy: 0.8984 - val_loss: 0.4878 - learning_rate: 5.0000e-04
Epoch 15/50
[1m234/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.9663 - loss: 0.1054
Epoch 15: val_loss did not improve from 0.48779
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.9663 - loss: 0.1053 - val_accuracy: 0.8968 - val_loss: 0.5115 - learning_rate: 5.0000e-04
Epoch 16/50
[1m237/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 11ms/step - accuracy: 0.9721 - loss: 0.0809
Epoch 16: val_loss did not improve from 0.48779
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - accuracy: 0.9721 - loss: 0.0809 - val_accuracy: 0.8994 - val_loss: 0.5085 - learning_rate: 5.0000e-04
Epoch 17/50
[1m236/238[0m [32m━━━━━━━━━━━



[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.9853 - loss: 0.0423 - val_accuracy: 0.9110 - val_loss: 0.4828 - learning_rate: 2.5000e-04
Epoch 21/50
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9856 - loss: 0.0445
Epoch 21: val_loss did not improve from 0.48283
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.9856 - loss: 0.0445 - val_accuracy: 0.9152 - val_loss: 0.4849 - learning_rate: 2.5000e-04
Epoch 22/50
[1m233/238[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.9859 - loss: 0.0458
Epoch 22: val_loss did not improve from 0.48283
[1m238/238[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.9860 - loss: 0.0456 - val_accuracy: 0.9089 - val_loss: 0.5009 - learning_rate: 2.5000e-04
Epoch 23/50
[1m235/238[0m [32m━━━━━━━━━━━

# Graficar métricas de entrenamiento

In [83]:
# Two side-by-side panels: accuracy on the left, loss on the right
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(14, 6))

# (axes, key in history.history, display name) for each panel
panels = (
    (accuracy_ax, 'accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Loss'),
)

for ax, metric_key, metric_name in panels:
    ax.plot(history.history[metric_key], label=f'Train {metric_name}')
    ax.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_name}')
    ax.set_title(f'{metric_name} durante el entrenamiento')
    ax.set_xlabel('Épocas')
    ax.set_ylabel(metric_name)
    ax.legend()

plt.show()

# 7. Evaluar el modelo entrenado (cargar el mejor modelo)

In [84]:
# Load the best model saved by ModelCheckpoint; this rebinds `model` to the
# checkpoint read from 'best_model.h5'
model = load_model('best_model.h5')



# Realizar predicciones

In [85]:
# Per-class probabilities for every test image
y_pred_probs = model.predict(X_t)
# Predicted label = index of the highest probability in each row
y_pred = y_pred_probs.argmax(axis=1)

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step


# Generar métricas de evaluación

In [86]:
# Per-class report.
# `labels` is passed explicitly so each entry of `target_names` stays tied to
# its own class id even when a class is absent from y_t / y_pred (without it,
# scikit-learn derives the label set from the data and the names can mismatch).
# zero_division=0 reports 0 instead of warning for classes with no samples.
print("Classification Report:")
print(classification_report(
    y_t,
    y_pred,
    labels=list(MAP_CHARACTERS.keys()),
    target_names=list(MAP_CHARACTERS.values()),
    zero_division=0,
))

Classification Report:
                          precision    recall  f1-score   support

  abraham_grampa_simpson       1.00      0.90      0.95        48
  apu_nahasapeemapetilon       0.96      0.98      0.97        50
            bart_simpson       0.94      0.98      0.96        50
charles_montgomery_burns       0.90      0.98      0.94        48
            chief_wiggum       1.00      1.00      1.00        50
          comic_book_guy       1.00      0.96      0.98        49
          edna_krabappel       1.00      0.98      0.99        50
           homer_simpson       0.91      0.98      0.94        50
           kent_brockman       1.00      0.96      0.98        50
        krusty_the_clown       1.00      1.00      1.00        50
            lisa_simpson       0.98      0.94      0.96        50
           marge_simpson       1.00      1.00      1.00        50
     milhouse_van_houten       1.00      1.00      1.00        49
             moe_szyslak       0.98      1.00      0

# Matriz de confusión

In [87]:
# Fix the row/column order of the matrix to the class ids of MAP_CHARACTERS.
# Without `labels`, the matrix only contains classes that occur in the data,
# and the tick labels below would no longer line up with its rows/columns.
class_ids = list(MAP_CHARACTERS.keys())
class_labels = list(MAP_CHARACTERS.values())

cm = confusion_matrix(y_t, y_pred, labels=class_ids)

plt.figure(figsize=(12,10))
sns.heatmap(cm, annot=True, fmt='d', xticklabels=class_labels, yticklabels=class_labels, cmap='Blues')
plt.title('Matriz de Confusión')
plt.xlabel('Predicción')
plt.ylabel('Valor real')
plt.xticks(rotation=90)
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()

# 8. Predicción frame a frame desde video con Gradio

In [88]:
# Input size expected by the network
IMG_SIZE = 64

# Class names in dictionary order, so a predicted index can be turned into a name
class_names = [MAP_CHARACTERS[label] for label in MAP_CHARACTERS]

# Load the model from the checkpoint file
model = load_model('best_model.h5')

# Función de predicción frame a frame
def predict_video(video_path):
    """Classify every frame of a video and summarise the predicted characters.

    Each frame is resized to ``IMG_SIZE`` x ``IMG_SIZE``, scaled to [0, 1] and
    classified with the module-level ``model``. Frames are sent to the model in
    batches rather than one ``predict`` call per frame.

    Args:
        video_path: Path of the video file to analyse. A falsy value (such as
            ``None``) is reported as unprocessable instead of being handed to
            OpenCV.

    Returns:
        A text summary with one line per predicted character and the number of
        frames assigned to it, most frequent first; or an error message when
        no frame could be read.
    """
    no_result = "No se pudo procesar el video."
    if not video_path:
        return no_result

    batch_size = 32  # frames per model.predict call
    frame_counts = Counter()
    pending = []

    def classify_pending():
        # Run the model once over the buffered frames and tally the classes
        if not pending:
            return
        predictions = model.predict(np.stack(pending), verbose=0)
        frame_counts.update(class_names[idx] for idx in np.argmax(predictions, axis=1))
        pending.clear()

    cap = cv2.VideoCapture(video_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Preprocess the frame the same way as the training images
            frame_resized = cv2.resize(frame, (IMG_SIZE, IMG_SIZE))
            pending.append(frame_resized.astype('float32') / 255.0)

            if len(pending) == batch_size:
                classify_pending()
        # Classify the last, possibly incomplete, batch
        classify_pending()
    finally:
        # Release the capture even if decoding or prediction raised
        cap.release()

    if not frame_counts:
        return no_result

    # most_common() lists the characters from most to least frequent
    result = "\n".join(f"{char}: {count} frames" for char, count in frame_counts.most_common())
    return f"Personajes detectados:\n{result}"

# Gradio components: a video upload as input, a text box for the summary
video_input = gr.Video(label="Sube un video de Los Simpsons")
result_output = gr.Textbox(label="Resultado")

# Wire predict_video to the components and start the app
iface = gr.Interface(
    fn=predict_video,
    inputs=video_input,
    outputs=result_output,
    title="Detector de personajes de Los Simpsons",
    description="Este modelo analiza frame por frame un video y detecta los personajes de los simpsons.",
)

iface.launch()



It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c12445fdfbdecc04c4.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


