In [11]:
# Mount Google Drive: the dataset archive is kept there so it does not have to
# be loaded into the runtime again every time.

from google.colab import drive

drive.mount('/content/drive')

# Drive folder that holds the dataset archive, and the archive's file name.
SIMPSONS_DIR = "/content/drive/MyDrive/datasets/simpsons"
SIMPSONS_ZIP = "simpson_nuevos.zip"


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# ==========================================================
# INFERENCE CELL — load the trained-model bundle and predict on a new ZIP
# Inputs:
#   - /content/resultados_simpsons.zip         (model bundle + metadata, uploaded manually)
#   - <SIMPSONS_DIR>/<SIMPSONS_ZIP> on Drive   (new dataset for inference)
# Output:
#   - /content/predicciones.csv
# ==========================================================
import csv
import glob
import json
import os
import shutil
import time
import zipfile

import numpy as np
import tensorflow as tf


# -------------------------
# 0) PATHS
# -------------------------
# Model bundle (model + metadata) expected in the Colab runtime.
RESULTS_ZIP = "/content/resultados_simpsons.zip"
# Dataset to run inference on; adjust SIMPSONS_ZIP if the archive arrives under another name.
NEW_ZIP     = os.path.join(SIMPSONS_DIR, SIMPSONS_ZIP)

# Working directories the two archives are extracted into.
RESULTS_DIR = "/content/resultados"
NEW_WORKDIR = "/content/new_data"

# Lower-case file extensions treated as images when scanning the extracted dataset.
EXTS = (".jpg",".jpeg",".png",".bmp",".webp")
AUTOTUNE = tf.data.AUTOTUNE

# Fail fast, and report the path that was actually checked (the previous
# messages named files that are not the ones this cell reads).
assert os.path.isfile(RESULTS_ZIP), f"Falta {RESULTS_ZIP} (súbelo manualmente)"
assert os.path.isfile(NEW_ZIP),     f"Falta {NEW_ZIP} (revisa SIMPSONS_DIR / SIMPSONS_ZIP)"

# -------------------------
# 1) UNPACK THE MODEL BUNDLE
# -------------------------
# Start from an empty extraction directory so files from an earlier run
# cannot linger next to the freshly extracted ones.
if os.path.isdir(RESULTS_DIR):
    shutil.rmtree(RESULTS_DIR)
os.makedirs(RESULTS_DIR, exist_ok=True)

with zipfile.ZipFile(RESULTS_ZIP, "r") as bundle:
    bundle.extractall(RESULTS_DIR)

print("✅ resultados.zip extraído en:", RESULTS_DIR)
extracted_names = sorted(os.listdir(RESULTS_DIR))
print("Contenido:", extracted_names[:20])  # cap the listing at 20 entries

MODEL_PATH = os.path.join(RESULTS_DIR, "model.keras")
META_PATH  = os.path.join(RESULTS_DIR, "metadata.json")

# Both artifacts must sit at the top level of the extracted bundle.
for required_path in (MODEL_PATH, META_PATH):
    assert os.path.isfile(required_path), f"No existe {required_path}"

# -------------------------
# 2) LOAD METADATA + MODEL
# -------------------------
with open(META_PATH, "r", encoding="utf-8") as meta_file:
    meta = json.load(meta_file)

# Inference settings come from the bundle's metadata; only the batch size
# has a fallback (32) when the "batch_final" key is absent.
IMG_SIZE  = tuple(meta["img_size"])
CHANNELS  = int(meta["channels"])
CLASSES   = list(meta["classes"])
BATCH     = int(meta.get("batch_final", 32))

print("\nCONFIG INFERENCIA:")
for label, value in (
    ("  IMG_SIZE :", IMG_SIZE),
    ("  CHANNELS :", CHANNELS),
    ("  BATCH    :", BATCH),
    ("  #CLASSES :", len(CLASSES)),
):
    print(label, value)

model = tf.keras.models.load_model(MODEL_PATH)
print("\n✅ Modelo cargado:", MODEL_PATH)

# -------------------------
# 3) UNPACK THE NEW DATASET ZIP
# -------------------------
# Recreate the working directory from scratch before extracting.
workdir_already_exists = os.path.isdir(NEW_WORKDIR)
if workdir_already_exists:
    shutil.rmtree(NEW_WORKDIR)
os.makedirs(NEW_WORKDIR, exist_ok=True)

with zipfile.ZipFile(NEW_ZIP, "r") as dataset_zip:
    dataset_zip.extractall(NEW_WORKDIR)

print("\n✅ nuevo.zip extraído en:", NEW_WORKDIR)

# -------------------------
# 4) ENCONTRAR TODAS LAS IMÁGENES (sin asumir carpetas por clase)
#    (Soporta: imágenes en raíz, o en subcarpetas a cualquier profundidad)
# -------------------------
def list_all_images_recursive(root_dir, exts=EXTS):
    """Return the sorted paths of every image file found under ``root_dir``.

    The whole directory tree is walked, so images may sit directly in
    ``root_dir`` or in subfolders at any depth.

    Args:
        root_dir: Directory to scan.
        exts: Extension (or tuple of extensions) a file name must end with;
            the file name is lower-cased before the comparison.

    Returns:
        A list of paths (each joined onto the directory it was found in),
        sorted in ascending string order.
    """
    matches = [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(root_dir)
        for name in names
        if name.lower().endswith(exts)
    ]
    matches.sort()
    return matches

files = list_all_images_recursive(NEW_WORKDIR)
# Nothing to predict on: stop here instead of building an empty dataset.
if not files:
    raise ValueError(f"No encontré imágenes dentro de {NEW_ZIP}")

n_images = len(files)
print("Imágenes encontradas:", n_images)
print("Ejemplo:", files[0])

# -------------------------
# 5) PIPELINE TF.DATA (mismo preprocesamiento que entrenaste)
# -------------------------
def decode_image(path, img_size, channels):
    """Load one image file as a resized float32 tensor scaled by 1/255.

    Args:
        path: Path of the image file to read.
        img_size: Target size passed to ``tf.image.resize``.
        channels: Number of colour channels requested from the decoder.

    Returns:
        The decoded image, resized with antialiasing, cast to float32 and
        divided by 255.
    """
    raw_bytes = tf.io.read_file(path)
    # expand_animations=False: do not expand animated files into a frame stack.
    pixels = tf.io.decode_image(raw_bytes, channels=channels, expand_animations=False)
    resized = tf.image.resize(pixels, img_size, antialias=True)
    return tf.cast(resized, tf.float32) / 255.0

def make_ds(paths, batch):
    """Build a batched, prefetched ``tf.data`` pipeline of decoded images.

    Args:
        paths: Image file paths, consumed in the given order.
        batch: Batch size.

    Returns:
        A dataset yielding batches of images decoded with ``decode_image``
        using the module-level ``IMG_SIZE`` and ``CHANNELS``.
    """
    def _load(image_path):
        return decode_image(image_path, IMG_SIZE, CHANNELS)

    path_ds = tf.data.Dataset.from_tensor_slices(paths)
    image_ds = path_ds.map(_load, num_parallel_calls=AUTOTUNE)
    return image_ds.batch(batch).prefetch(AUTOTUNE)

ds = make_ds(files, BATCH)

# -------------------------
# 6) PREDICTION + CSV
# -------------------------
probs = model.predict(ds, verbose=0)

# Fail loudly on a model/metadata mismatch: with a different number of output
# columns than class names, the index -> name lookup below would either raise
# an opaque IndexError or attach the wrong label.
if probs.ndim != 2 or probs.shape[1] != len(CLASSES):
    raise ValueError(
        f"La salida del modelo tiene forma {probs.shape}, "
        f"pero metadata.json declara {len(CLASSES)} clases"
    )
# zip() below stops at the shortest input, so a count mismatch would
# silently drop rows from the CSV.
if probs.shape[0] != len(files):
    raise ValueError(
        f"Se obtuvieron {probs.shape[0]} predicciones para {len(files)} imágenes"
    )

pred_idx = np.argmax(probs, axis=1)          # winning class index per image
pred_cls = [CLASSES[i] for i in pred_idx]    # index -> class name
conf = np.max(probs, axis=1)                 # score of the winning class

out_csv = "/content/predicciones.csv"
with open(out_csv, "w", newline="", encoding="utf-8") as f:
    w = csv.writer(f)
    w.writerow(["filepath", "pred_idx", "pred_class", "confidence"])
    # Cast NumPy scalars to plain Python numbers before writing.
    w.writerows(
        [p, int(i), c, float(cf)]
        for p, i, c, cf in zip(files, pred_idx, pred_cls, conf)
    )

print("\n✅ Listo. CSV:", out_csv)
print("Primeras 5 predicciones:")
for k in range(min(5, len(files))):
    print(f"  {os.path.basename(files[k])} -> {pred_cls[k]} (conf={conf[k]:.3f})")


✅ resultados.zip extraído en: /content/resultados
Contenido: ['README_INFERENCIA.txt', 'infer_from_zip.py', 'metadata.json', 'model.keras', 'weights.best.keras']

CONFIG INFERENCIA:
  IMG_SIZE : (128, 128)
  CHANNELS : 3
  BATCH    : 32
  #CLASSES : 14


  saveable.load_own_variables(weights_store.get(inner_path))



✅ Modelo cargado: /content/resultados/model.keras

✅ nuevo.zip extraído en: /content/new_data
Imágenes encontradas: 70
Ejemplo: /content/new_data/abraham_grampa_simpson/abraham_grampa_simpson_1.jpg

✅ Listo. CSV: /content/predicciones.csv
Primeras 5 predicciones:
  abraham_grampa_simpson_1.jpg -> abraham_grampa_simpson (conf=0.927)
  abraham_grampa_simpson_2.jpg -> abraham_grampa_simpson (conf=0.925)
  abraham_grampa_simpson_3.jpg -> abraham_grampa_simpson (conf=0.990)
  abraham_grampa_simpson_4.jpg -> abraham_grampa_simpson (conf=0.974)
  abraham_grampa_simpson_5.jpg -> abraham_grampa_simpson (conf=0.882)


In [39]:
import pandas as pd

# Read the CSV from the exact path the prediction cell wrote it to, instead
# of a path relative to the current working directory.
predicciones = pd.read_csv(out_csv)

# Reduce each path to "<folder>/<file name without index and extension>":
#   1. drop the extraction-directory prefix (literal match, not a regex);
#   2. drop an optional trailing "_<digits>" plus the extension. A fixed
#      six-character slice only works for one-digit indices and three-letter
#      extensions ("_1.jpg"), and breaks on "_10.jpg" or ".jpeg".
predicciones['filepath'] = (
    predicciones['filepath']
    .str.replace('/content/new_data/', '', regex=False)
    .str.replace(r'(_\d+)?\.\w+$', '', regex=True)
)
predicciones.sample(10, random_state=42)

Unnamed: 0,filepath,pred_idx,pred_class,confidence
22,chief_wiggum/chief_wiggum,4,chief_wiggum,0.999415
0,abraham_grampa_simpson/abraham_grampa_simpson,0,abraham_grampa_simpson,0.926753
49,milhouse_van_houten/milhouse_van_houten,9,milhouse_van_houten,0.586198
4,abraham_grampa_simpson/abraham_grampa_simpson,0,abraham_grampa_simpson,0.881991
54,moe_szyslak/moe_szyslak,10,moe_szyslak,0.950932
18,charles_montgomery_burns/charles_montgomery_burns,3,charles_montgomery_burns,0.823624
10,bart_simpson/bart_simpson,2,bart_simpson,0.53186
33,krusty_the_clown/krusty_the_clown,12,principal_skinner,0.307832
45,milhouse_van_houten/milhouse_van_houten,9,milhouse_van_houten,0.999708
12,bart_simpson/bart_simpson,2,bart_simpson,0.818436


In [52]:
def acierto(x):
    """Return True when the row's 'pred_class' occurs inside its 'filepath' value."""
    return x['pred_class'] in x['filepath']


predicciones['prediccion correcta'] = predicciones.apply(acierto, axis=1)

# Per-'filepath' summary: number of correct predictions plus the minimum,
# mean, median and maximum of the confidence values.
resumen_por_ruta = {
    'prediccion correcta': 'sum',
    'confidence': ['min', 'mean', 'median', 'max'],
}
predicciones.groupby('filepath', as_index=False).agg(resumen_por_ruta)

Unnamed: 0_level_0,filepath,prediccion correcta,confidence,confidence,confidence,confidence
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,min,mean,median,max
0,abraham_grampa_simpson/abraham_grampa_simpson,5,0.881991,0.939573,0.926753,0.990158
1,apu_nahasapeemapetilon/apu_nahasapeemapetilon,5,0.410188,0.837867,0.978461,0.995364
2,bart_simpson/bart_simpson,5,0.531847,0.613973,0.548468,0.818436
3,charles_montgomery_burns/charles_montgomery_burns,5,0.823624,0.940855,0.983843,0.99961
4,chief_wiggum/chief_wiggum,5,0.52391,0.902434,0.998818,0.9995
5,homer_simpson/homer_simpson,4,0.326112,0.51034,0.501079,0.719019
6,krusty_the_clown/krusty_the_clown,4,0.243852,0.673247,0.826842,0.999707
7,lisa_simpson/lisa_simpson,3,0.44649,0.710537,0.742203,0.979992
8,marge_simpson/marge_simpson,4,0.533591,0.760797,0.778029,0.976872
9,milhouse_van_houten/milhouse_van_houten,5,0.586198,0.907949,0.989323,0.999708
