In [None]:
# NOTEBOOK 2
import os
import mlflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Load the preprocessed datasets.
# tf.data.experimental.load is deprecated in favour of tf.data.Dataset.load;
# prefer the new entry point and fall back on TensorFlow builds that lack it.
try:
    _load_dataset = tf.data.Dataset.load
except AttributeError:
    _load_dataset = tf.data.experimental.load

ds_train = _load_dataset('preprocessed_train')
ds_valid = _load_dataset('preprocessed_valid')

# Training-loop configuration and accumulators
N_RUNS = 5  # number of independent training runs
accuracies = []  # validation accuracy of each run, in run order
f1_scores = []  # validation F1 score of each run, in run order
best_val_acc = 0  # highest validation accuracy observed across runs
best_history = None  # Keras history dict of the run that reached best_val_acc
mlflow.set_experiment("clasificacion_perros_y_gatos")

def _build_model():
    """Build and compile a fresh CNN binary classifier.

    The network takes inputs of shape (64, 64, 3), applies random
    augmentation layers, three convolutional stages with batch normalization
    and dropout, and ends in a single sigmoid unit. It is compiled with Adam
    and binary cross-entropy, reporting ``binary_accuracy``.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    # Every augmentation layer uses the same fixed seed (42), which is the
    # value logged to MLflow as "random_seed". Weight initialisation is not
    # seeded here.
    data_augmentation = keras.Sequential([
        layers.RandomFlip("horizontal_and_vertical", seed=42),
        layers.RandomRotation(0.3, seed=42),
        layers.RandomZoom(0.2, seed=42),
        layers.RandomContrast(0.2, seed=42),
    ])

    model = keras.Sequential([
        keras.Input(shape=(64, 64, 3)),
        data_augmentation,
        layers.Conv2D(32, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPool2D(),
        layers.Dropout(0.5),
        layers.Conv2D(64, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPool2D(),
        layers.Dropout(0.5),
        layers.Conv2D(128, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.Conv2D(128, 3, activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPool2D(),
        layers.Dropout(0.5),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(
        # NOTE(review): epsilon=0.01 is far above Adam's default; confirm this
        # is intentional and was not meant to be learning_rate.
        optimizer=tf.keras.optimizers.Adam(epsilon=0.01),
        loss='binary_crossentropy',
        metrics=['binary_accuracy']
    )
    return model


def _collect_predictions(model, dataset):
    """Return ``(y_true, y_pred)`` label lists for one pass over ``dataset``.

    Labels and predictions are gathered in the same pass so they stay aligned
    even if the dataset yields its batches in a different order on each
    iteration. Sigmoid outputs are thresholded at 0.5.

    Args:
        model: Trained Keras model producing one probability per sample.
        dataset: Iterable of ``(images, labels)`` batches.

    Returns:
        Two flat lists: ground-truth labels and predicted 0/1 labels.
    """
    y_true = []
    y_pred = []
    for images, labels in dataset:
        # predict_on_batch avoids the per-call overhead of Model.predict,
        # which is intended for whole datasets rather than Python loops.
        probs = model.predict_on_batch(images)
        # Flatten both sides so labels carrying a trailing unit axis still
        # line up with the 1-D predictions.
        y_true.extend(np.ravel(labels.numpy()))
        y_pred.extend((np.ravel(probs) > 0.5).astype(int))
    return y_true, y_pred


# Best val_binary_accuracy written to "mejor_modelo.keras" so far, across runs.
# None means no checkpoint has been written yet.
best_checkpoint_acc = None

for run in range(N_RUNS):
    print(f"\nEjecutando corrida {run+1}/{N_RUNS}")

    # Drop Keras global state left over from the previous run so that memory
    # does not grow as models are rebuilt.
    keras.backend.clear_session()
    model = _build_model()

    # A new callback would otherwise start with an empty "best" and overwrite
    # the file with this run's first epoch; seeding the threshold with the best
    # value from earlier runs keeps the file as the best model across all runs.
    checkpoint_cb = ModelCheckpoint(
        "mejor_modelo.keras",
        save_best_only=True,
        monitor='val_binary_accuracy',
        mode='max',
        initial_value_threshold=best_checkpoint_acc,
    )

    # Early stopping tracks val_loss (the Keras default, made explicit here),
    # whereas the checkpoint tracks val_binary_accuracy.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0.001,
        patience=10,
        restore_best_weights=True,
    )

    history = model.fit(
        ds_train,
        validation_data=ds_valid,
        epochs=30,
        callbacks=[early_stopping, checkpoint_cb],
    )

    # Remember the best checkpointed value for the next run's threshold.
    run_val_accs = history.history.get('val_binary_accuracy', [])
    if run_val_accs:
        run_peak = max(run_val_accs)
        if best_checkpoint_acc is None or run_peak > best_checkpoint_acc:
            best_checkpoint_acc = run_peak

    # Evaluate metrics with the weights left in `model` after fit; these may
    # differ from the checkpointed epoch because the two callbacks monitor
    # different quantities.
    y_true, y_pred = _collect_predictions(model, ds_valid)

    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    accuracies.append(acc)
    f1_scores.append(f1)

    if acc > best_val_acc:
        best_val_acc = acc
        best_history = history.history

    with mlflow.start_run(run_name=f"run_{run+1}"):
        # NOTE(review): batch_size and image_size are logged as literals; the
        # datasets are loaded already preprocessed, so confirm these match the
        # preprocessing notebook.
        mlflow.log_params({
            "optimizer": "Adam",
            "loss": "binary_crossentropy",
            "batch_size": 64,
            "image_size": (64, 64),
            "run": run + 1,
            "random_seed": 42,
        })
        mlflow.log_metrics({"val_accuracy": acc, "val_f1_score": f1})
        mlflow.tensorflow.log_model(model, f"modelo_cnn_run_{run+1}")

# Persist the aggregated metrics so the next notebook can load them
import pickle

metrics_payload = {
    'accuracies': accuracies,
    'f1_scores': f1_scores,
    'best_val_acc': best_val_acc,
    'best_history': best_history,
}
with open('metrics.pkl', 'wb') as metrics_file:
    pickle.dump(metrics_payload, metrics_file)