<a href="https://colab.research.google.com/github/JuanDiaz77/Proyecto-colab/blob/main/Generaci%C3%B3n_de_im%C3%A1genes_con_VAEs_simples_en_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =============================================
# Importar librerías necesarias
# =============================================
import tensorflow as tf
from tensorflow.keras import layers, Model
import numpy as np
import matplotlib.pyplot as plt

# =============================================
# Load and prepare the MNIST dataset
# =============================================
# The label arrays are discarded; only the images are used below.
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()

# Convert to float32, divide by 255, and append a trailing channel axis
# so each image has shape (28, 28, 1).
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., np.newaxis]

print(f"Dataset cargado: {x_train.shape[0]} imágenes de entrenamiento")

# =============================================
# Model dimensions and hyperparameters
# =============================================
latent_dim = 2  # Size of the latent space (larger values can be tried too)

# =============================================
# Encoder
# =============================================
encoder_inputs = layers.Input(shape=(28, 28, 1))

# Two stride-2 convolutions followed by a flatten and a small dense layer.
x = layers.Conv2D(
    filters=32, kernel_size=3, strides=2, padding="same", activation="relu"
)(encoder_inputs)
x = layers.Conv2D(
    filters=64, kernel_size=3, strides=2, padding="same", activation="relu"
)(x)
x = layers.Flatten()(x)
x = layers.Dense(units=16, activation="relu")(x)

# Encoder heads: mean and log-variance, each of size latent_dim
z_mean = layers.Dense(units=latent_dim, name="z_mean")(x)
z_log_var = layers.Dense(units=latent_dim, name="z_log_var")(x)

# Sampling function based on the reparameterization trick
def sampling(args):
    """Draw a latent sample ``z_mean + exp(0.5 * z_log_var) * epsilon``.

    Args:
        args: A pair ``(z_mean, z_log_var)`` of tensors with matching shapes.

    Returns:
        A tensor with the same shape as ``z_mean``, where ``epsilon`` is
        standard normal noise drawn on every call.
    """
    z_mean, z_log_var = args
    # Derive the noise shape and dtype from z_mean itself instead of the
    # module-level latent_dim, so the function does not depend on a global
    # and keeps working if the layer dtype is not float32.
    epsilon = tf.random.normal(shape=tf.shape(z_mean), dtype=z_mean.dtype)
    # exp(0.5 * log_var) converts the log-variance into a standard deviation.
    return z_mean + tf.exp(0.5 * z_log_var) * epsilon

# Wrap the sampling function in a layer so it becomes part of the graph.
z = layers.Lambda(function=sampling, output_shape=(latent_dim,), name="z")(
    [z_mean, z_log_var]
)

# The encoder outputs the two distribution parameters plus one sample.
encoder = Model(
    inputs=encoder_inputs,
    outputs=[z_mean, z_log_var, z],
    name="encoder",
)
encoder.summary()

# =============================================
# Decoder
# =============================================
latent_inputs = layers.Input(shape=(latent_dim,))

# Project the latent vector to a 7x7x64 feature map, then upsample with two
# stride-2 transposed convolutions.
x = layers.Dense(units=7 * 7 * 64, activation="relu")(latent_inputs)
x = layers.Reshape(target_shape=(7, 7, 64))(x)
x = layers.Conv2DTranspose(
    filters=64, kernel_size=3, strides=2, padding="same", activation="relu"
)(x)
x = layers.Conv2DTranspose(
    filters=32, kernel_size=3, strides=2, padding="same", activation="relu"
)(x)

# Single-channel output; the sigmoid keeps every pixel value in (0, 1).
decoder_outputs = layers.Conv2DTranspose(
    filters=1, kernel_size=3, padding="same", activation="sigmoid"
)(x)

decoder = Model(inputs=latent_inputs, outputs=decoder_outputs, name="decoder")
decoder.summary()

# =============================================
# Full VAE model
# =============================================
class VAE(Model):
    """Variational autoencoder built from an encoder and a decoder model.

    The encoder must return ``(z_mean, z_log_var, z)`` and the decoder must
    map ``z`` back to an image. Training minimizes the sum of a binary
    cross-entropy reconstruction term and a KL-divergence term.

    The three loss values are accumulated in ``Mean`` metrics, so the numbers
    returned by ``train_step`` are running averages over the current epoch
    rather than single-batch readings. They are reported under the keys
    ``"loss_total"``, ``"recon_loss"`` and ``"kl_loss"``.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the sub-models and create the loss trackers.

        Args:
            encoder: Model returning ``(z_mean, z_log_var, z)``.
            decoder: Model mapping a latent sample to a reconstruction.
            **kwargs: Forwarded to the ``Model`` constructor.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        # Tracker names match the keys returned by train_step.
        self.total_loss_tracker = tf.keras.metrics.Mean(name="loss_total")
        self.reconstruction_loss_tracker = tf.keras.metrics.Mean(name="recon_loss")
        self.kl_loss_tracker = tf.keras.metrics.Mean(name="kl_loss")

    @property
    def metrics(self):
        """Return the loss trackers so Keras resets them between epochs."""
        return [
            self.total_loss_tracker,
            self.reconstruction_loss_tracker,
            self.kl_loss_tracker,
        ]

    def call(self, inputs):
        """Encode ``inputs``, sample a latent vector, and decode it.

        Defining ``call`` lets the model be invoked directly and used with
        ``predict``. The reconstruction is based on the sampled ``z``, so it
        is stochastic.
        """
        _, _, z = self.encoder(inputs)
        return self.decoder(z)

    def train_step(self, data):
        """Run one optimization step on a batch.

        Args:
            data: A batch of images, or a tuple whose first element is the
                batch of images.

        Returns:
            A dict with the running means of the total, reconstruction and
            KL losses.
        """
        if isinstance(data, tuple):
            data = data[0]
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)
            # Reconstruction loss: cross-entropy summed over the two spatial
            # axes, then averaged over the batch.
            reconstruction_loss = tf.reduce_mean(
                tf.reduce_sum(
                    tf.keras.losses.binary_crossentropy(data, reconstruction),
                    axis=(1, 2),
                )
            )
            # KL divergence: summed over latent dimensions, averaged over
            # the batch.
            kl_loss = -0.5 * tf.reduce_mean(
                tf.reduce_sum(
                    1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var),
                    axis=1,
                )
            )
            total_loss = reconstruction_loss + kl_loss
        grads = tape.gradient(total_loss, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

        self.total_loss_tracker.update_state(total_loss)
        self.reconstruction_loss_tracker.update_state(reconstruction_loss)
        self.kl_loss_tracker.update_state(kl_loss)
        return {
            "loss_total": self.total_loss_tracker.result(),
            "recon_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }

# Build the VAE from the two sub-models and attach a default Adam optimizer.
vae = VAE(encoder=encoder, decoder=decoder)
vae.compile(optimizer=tf.keras.optimizers.Adam())

# =============================================
# Train the VAE
# =============================================
# Only the images are passed; no targets are supplied.
history = vae.fit(x=x_train, batch_size=128, epochs=10)

# =============================================
# Generate new images from the latent space
# =============================================
n = 10  # number of images to generate
random_latent_vectors = tf.random.normal(shape=(n, latent_dim))
generated_images = decoder.predict(random_latent_vectors)

# Show the generated images in a single row
plt.figure(figsize=(10, 2))
for i, image in enumerate(generated_images):
    ax = plt.subplot(1, n, i + 1)
    # Drop the channel axis so imshow receives a 2-D array.
    plt.imshow(image.squeeze(), cmap="gray")
    plt.axis("off")
plt.suptitle("Imágenes generadas por el VAE")
plt.show()

# =============================================
# Visualize the latent space
# =============================================
# The encoder returns [z_mean, z_log_var, z]; only the means are plotted.
# A separate name is used on purpose: rebinding `z_mean` here would replace
# the encoder's symbolic output tensor with a NumPy array, which breaks
# re-running the encoder-construction code in the same session.
z_mean_test = encoder.predict(x_test)[0]

# Only the first two latent coordinates are shown, so latent_dim must be >= 2.
plt.figure(figsize=(6, 6))
plt.scatter(z_mean_test[:, 0], z_mean_test[:, 1], alpha=0.5)
plt.title("Representación del espacio latente (z_mean)")
plt.xlabel("z1")
plt.ylabel("z2")
plt.show()


Dataset cargado: 60000 imágenes de entrenamiento


Epoch 1/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 106ms/step - kl_loss: 3.4857 - loss_total: 209.8992 - recon_loss: 206.4135
Epoch 2/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 104ms/step - kl_loss: 5.6273 - loss_total: 174.7136 - recon_loss: 169.0863
Epoch 3/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 103ms/step - kl_loss: 6.1069 - loss_total: 168.1200 - recon_loss: 162.0131
Epoch 4/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 103ms/step - kl_loss: 6.3956 - loss_total: 164.3711 - recon_loss: 157.9755
Epoch 5/10
[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 103ms/step - kl_loss: 6.4982 - loss_total: 162.1209 - recon_loss: 155.6226
Epoch 6/10


KeyboardInterrupt: 