# Setup

In [2]:
import warnings
import numpy as np
from tensorflow.keras.layers import Input, Dense, Lambda, Concatenate, Conv2D, Conv2DTranspose, Flatten, Reshape
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers.legacy import Adam

import matplotlib.pyplot as plt
from tensorflow.python.framework.ops import disable_eager_execution
# Switch TensorFlow to graph (non-eager) execution for the whole notebook.
# NOTE(review): presumably needed because KL_loss reads the symbolic encoder
# tensors `mu` / `l_sigma` directly -- confirm before removing.
disable_eager_execution()

# Suppress every Python warning from here on (deprecation notices included).
warnings.filterwarnings('ignore')

 # CVAE simple

## Data processing

In [3]:
# Load MNIST; the raw class labels are kept in Y_train / Y_test.
(X_train, Y_train), (X_test, Y_test) = mnist.load_data()

# Number of values per image once every non-batch axis is flattened.
n_pixels = np.prod(X_train.shape[1:])

# Flatten each image to a vector and divide by 255 (assumes 8-bit pixel values).
X_train = X_train.reshape((len(X_train), n_pixels)).astype('float32') / 255.
X_test = X_test.reshape((len(X_test), n_pixels)).astype('float32') / 255.

# One-hot encode the labels; they are the conditioning input of the CVAE.
y_test = to_categorical(Y_test)
y_train = to_categorical(Y_train)

## Modelamiento
### Hiperparámetros

In [4]:
# Training hyperparameters for the dense CVAE.
batch_size = 250  # samples per gradient step
latent_dim = 2  # size of the latent space
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optim = Adam(learning_rate=0.001)
n_x = X_train.shape[1]  # length of a flattened image (encoder input / decoder output)
n_y = y_train.shape[1]  # width of the one-hot label vector
n_epoch = 50  # maximum number of epochs passed to fit()

### Encoder

In [5]:
# Encoder: (flattened image, one-hot label) -> sampled latent vector z.
encoder_inp1 = Input(shape=(n_x,), name="input_image")
encoder_inp2 = Input(shape=(n_y,), name="input_label")
enc_concat = Concatenate(name="encoder_concatenate")([encoder_inp1, encoder_inp2])
# NOTE(review): no activation is given, so this hidden layer is linear -- confirm intended.
encoder_hidden = Dense(512, name="hidden_layer")(enc_concat)
# Two linear heads; `l_sigma` is used as a log-variance in sample_z and KL_loss.
mu = Dense(latent_dim, activation='linear', name="mu")(encoder_hidden)
l_sigma = Dense(latent_dim, activation='linear', name="l_sigma")(encoder_hidden)


def sample_z(args):
    """Reparameterisation step: return ``mu + exp(l_sigma / 2) * eps``.

    Args:
        args: pair ``(mu, l_sigma)`` of tensors of shape ``(batch, latent_dim)``.

    Returns:
        A tensor shaped like ``mu`` holding one latent sample per input row.
    """
    mu, l_sigma = args
    # Draw independent noise for every row of the batch. A fixed (latent_dim,)
    # shape would broadcast one shared noise vector over the whole batch, and a
    # hard-coded batch_size would break on a differently sized batch, so the
    # batch dimension is read dynamically from `mu`.
    n_rows = K.shape(mu)[0]
    n_dims = K.int_shape(mu)[1]
    eps = K.random_normal(shape=(n_rows, n_dims), mean=0., stddev=1.)
    return mu + K.exp(l_sigma / 2) * eps


z = Lambda(sample_z, output_shape=(latent_dim,), name="latent_vector")([mu, l_sigma])  # encoder output

encoder = Model([encoder_inp1, encoder_inp2], z, name="encoder")

In [6]:
# Print the encoder's layer-by-layer summary.
encoder.summary()

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_image (InputLayer)       [(None, 784)]        0           []                               
                                                                                                  
 input_label (InputLayer)       [(None, 10)]         0           []                               
                                                                                                  
 encoder_concatenate (Concatena  (None, 794)         0           ['input_image[0][0]',            
 te)                                                              'input_label[0][0]']            
                                                                                                  
 hidden_layer (Dense)           (None, 512)          407040      ['encoder_concatenate[0][0]

### Decoder

In [7]:
# Decoder: (latent vector, one-hot label) -> reconstructed flattened image.
# Shapes are given as 1-tuples: `(latent_dim)` without a trailing comma is a
# plain int, not a tuple, and the encoder inputs already use the tuple form.
decoder_inp1 = Input(shape=(latent_dim,), name="input_latent_vector")
decoder_inp2 = Input(shape=(n_y,), name="input_label")
dec_concat = Concatenate(name="decoder_concat")([decoder_inp1, decoder_inp2])
decoder_hidden = Dense(512, activation="relu", name="hidden_layer")(dec_concat)
# Sigmoid keeps every output in (0, 1), matching the binary cross-entropy loss.
output = Dense(n_x, activation="sigmoid", name="output_img")(decoder_hidden)  # output image
decoder = Model([decoder_inp1, decoder_inp2], output, name="decoder")
decoder.summary()

Model: "decoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_latent_vector (InputLaye  [(None, 2)]         0           []                               
 r)                                                                                               
                                                                                                  
 input_label (InputLayer)       [(None, 10)]         0           []                               
                                                                                                  
 decoder_concat (Concatenate)   (None, 12)           0           ['input_latent_vector[0][0]',    
                                                                  'input_label[0][0]']            
                                                                                            

### Conditional variational autoencoder

In [8]:
# Chain encoder and decoder into one trainable model. The label input feeds
# both halves: it conditions the encoding and the decoding.
latent_vector = encoder([encoder_inp1, encoder_inp2])  # sampled z
output_img = decoder([latent_vector, encoder_inp2])
CVAE = Model(
    inputs=[encoder_inp1, encoder_inp2],
    outputs=output_img,
    name="CVAE",
)
CVAE.summary()

Model: "CVAE"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_image (InputLayer)       [(None, 784)]        0           []                               
                                                                                                  
 input_label (InputLayer)       [(None, 10)]         0           []                               
                                                                                                  
 encoder (Functional)           (None, 2)            409092      ['input_image[0][0]',            
                                                                  'input_label[0][0]']            
                                                                                                  
 decoder (Functional)           (None, 784)          408848      ['encoder[0][0]',             

## Entrenamiento

In [9]:
def vae_loss(y_true, y_pred):
    """Total training objective: reconstruction term plus KL term.

    Args:
        y_true: target images passed by Keras.
        y_pred: decoder output passed by Keras.

    Returns:
        ``recon_loss(y_true, y_pred) + KL_loss(y_true, y_pred)``.
    """
    return recon_loss(y_true, y_pred) + KL_loss(y_true, y_pred)

def KL_loss(y_true, y_pred):
    """KL term of the loss, built from the encoder's ``mu`` and ``l_sigma`` layers.

    ``y_true`` and ``y_pred`` are not used; they are accepted only because
    Keras calls losses and metrics with that signature. ``mu`` and ``l_sigma``
    are read from the notebook's global scope, so this uses whichever encoder
    tensors those names are bound to when the function is called.

    Returns:
        ``0.5 * sum(exp(l_sigma) + mu**2 - 1 - l_sigma)`` taken over axis 1.
    """
    kl_terms = K.exp(l_sigma) + K.square(mu) - 1. - l_sigma
    return 0.5 * K.sum(kl_terms, axis=1)

def recon_loss(y_true, y_pred):
    """Reconstruction term: element-wise binary cross-entropy summed over the last axis.

    Args:
        y_true: target images.
        y_pred: decoder output.

    Returns:
        One summed cross-entropy value per sample.
    """
    elementwise_bce = K.binary_crossentropy(y_true, y_pred)
    return K.sum(elementwise_bce, axis=-1)

In [10]:
# Optimise the combined loss; its two components are also reported as metrics
# so they can be followed separately during training.
CVAE.compile(
    optimizer=optim,
    loss=vae_loss,
    metrics=[KL_loss, recon_loss],
)

In [11]:
# Train the CVAE to reproduce its own image input, conditioned on the label.
# The test split doubles as the validation set.
cvae_hist = CVAE.fit(
    [X_train, y_train],
    X_train,
    verbose=1,
    batch_size=batch_size,
    epochs=n_epoch,
    validation_data=([X_test, y_test], X_test),
    # Stop once the monitored metric has gone 5 epochs without improving.
    callbacks=[EarlyStopping(patience=5)],
)

Train on 60000 samples, validate on 10000 samples


2023-07-24 22:55:11.054031: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-24 22:55:11.067620: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-07-24 22:55:11.067760: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Epoch 1/50

2023-07-24 22:55:12.146116: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:637] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/50
11750/60000 [====>.........................] - ETA: 0s - loss: 159.3685 - KL_loss: 4.3008 - recon_loss: 155.0678

2023-07-24 22:55:12.658880: W tensorflow/c/c_api.cc:300] Operation '{name:'loss/mul' id:278 op device:{requested: '', assigned: ''} def:{{{node loss/mul}} = Mul[T=DT_FLOAT, _has_manual_control_dependencies=true](loss/mul/x, loss/decoder_loss/value)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50


Epoch 43/50


## Predicción

Por defecto utilizamos un vector latente de ceros. Para variar el output se puede cambiar el vector latente.

In [None]:
# Decode an all-zero latent vector conditioned on class index 1.
# NOTE: `z` is rebound here from the encoder's output tensor to a NumPy array.
label = np.array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])  # one-hot label
z = np.zeros((1, latent_dim))  # latent vector, batch of one
generated = decoder.predict([z, label])
plt.figure(figsize=(3, 3))
plt.imshow(generated.reshape(28, 28), cmap=plt.cm.gray)
plt.show()

 # CVAE con capas convolucionales

## Data processing

In [None]:
# Reload MNIST for the convolutional model.
(imgs_train, labels_train), (imgs_test, labels_test) = mnist.load_data()

# Divide by 255 (assumes 8-bit pixel values) and append a trailing channel
# axis so each image has the (height, width, 1) layout fed to Conv2D.
imgs_train = (imgs_train.astype('float32') / 255)[..., np.newaxis]
imgs_test = (imgs_test.astype('float32') / 255)[..., np.newaxis]

# One-hot encode the labels used as the conditioning input.
labels_test = to_categorical(labels_test)
labels_train = to_categorical(labels_train)

In [None]:
# Sanity check: per-sample shape of the image tensor and of the one-hot labels.
print("image shape:", imgs_train.shape[1:])
print("n_cat shape:", labels_train.shape[1:])

## Modelamiento
### Hiperparámetros

In [None]:
# Training hyperparameters for the convolutional CVAE.
batch_size = 250  # samples per gradient step
latent_dim = 2  # size of the latent space
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
optim = Adam(learning_rate=0.001)
shape_img = imgs_train.shape[1:]  # per-image shape, channel axis included
n_cat = labels_train.shape[1]  # width of the one-hot label vector
n_epoch = 50  # maximum number of epochs passed to fit()

### Encoder

In [None]:
# Encoder: (image, one-hot label) -> sampled latent vector z.
# `shape_img` is already a tuple; `n_cat` is an int and must be wrapped in a 1-tuple.
enc_input_image = Input(shape=shape_img, name="input_image")
enc_input_label = Input(shape=(n_cat,), name="input_label")

# Convolutional block: two stride-2 convolutions downsample the image.
conv1 = Conv2D(filters=32, kernel_size=3, strides=2, activation="relu")(enc_input_image)
conv2 = Conv2D(filters=64, kernel_size=3, strides=2, activation='relu')(conv1)

# Flatten the feature maps and append the label so it conditions the encoding.
flattened = Flatten()(conv2)
enc_concat = Concatenate()([flattened, enc_input_label])

# NOTE(review): no activation is given, so this hidden layer is linear -- confirm intended.
encoder_hidden = Dense(512, name="hidden_layer")(enc_concat)
# Two linear heads; `l_sigma` is used as a log-variance in sample_z and KL_loss.
mu = Dense(latent_dim, activation='linear', name="mu")(encoder_hidden)
l_sigma = Dense(latent_dim, activation='linear', name="l_sigma")(encoder_hidden)


def sample_z(args):
    """Reparameterisation step: return ``mu + exp(l_sigma / 2) * eps``.

    Args:
        args: pair ``(mu, l_sigma)`` of tensors of shape ``(batch, latent_dim)``.

    Returns:
        A tensor shaped like ``mu`` holding one latent sample per input row.
    """
    mu, l_sigma = args
    # Draw independent noise for every row of the batch. A fixed (latent_dim,)
    # shape would broadcast one shared noise vector over the whole batch, and a
    # hard-coded batch_size would break on a differently sized batch, so the
    # batch dimension is read dynamically from `mu`.
    n_rows = K.shape(mu)[0]
    n_dims = K.int_shape(mu)[1]
    eps = K.random_normal(shape=(n_rows, n_dims), mean=0., stddev=1.)
    return mu + K.exp(l_sigma / 2) * eps


z = Lambda(sample_z, output_shape=(latent_dim,), name="latent_vector")([mu, l_sigma])  # encoder output

encoder = Model([enc_input_image, enc_input_label], z, name="encoder")
encoder.summary()

### Decoder

In [None]:
# Decoder: (latent vector, one-hot label) -> reconstructed flattened image.
# Shapes are given as 1-tuples: `(latent_dim)` without a trailing comma is a plain int.
dec_inp_latent_vector = Input(shape=(latent_dim,), name="input_latent_vector")
decoder_inp_label = Input(shape=(n_cat,), name="input_label")
dec_concat = Concatenate(name="decoder_concat")([dec_inp_latent_vector, decoder_inp_label])

# Dense layers expand the conditioned latent vector to a 7x7x32 feature map.
decoder_hidden = Dense(512, activation="relu", name="hidden_layer")(dec_concat)
decoder_hidden = Dense(7*7*32, activation="relu", name="hidden_layer2")(decoder_hidden)
reshaped = Reshape(target_shape=(7, 7, 32))(decoder_hidden)

# Convolutional block: two stride-2 transposed convolutions upsample 7 -> 14 -> 28.
dec_conv1 = Conv2DTranspose(filters=64, kernel_size=3, strides=2, activation="relu", padding='same')(reshaped)
dec_conv2 = Conv2DTranspose(filters=32, kernel_size=3, strides=2, activation="relu", padding='same')(dec_conv1)
# The sigmoid keeps pixel outputs in (0, 1). Without it the layer emits
# unbounded values, but recon_loss applies a probability-based binary
# cross-entropy and the prediction cell plots the output as an image.
dec_output_img = Conv2DTranspose(filters=1, kernel_size=3, strides=1, activation="sigmoid", padding='same')(dec_conv2)
# Flatten so the output matches the flattened training targets.
dec_output_img = Flatten()(dec_output_img)
decoder = Model([dec_inp_latent_vector, decoder_inp_label], dec_output_img, name="decoder")
decoder.summary()

### Conditional Variational autoencoder

In [None]:
# Chain the convolutional encoder and decoder into one trainable model. The
# label input feeds both halves.
latent_vector = encoder([enc_input_image, enc_input_label])  # sampled z
output_img = decoder([latent_vector, enc_input_label])
CVAE = Model(
    inputs=[enc_input_image, enc_input_label],
    outputs=output_img,
    name="CVAE",
)
CVAE.summary()

## Entrenamiento

In [None]:
def vae_loss(y_true, y_pred):
    """Total training objective: reconstruction term plus KL term.

    Args:
        y_true: flattened target images passed by Keras.
        y_pred: decoder output passed by Keras.

    Returns:
        ``recon_loss(y_true, y_pred) + KL_loss(y_true, y_pred)``.
    """
    return recon_loss(y_true, y_pred) + KL_loss(y_true, y_pred)

def KL_loss(y_true, y_pred):
    """KL term of the loss, built from the encoder's ``mu`` and ``l_sigma`` layers.

    ``y_true`` and ``y_pred`` are not used; they are accepted only because
    Keras calls losses and metrics with that signature. ``mu`` and ``l_sigma``
    are read from the notebook's global scope, so this uses whichever encoder
    tensors those names are bound to when the function is called.

    Returns:
        ``0.5 * sum(exp(l_sigma) + mu**2 - 1 - l_sigma)`` taken over axis 1.
    """
    kl_terms = K.exp(l_sigma) + K.square(mu) - 1. - l_sigma
    return 0.5 * K.sum(kl_terms, axis=1)

def recon_loss(y_true, y_pred):
    """Reconstruction term: element-wise binary cross-entropy summed over the last axis.

    ``from_logits`` is left at its default, so ``y_pred`` is passed to the
    cross-entropy as-is.

    Returns:
        One summed cross-entropy value per sample.
    """
    elementwise_bce = K.binary_crossentropy(y_true, y_pred)
    return K.sum(elementwise_bce, axis=-1)

In [None]:
# Optimise the combined loss; its two components are also reported as metrics
# so they can be followed separately during training.
CVAE.compile(
    optimizer=optim,
    loss=vae_loss,
    metrics=[KL_loss, recon_loss],
)

In [None]:
# The decoder emits flattened images, so the targets must be flattened as
# well: each (28, 28, 1) input image becomes a vector of 784 values.
# NOTE: this rebinds y_train / y_test, which held the one-hot labels in the
# dense-CVAE section above.
y_train = imgs_train.reshape(len(imgs_train), -1)
y_test = imgs_test.reshape(len(imgs_test), -1)
print(y_train.shape)

In [None]:
# Train the convolutional CVAE: inputs are (image, label) pairs and targets are
# the flattened images. The test split doubles as the validation set.
cvae_hist = CVAE.fit(
    [imgs_train, labels_train],
    y_train,
    verbose=1,
    batch_size=batch_size,
    epochs=n_epoch,
    validation_data=([imgs_test, labels_test], y_test),
    # Stop once the monitored metric has gone 5 epochs without improving.
    callbacks=[EarlyStopping(patience=5)],
)

## Predicción

In [None]:
# Decode an all-zero latent vector conditioned on class index 1.
# NOTE: `z` is rebound here from the encoder's output tensor to a NumPy array.
label = np.array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])  # one-hot label
z = np.zeros((1, latent_dim))  # latent vector, batch of one
generated = decoder.predict([z, label])
plt.figure(figsize=(3, 3))
plt.imshow(generated.reshape(28, 28), cmap=plt.cm.gray)
plt.show()