# Arquitectura del ``Autoencoder``

In [32]:
import tensorflow as tf
from tensorflow import keras

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
from keras import Sequential
from keras import losses, metrics, optimizers
from keras.applications import mobilenet
from keras.layers import Input, Dense, Conv2D, Flatten, LeakyReLU, MaxPooling2D, Conv2DTranspose, GlobalAveragePooling2D, Reshape

import numpy as np

Num GPUs Available:  1


## Regla de la pirámide geométrica

La regla de la pirámide geométrica ayuda a determinar el tamaño de las capas ocultas a partir del tamaño de las capas de entrada y de salida y del número de capas ocultas. 

Esta aproximación fue propuesta por Masters (1993): 
>  "For a three layer network with n input and m output neurons, the hidden layer would have sqrt(n * m) neurons."
>
> -- <cite> Masters, Timothy. Practical neural network recipes in C++. Morgan Kaufmann, 1993.</cite>

[Enlace al artículo](https://eulertech.wordpress.com/2018/01/02/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-network/)

In [2]:
def pyramid_rule(h_layers, input_size, output_size):
    """Size the hidden layers of a network with the geometric pyramid rule.

    The hidden sizes follow a geometric progression between ``input_size``
    and ``output_size``: consecutive layers differ by the constant ratio
    ``(input_size / output_size) ** (1 / (h_layers + 1))``.

    Args:
        h_layers: Number of hidden layers to size.
        input_size: Number of units in the input layer.
        output_size: Number of units in the output layer.

    Returns:
        A list with ``h_layers`` rounded layer sizes, ordered from the layer
        next to the input to the layer next to the output. When ``h_layers``
        is below 1, "No layers" is printed and an empty list is returned.
    """
    if h_layers < 1:
        print("No layers")
        return []

    print("Layers for input %d and output %d:" % (input_size,  output_size))
    ratio = (input_size/output_size)**(1/(h_layers+1))

    sizes = []
    # The exponent counts down from h_layers to 1, so the first hidden layer
    # is the one closest in size to the input.
    for position, exponent in enumerate(range(h_layers, 0, -1), start=1):
        size = round(output_size*(ratio**exponent))
        sizes.append(size)
        print("Layer %d: %d neurons" % (position, size))
    return sizes

## EMD (Earth Mover's Distance) ``WIP``

La EMD es una medida de distancia entre distribuciones de probabilidad que consiste en representar ambas distribuciones como montones de tierra; la distancia es el trabajo mínimo necesario para transformar un montículo en el otro. Matemáticamente, la EMD se conoce como la métrica de Wasserstein.

Nota: la fórmula siguiente, que es la que implementa ``EMD_loss`` por ahora, es la raíz del error cuadrático medio (RMSE) y no la EMD propiamente dicha.

$\sqrt{\frac{1}{n}\sum_{i=1}^{n}(p_i-q_i)^2}$

Siendo $p$ y $q$ la predicción y el valor verdadero, y $n$ el número de elementos.

In [3]:
def EMD_loss(y_true, y_pred):
    """Root-mean-square error between ``y_true`` and ``y_pred``.

    Despite its name, this function does not compute the Earth Mover's
    (Wasserstein) distance: it evaluates ``sqrt(mean((y_true - y_pred)**2))``
    over every element of the two tensors.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, with a shape compatible with ``y_true``.

    Returns:
        A scalar tensor holding the RMSE over all elements.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    # Align dtypes so the subtraction does not fail on mixed inputs
    # (e.g. integer targets against float predictions).
    y_true = tf.cast(y_true, y_pred.dtype)
    squared_error = tf.math.square(tf.math.subtract(y_true, y_pred))
    # reduce_mean divides by the runtime element count. The previous
    # np.prod(y_true.shape) needed every static dimension to be known and
    # broke when one of them (typically the batch axis) was None.
    return tf.math.sqrt(tf.math.reduce_mean(squared_error))

## Encoders

### Transfer Encoder

Este ``encoder`` es un modelo basado en ``transfer learning``: tomamos la red ``mobilenet``, entrenada con imágenes de ``imagenet``, sin las capas de clasificación final, con una entrada de tamaño _img\_shape_, y en la salida colocamos la 'representación latente', una codificación de la imagen que nos permitiría reconstruirla con un ``decoder``.

In [4]:
# https://www.tensorflow.org/api_docs/python/tf/keras/applications/mobilenet/preprocess_input
# https://keras.io/guides/transfer_learning/
def build_transf_encoder(dim_latente, img_shape, trainable=True):
    """Build an encoder on top of an ImageNet-pretrained MobileNet backbone.

    The input is cast to float32, resized to 128x128 and passed through
    MobileNet (without its classification head). The resulting feature map is
    globally average-pooled and projected to ``dim_latente`` values by a
    linear Dense layer.

    Args:
        dim_latente: Size of the latent representation produced by the encoder.
        img_shape: Shape of the input images. It must have 3 channels, since
            the resized tensor is fed to a MobileNet built for (128, 128, 3).
        trainable: Whether the MobileNet weights are updated during training.

    Returns:
        A ``keras.Model`` mapping images of shape ``img_shape`` to latent
        vectors of size ``dim_latente``.
    """
    inputs = Input(shape=img_shape)
    x = tf.cast(inputs, tf.float32)
    x = tf.keras.layers.Resizing(128, 128)(x)
    # NOTE(review): mobilenet.preprocess_input is not applied here. The
    # ImageNet weights presumably expect inputs scaled to [-1, 1] -- confirm
    # the scaling of the data fed to this encoder.
    core = mobilenet.MobileNet(input_shape=(128, 128, 3), weights="imagenet", include_top=False)
    core.trainable = trainable
    if trainable:
        # Let Keras propagate the training/inference flag. Passing
        # training=True explicitly (as the previous training=trainable did)
        # pins the backbone to training mode, so BatchNormalization keeps
        # using batch statistics even in predict()/evaluate().
        features = core(x)
    else:
        # Frozen backbone: keep it in inference mode at all times.
        features = core(x, training=False)
    features = GlobalAveragePooling2D()(features)
    repr_latente = Dense(dim_latente)(features)
    return keras.Model(inputs, repr_latente)


In [5]:

# Build the transfer-learning encoder for 32x32x3 inputs with a 20-dimensional
# latent output and print its layer summary.
enc = build_transf_encoder(20, (32,32,3))
enc.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 tf.cast (TFOpLambda)        (None, 32, 32, 3)         0         
                                                                 
 resizing (Resizing)         (None, 128, 128, 3)       0         
                                                                 
 mobilenet_1.00_128 (Functio  (None, 4, 4, 1024)       3228864   
 nal)                                                            
                                                                 
 global_average_pooling2d (G  (None, 1024)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dense (Dense)               (None, 20)                20500 

### Encoder denso

In [6]:
def build_dense_encoder(dim_latente, img_shape, depth=2):
    """Build a fully-connected encoder sized with the pyramid rule.

    The image is flattened and passed through ``depth`` Dense layers, each
    followed by LeakyReLU(0.2), whose widths are given by ``pyramid_rule``
    between the flattened image size and ``dim_latente``. A final linear
    Dense layer produces the latent code.

    Args:
        dim_latente: Size of the latent representation.
        img_shape: Shape of the input images.
        depth: Number of hidden layers.

    Returns:
        A ``keras.Sequential`` encoder.
    """
    hidden_sizes = pyramid_rule(depth, np.prod(img_shape), dim_latente)

    stack = [Flatten(input_shape=img_shape)]
    for width in hidden_sizes:
        stack.append(Dense(width))
        stack.append(LeakyReLU(alpha=0.2))
    stack.append(Dense(dim_latente))

    return keras.Sequential(stack)

In [7]:
# Build the pyramid-rule dense encoder for 32x32x3 inputs with a 20-dimensional
# latent output and print its layer summary.
enc = build_dense_encoder(20, (32,32,3))
enc.summary()

Layers for input 3072 and output 20:
Layer 1: 574 neurons
Layer 2: 107 neurons
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 3072)              0         
                                                                 
 dense_1 (Dense)             (None, 574)               1763902   
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 574)               0         
                                                                 
 dense_2 (Dense)             (None, 107)               61525     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 107)               0         
                                                                 
 dense_3 (Dense)             (None, 20)                2160      
                                           

In [8]:
def build_dense_encoder2(dim_latente, img_shape, depth=2, units=1000):
    """Build a fully-connected encoder whose hidden layers share one width.

    Variant of the dense encoder that uses a fixed number of units per hidden
    layer instead of sizes derived from the pyramid rule. The image is
    flattened, passed through ``depth`` blocks of Dense(``units``) followed by
    LeakyReLU(0.2), and projected to the latent code by a linear Dense layer.

    Args:
        dim_latente: Size of the latent representation.
        img_shape: Shape of the input images.
        depth: Number of hidden layers.
        units: Width of every hidden layer (previously hard-coded to 1000).

    Returns:
        A ``keras.Sequential`` encoder.
    """
    model = keras.Sequential()
    model.add(Flatten(input_shape=img_shape))
    for _ in range(depth):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(dim_latente))
    return model

In [9]:
# Build the fixed-width dense encoder for 28x28x1 inputs with a 2-dimensional
# latent output and print its layer summary.
enc = build_dense_encoder2(2, (28,28,1))
enc.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_4 (Dense)             (None, 1000)              785000    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 1000)              0         
                                                                 
 dense_5 (Dense)             (None, 1000)              1001000   
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 1000)              0         
                                                                 
 dense_6 (Dense)             (None, 2)                 2002      
                                                                 
Total params: 1,788,002
Trainable params: 1,788,002
No

### Encoder convolucional

In [54]:
def build_conv_encoder(dim_latente, img_shape, depth=2):
    """Build a convolutional encoder.

    The network stacks stages of a 3x3 "same"-padded ReLU Conv2D followed by
    a default MaxPooling2D. The first stage uses 8 filters and each further
    stage doubles that number. The first stage is always built, so at least
    one stage exists even when ``depth`` is below 1. The final feature map is
    flattened and projected to the latent code by a linear Dense layer.

    Args:
        dim_latente: Size of the latent representation.
        img_shape: Shape of the input images.
        depth: Number of convolution + pooling stages.

    Returns:
        A ``keras.Sequential`` encoder.
    """
    base_filters = 8
    encoder = keras.Sequential()

    # First stage carries the input shape.
    encoder.add(Conv2D(base_filters, (3,3), padding="same", activation="relu", input_shape=img_shape))
    encoder.add(MaxPooling2D())

    # Remaining stages: the filter count doubles at every level.
    for level in range(1, depth):
        encoder.add(Conv2D(base_filters * 2 ** level, (3,3), padding="same", activation="relu"))
        encoder.add(MaxPooling2D())

    encoder.add(Flatten())
    encoder.add(Dense(dim_latente))
    return encoder

In [55]:
# Build the convolutional encoder for 28x28x1 inputs with a 20-dimensional
# latent output and print its layer summary.
enc = build_conv_encoder(20, (28,28,1))
enc.summary()

Model: "sequential_23"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_51 (Conv2D)          (None, 28, 28, 8)         80        
                                                                 
 max_pooling2d_12 (MaxPoolin  (None, 14, 14, 8)        0         
 g2D)                                                            
                                                                 
 conv2d_52 (Conv2D)          (None, 14, 14, 16)        1168      
                                                                 
 max_pooling2d_13 (MaxPoolin  (None, 7, 7, 16)         0         
 g2D)                                                            
                                                                 
 flatten_6 (Flatten)         (None, 784)               0         
                                                                 
 dense_47 (Dense)            (None, 20)              

## Decoders

### Decoder denso

In [12]:
def build_dense_decoder(dim_latente, img_shape, depth=2):
    """Build a fully-connected decoder sized with the pyramid rule.

    The latent code goes through ``depth`` Dense layers, each followed by
    LeakyReLU(0.2), whose widths are given by ``pyramid_rule`` between
    ``dim_latente`` and the flattened image size. A sigmoid Dense layer then
    produces one value per image element and the result is reshaped to
    ``img_shape``.

    Args:
        dim_latente: Size of the latent representation fed to the decoder.
        img_shape: Shape of the images to reconstruct.
        depth: Number of hidden layers. With ``depth < 1`` the latent code is
            connected directly to the output layer.

    Returns:
        A ``keras.Sequential`` decoder.
    """
    n_values = np.prod(img_shape)
    layer_sizes = pyramid_rule(depth, dim_latente, n_values)

    model = keras.Sequential()
    # Declare the input once, up front. The original attached input_dim to
    # every hidden Dense layer and indexed layer_sizes[0], which raised
    # IndexError when pyramid_rule returned an empty list (depth < 1).
    model.add(Input(shape=(dim_latente,)))
    for units in layer_sizes:
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(n_values, activation=keras.activations.sigmoid))
    model.add(Reshape(img_shape))

    return model

In [13]:
# Build the pyramid-rule dense decoder (20-dimensional latent input, 32x32x3
# output) and print its layer summary.
# NOTE(review): the decoder is bound to the name `enc`, replacing the encoder
# stored under that name by the earlier cells.
enc = build_dense_decoder(20, (32,32,3))
enc.summary()

Layers for input 20 and output 3072:
Layer 1: 107 neurons
Layer 2: 574 neurons
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 107)               2247      
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 107)               0         
                                                                 
 dense_9 (Dense)             (None, 574)               61992     
                                                                 
 leaky_re_lu_5 (LeakyReLU)   (None, 574)               0         
                                                                 
 dense_10 (Dense)            (None, 3072)              1766400   
                                                                 
 reshape (Reshape)           (None, 32, 32, 3)         0         
                                         

In [14]:
def build_dense_decoder2(dim_latente, img_shape, depth=2, units=1000):
    """Build a fully-connected decoder whose hidden layers share one width.

    Variant of the dense decoder that uses a fixed number of units per hidden
    layer instead of sizes derived from the pyramid rule. The latent code goes
    through ``depth`` blocks of Dense(``units``) followed by LeakyReLU(0.2).
    A sigmoid Dense layer then produces one value per image element and the
    result is reshaped to ``img_shape``.

    Args:
        dim_latente: Size of the latent representation fed to the decoder.
        img_shape: Shape of the images to reconstruct.
        depth: Number of hidden layers. With ``depth < 1`` the latent code is
            connected directly to the output layer.
        units: Width of every hidden layer (previously hard-coded to 1000).

    Returns:
        A ``keras.Sequential`` decoder.
    """
    model = keras.Sequential()
    # Declare the input once, up front. The original attached input_dim to
    # every hidden Dense layer and indexed layer_sizes[0], which raised
    # IndexError for depth < 1.
    model.add(Input(shape=(dim_latente,)))
    for _ in range(depth):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(np.prod(img_shape), activation=keras.activations.sigmoid))
    model.add(Reshape(img_shape))

    return model

In [15]:
# Build the fixed-width dense decoder (2-dimensional latent input, 28x28x1
# output) and print its layer summary.
# NOTE(review): the decoder is bound to the name `enc`, replacing the encoder
# stored under that name by the earlier cells.
enc = build_dense_decoder2(2, (28,28,1))
enc.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_11 (Dense)            (None, 1000)              3000      
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 1000)              0         
                                                                 
 dense_12 (Dense)            (None, 1000)              1001000   
                                                                 
 leaky_re_lu_7 (LeakyReLU)   (None, 1000)              0         
                                                                 
 dense_13 (Dense)            (None, 784)               784784    
                                                                 
 reshape_1 (Reshape)         (None, 28, 28, 1)         0         
                                                                 
Total params: 1,788,784
Trainable params: 1,788,784
No

### Decoder convolucional

In [58]:
def build_conv_decoder(dim_latente, img_shape, depth=2):
    """Build a convolutional decoder.

    The latent code goes through two linear Dense layers and is reshaped into
    a small feature map. That map is upsampled by ``depth`` stride-2
    Conv2DTranspose layers (3x3, "same" padding, ReLU), halving the filter
    count after each one. A final sigmoid Conv2D maps the result to
    ``img_shape[-1]`` channels.

    The starting feature map has each spatial dimension of ``img_shape``
    floor-divided by ``2 ** depth``. The output therefore matches
    ``img_shape`` only when every spatial dimension is divisible by
    ``2 ** depth``.

    Args:
        dim_latente: Size of the latent representation fed to the decoder.
        img_shape: Shape of the images to reconstruct, channels last.
        depth: Number of upsampling stages.

    Returns:
        A ``keras.Sequential`` decoder.
    """
    # Filter count of the first (deepest) stage: 8 at the last stage,
    # doubling for every extra level of depth.
    n_filters = 8 * (2 ** (depth - 1))
    start_shape = tuple(int(d // (2 ** depth)) for d in img_shape[:-1]) + (n_filters,)

    model = keras.Sequential()
    model.add(Dense(dim_latente, input_dim=dim_latente))
    model.add(Dense(np.prod(start_shape)))
    model.add(Reshape(start_shape))
    for _ in range(depth):
        model.add(Conv2DTranspose(n_filters, kernel_size=(3, 3), strides=2, padding="same", activation="relu"))
        # Floor division keeps the filter count an integer. The original
        # `/=` silently turned it into a float after the first stage.
        n_filters //= 2
    model.add(Conv2D(img_shape[-1], (3, 3), padding="same", activation=keras.activations.sigmoid))

    return model

In [59]:
# Build the convolutional decoder (20-dimensional latent input, 28x28x1
# output) and print its layer summary.
dec = build_conv_decoder(20, (28,28,1))
dec.summary()

Model: "sequential_25"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_50 (Dense)            (None, 20)                420       
                                                                 
 dense_51 (Dense)            (None, 784)               16464     
                                                                 
 reshape_18 (Reshape)        (None, 7, 7, 16)          0         
                                                                 
 conv2d_transpose_23 (Conv2D  (None, 14, 14, 16)       2320      
 Transpose)                                                      
                                                                 
 conv2d_transpose_24 (Conv2D  (None, 28, 28, 8)        1160      
 Transpose)                                                      
                                                                 
 conv2d_54 (Conv2D)          (None, 28, 28, 1)       

In [20]:
#optimizer = keras.optimizers.Adam(0.0002, 0.5)
#dim_latente=64
#img_shape=X_train[0].shape

#encoder = build_conv_encoder(dim_latente, img_shape)
#decoder = build_conv_decoder(dim_latente, img_shape)
#img = keras.layers.Input(img_shape)

#encoder_rep = encoder(img)
#autoencoder_out = decoder(encoder_rep)

#autoencoder = keras.Model(img, autoencoder_out)
#autoencoder.compile(loss='mse', optimizer=optimizer, metrics=["accuracy"])
#out = autoencoder.predict(X_train[0])

## Discriminador

El discriminador tiene como entrada la codificación latente de las imágenes y como salida una neurona que discrimina entre imágenes "reales" y "falsas". De esta forma entrenamos al encoder para que codifique siguiendo la distribución que usemos para generar las "imágenes reales"; en este caso, una distribución normal.

In [21]:
def build_discriminator(dim_latente, depth = 2, units=1000):
    """Build the discriminator that judges latent codes.

    A Sequential stack of ``depth`` blocks of Dense(``units``) followed by
    LeakyReLU(0.2), ending in a single sigmoid unit, is wrapped in a
    functional model that takes a latent vector as input.

    Args:
        dim_latente: Size of the latent vectors to classify.
        depth: Number of hidden layers. With ``depth < 1`` the input is
            connected directly to the sigmoid unit.
        units: Width of every hidden layer (previously hard-coded to 1000).

    Returns:
        A ``keras.Model`` mapping a latent vector to one value in [0, 1].
    """
    model = Sequential()
    # Declare the input explicitly instead of through layer_sizes[0], which
    # raised IndexError for depth < 1.
    model.add(Input(shape=(dim_latente,)))
    for _ in range(depth):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation=keras.activations.sigmoid))

    encoded = Input(shape=(dim_latente,))
    valid = model(encoded)
    return keras.Model(encoded, valid)

### Discriminador sensible a etiquetas

In [22]:
# https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
def build_class_discriminator(dim_latente, clases, depth = 2, units=1000):
    """Build a two-branch discriminator that also receives class labels.

    The latent code and the class vector each go through their own stack of
    ``depth`` blocks of Dense(``units``) followed by LeakyReLU(0.2). The two
    branch outputs are concatenated, reduced by a linear Dense(2) layer and
    mapped to a single sigmoid unit.

    Args:
        dim_latente: Shape of the latent input (passed to ``Input(shape=...)``).
        clases: Shape of the class-label input (passed to ``Input(shape=...)``).
        depth: Number of hidden layers in each branch.
        units: Width of every hidden layer (previously hard-coded to 1000).

    Returns:
        A ``keras.Model`` taking ``[latent, labels]`` and returning one value
        in [0, 1].
    """
    def dense_branch(tensor):
        # Stack of `depth` Dense + LeakyReLU blocks applied to `tensor`.
        for _ in range(depth):
            tensor = Dense(units)(tensor)
            tensor = LeakyReLU(alpha=0.2)(tensor)
        return tensor

    # Latent branch
    latent_input = Input(shape=dim_latente)
    latent_features = dense_branch(latent_input)

    # Class branch
    class_input = Input(shape=clases)
    class_features = dense_branch(class_input)

    combined = tf.keras.layers.concatenate([latent_features, class_features])
    output = Dense(2)(combined)
    # Sigmoid output, as in the other discriminators of this notebook. The
    # original linear Dense(1) produced unbounded values, which do not fit
    # the binary_crossentropy loss that assemble_AAE uses by default.
    output = Dense(1, activation=keras.activations.sigmoid)(output)
    return keras.Model(inputs = [latent_input, class_input], outputs=output)
    

In [23]:
# Build the two-branch label-aware discriminator (20-dimensional latent input,
# 10-dimensional class input) and print its layer summary.
disc = build_class_discriminator(20, 10)
disc.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 20)]         0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 dense_16 (Dense)               (None, 1000)         21000       ['input_3[0][0]']                
                                                                                                  
 dense_18 (Dense)               (None, 1000)         11000       ['input_4[0][0]']                
                                                                                            

In [None]:
# https://www.pyimagesearch.com/2019/02/04/keras-multiple-inputs-and-mixed-data/
def build_class_discriminator2(dim_latente, clases, depth = 2, units=1000):
    """Build a label-aware discriminator over the concatenated inputs.

    The latent code and the class vector are concatenated and fed to a single
    Sequential stack of ``depth`` blocks of Dense(``units``) followed by
    LeakyReLU(0.2), ending in one sigmoid unit.

    Args:
        dim_latente: Size of the latent vectors.
        clases: Size of the class-label vectors.
        depth: Number of hidden layers. With ``depth < 1`` the concatenated
            input is connected directly to the sigmoid unit.
        units: Width of every hidden layer (previously hard-coded to 1000).

    Returns:
        A ``keras.Model`` taking ``[latent, labels]`` and returning one value
        in [0, 1].
    """
    latent_input = Input(shape=(dim_latente,))
    class_input = Input(shape=(clases,))
    concated_input = tf.keras.layers.concatenate([latent_input, class_input])

    model = Sequential()
    # Declare the input explicitly instead of through layer_sizes[0], which
    # raised IndexError for depth < 1.
    model.add(Input(shape=(dim_latente + clases,)))
    for _ in range(depth):
        model.add(Dense(units))
        model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation=keras.activations.sigmoid))

    output = model(concated_input)
    return keras.Model([latent_input, class_input], output)
    
    

In [None]:
# Build the concatenated-input label-aware discriminator (20-dimensional
# latent input, 10-dimensional class input) and print its layer summary.
disc = build_class_discriminator2(20, 10)
disc.summary()

Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 20)]         0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 10)]         0           []                               
                                                                                                  
 dense_16 (Dense)               (None, 1000)         21000       ['input_3[0][0]']                
                                                                                                  
 dense_18 (Dense)               (None, 1000)         11000       ['input_4[0][0]']                
                                                                                            

# Redes

Parámetros

Método para la construcción

In [24]:
def assemble_AAE(dim_latente, img_shape, enc_model=build_dense_encoder, dec_model=build_dense_decoder, disc_model = build_discriminator,
                compilation_kwargs=None, enc_kwargs=None, dec_kwargs=None, disc_kwargs=None):
    """Assemble and compile the pieces of an adversarial autoencoder.

    Builds the discriminator, encoder and decoder with the given builder
    functions. It then wires ``image -> encoder -> decoder`` (reconstruction)
    and ``encoder output -> discriminator`` (validity) into one model with
    two outputs. The discriminator is compiled on its own first and is then
    marked as non-trainable before being used inside the combined model.

    Args:
        dim_latente: Size of the latent representation.
        img_shape: Shape of the input images.
        enc_model: Builder called with ``dim_latente``, ``img_shape`` and
            ``enc_kwargs``; returns the encoder.
        dec_model: Builder called with ``dim_latente``, ``img_shape`` and
            ``dec_kwargs``; returns the decoder.
        disc_model: Builder called with ``dim_latente`` and ``disc_kwargs``;
            returns the discriminator.
        compilation_kwargs: Optional overrides for the compilation settings.
            Keys: ``"ae_loss"`` (default mean squared error), ``"disc_loss"``
            (default binary cross-entropy), ``"optimizer"`` for the combined
            model (default ``Adam(0.0002, 0.5)``) and ``"disc_optimizer"``
            for the discriminator (default ``"rmsprop"``).
        enc_kwargs: Extra keyword arguments for ``enc_model``.
        dec_kwargs: Extra keyword arguments for ``dec_model``.
        disc_kwargs: Extra keyword arguments for ``disc_model``. If it holds
            a ``"clases"`` entry, a second input named ``"labels"`` with that
            shape is added and passed to the discriminator with the encoding.

    Returns:
        The tuple ``(encoder, decoder, discriminator, a_autoencoder)``.
    """
    # Builder parameters: the shared values first, then the caller's overrides.
    # None defaults replace the original mutable `{}` default arguments.
    disc_params = {"dim_latente": dim_latente}
    disc_params.update(disc_kwargs or {})
    enc_params = {"dim_latente": dim_latente, "img_shape": img_shape}
    enc_params.update(enc_kwargs or {})
    dec_params = {"dim_latente": dim_latente, "img_shape": img_shape}
    dec_params.update(dec_kwargs or {})

    # cp = compilation params
    cp = {"ae_loss": losses.mean_squared_error,
          "disc_loss": losses.binary_crossentropy,
          "optimizer": keras.optimizers.Adam(0.0002, 0.5),
          # The discriminator used to be compiled without an optimizer, i.e.
          # with Keras' default "rmsprop". That default is kept, but it can
          # now be overridden.
          "disc_optimizer": "rmsprop",
         }
    cp.update(compilation_kwargs or {})

    # Discriminator
    discriminator = disc_model(**disc_params)
    discriminator.compile(loss=cp["disc_loss"], optimizer=cp["disc_optimizer"], metrics=["accuracy"])

    # Encoder and decoder
    encoder = enc_model(**enc_params)
    decoder = dec_model(**dec_params)

    # Autoencoder: the encoder turns an image into a code and the decoder
    # tries to regenerate the image from that code.
    img = Input(shape=img_shape, name="data")
    encoded = encoder(img)
    reconstructed = decoder(encoded)

    if "clases" in disc_params:
        clase = Input(shape=disc_params["clases"], name = "labels")
        disc_input = [encoded, clase]
        aae_input = [img, clase]
    else:
        disc_input = encoded
        aae_input = img

    # In the adversarial autoencoder only the generator (encoder) should be
    # trained, not the discriminator. The discriminator was already compiled
    # above; this presumably relies on Keras recording the trainable state at
    # compile time -- verify for the Keras version in use.
    discriminator.trainable = False

    # The discriminator scores the validity of the encoding.
    validez = discriminator(disc_input)

    # Adversarial autoencoder
    a_autoencoder = keras.Model(aae_input, [reconstructed, validez])
    a_autoencoder.compile(loss=[cp["ae_loss"], cp["disc_loss"]], optimizer=cp["optimizer"])#, loss_weights=[0.999, 0.001])
    return (encoder, decoder, discriminator, a_autoencoder)