# Autoencoders 

Autoencoders are neural networks that learn low-dimensional latent representations from which the original data can be reconstructed as well as possible.

In [None]:
from keras.layers import Input, Dense, Lambda
from keras.models import Model
from keras import backend as K

In [None]:
# Do not run yet: this function reads notebook-level names that must be
# defined by other cells first (fig, i, n_subplots_x, n_subplots_y, color1,
# cmap, title_fontsize, plt, NullFormatter, onlyDraw, precomputed_results,
# viz_results).
def autoencoder(X, n_components=2):
    """Train a variational autoencoder on X and plot its latent means.

    The encoder is a stack of ReLU Dense layers (64, 32, 16, 8 units) that
    outputs a latent mean and a latent log-variance; the decoder mirrors the
    stack and ends in a sigmoid layer with ``X.shape[1]`` units. The model is
    trained to reconstruct the min-max scaled input, and the latent means of
    the scaled input are drawn as a scatter plot on the next subplot of the
    notebook-level figure.

    Args:
        X: 2-D array-like exposing ``shape``, ``min()`` and ``max()``
            (presumably a NumPy array of shape (n_samples, n_features) --
            TODO confirm against the calling cell).
        n_components: Size of the latent space. The plot uses the first two
            latent dimensions, so this must be at least 2.

    Returns:
        The notebook-level ``viz_results`` dict; when the model is (re)fitted
        its ``'VAE'`` entry is set to the latent means.
    """
    # Function-scope import so this cell does not depend on another cell
    # having imported ``time`` in exactly this form.
    from time import time

    # ``i`` is incremented below; without this declaration the augmented
    # assignment would make it a local and raise UnboundLocalError.
    # Presumably it is the notebook-level subplot counter -- verify.
    global i

    def sampling(args):
        """Reparameterization trick: z = mean + std * eps, eps ~ N(0, I)."""
        z_mean, z_log_var = args
        # Draw independent noise for every sample in the batch rather than a
        # single vector that is broadcast across the batch.
        batch = K.shape(z_mean)[0]
        epsilon = K.random_normal(shape=(batch, n_components))
        # z_log_var is a log-variance (the KL term below uses exp(z_log_var)
        # as the variance), so the standard deviation is exp(0.5 * log_var).
        return z_mean + K.exp(0.5 * z_log_var) * epsilon

    layer_sizes = [64, 32, 16, 8]

    # encoder
    inputs = Input(shape=(X.shape[1],), name='encoder_input')
    x = inputs
    for size in layer_sizes:
        x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)

    z_mean = Dense(n_components, kernel_initializer='he_uniform', name='latent_mean')(x)
    # The layer is named 'latent_sigma' but its output is used as a
    # log-variance; the name is kept as-is because it is a runtime identifier.
    z_log_var = Dense(n_components, kernel_initializer='he_uniform', name='latent_sigma')(x)

    z = Lambda(sampling, output_shape=(n_components,))([z_mean, z_log_var])
    encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # decoder (mirror image of the encoder stack)
    latent_inputs = Input(shape=(n_components,), name='decoder_input_sampling')
    x = latent_inputs
    for size in layer_sizes[::-1]:
        x = Dense(size, activation='relu', kernel_initializer='he_uniform')(x)
    outputs = Dense(X.shape[1], activation='sigmoid', kernel_initializer='he_uniform', name='decoder_output')(x)
    decoder = Model(latent_inputs, outputs, name='decoder')

    # autoencoder: feed the sampled z (third encoder output) into the decoder
    vae = Model(inputs, decoder(encoder(inputs)[2]), name='vae')

    def vae_loss(x, x_decoded_mean):
        """Mean-squared reconstruction error plus the KL divergence term."""
        reconstruction_loss = K.mean(K.square(x - x_decoded_mean))
        kl_loss = -0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
        return reconstruction_loss + kl_loss

    vae.compile(optimizer='adam', loss=vae_loss)

    # Scale to [0, 1] so the targets match the range of the sigmoid output.
    X_01 = (X - X.min()) / (X.max() - X.min())
    t0 = time()
    # NOTE(review): the cache check consults precomputed_results, but the
    # cached embedding is read from viz_results -- confirm this is intended.
    if not onlyDraw or 'VAE' not in precomputed_results:
        vae.fit(x=X_01, y=X_01, epochs=200, verbose=0)
        # Embed the same scaled data the model was trained on; feeding the
        # raw X would put the encoder outside its training range.
        Y_VAE = encoder.predict(X_01)[0]
        viz_results['VAE'] = Y_VAE
    else:
        Y_VAE = viz_results['VAE']
    t1 = time()
    print("VAE: %.2g sec" % (t1 - t0))

    i += 1
    ax = fig.add_subplot(n_subplots_x, n_subplots_y, i)
    plt.scatter(Y_VAE[:, 0], Y_VAE[:, 1], c=color1, cmap=cmap)
    plt.title("VAE", fontdict={'fontsize': title_fontsize})
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')

    plt.tight_layout()

    return viz_results

Candidate approaches: scArches, a plain VAE, or rna-seq-vae.
Can we use any pretrained model for plants on our small dataset?
Points to mention: prevent the model from memorizing individual samples (use regularization); the latent space should have a meaningful structure (e.g. show that similar instances cluster together).