In [None]:
# First install the library (uncomment the line below to run the install
# inside the notebook kernel's environment).
#%pip install aepy

In [None]:
import numpy as np

from sklearn.metrics import accuracy_score
from keras_core import utils

from aepy.data.datasets import load_MNIST
from aepy.data.utils import evaluate, display_diff, add_noise
from aepy.models.ae.ae_model import AE
from aepy.models.base.default_architectures import VanillaEncoder, VanillaDecoder
from aepy.pipelines.training import TrainingPipeline

Download and preprocess the dataset. In this example, the selected dataset is the well-known MNIST dataset, which is composed of images of handwritten digits.

In [None]:
# Fetch the MNIST train/test splits (images and integer labels).
x_train, y_train, x_test, y_test = load_MNIST(persistant=True)

# Number of distinct classes found in the training labels.
n_classes = len(set(y_train))

# Convert both label vectors to categorical form, train first and then test,
# using the same class count so that both encodings have the same width.
y_train, y_test = (
    utils.to_categorical(labels, n_classes) for labels in (y_train, y_test)
)

Since we want to train a denoising autoencoder, we also need to add some noise to the images.
The noise factor should be a float between 0 and 1.

In [None]:
# Noise level applied to both splits (see the note above: float in [0, 1]).
NOISE_FACTOR = 0.4


def flatten_samples(images):
    """Keep the first axis (samples) and collapse all remaining axes into one."""
    return images.reshape(images.shape[0], -1)


# Corrupt the train split first and then the test split; the clean arrays are
# passed to add_noise before any flattening takes place.
x_train_noisy = add_noise(x_train, noise_factor=NOISE_FACTOR)
x_test_noisy = add_noise(x_test, noise_factor=NOISE_FACTOR)

# Flatten every array so that each sample becomes a single row.
x_train = flatten_samples(x_train)
x_train_noisy = flatten_samples(x_train_noisy)
x_test = flatten_samples(x_test)
x_test_noisy = flatten_samples(x_test_noisy)

Compose the dictionaries that feed the autoencoder during the training and evaluation phases.
Note that, for training, the labels are the original MNIST images (without noise).

In [None]:
# Training pairs: noisy flattened images as input, clean flattened images as
# the target the autoencoder has to reconstruct.
train_data = {
    'data': x_train_noisy.astype(float),
    'labels': x_train,
}

# Test pairs: noisy flattened images as input, categorical class labels
# (produced earlier with utils.to_categorical) as labels.
# NOTE(review): unlike the training dictionary, the labels here are class
# labels rather than the clean images - confirm this is what the evaluation
# step expects.
test_data = {
    'data': x_test_noisy.astype(float),
    'labels': y_test,
}

Since a denoising autoencoder has the same structure as a normal autoencoder (only its functionality varies), the encoder and decoder are the vanilla ones. You can specify the depth and the number of neurons per layer of each one using the `layers_conf` parameter.

In [None]:
# Model creation
# The input dimension is built from both axes of the flattened noisy training
# array: (number of samples, values per sample).
input_shape = (x_train_noisy.shape[0], x_train_noisy.shape[1])

model = AE(
    input_dim=input_shape,
    latent_dim=2,
    encoder=VanillaEncoder,
    decoder=VanillaDecoder,
    layers_conf=[64, 32],  # layer configuration passed to the vanilla encoder/decoder
)

Define the training pipeline. Here you can set some hyperparameters related to the training phase of the autoencoder, such as the learning rate, batch size, number of epochs, etc.

In [None]:
# Number of training epochs handed to the pipeline.
NUM_EPOCHS = 10

# Bind the model to a training pipeline.
pipe = TrainingPipeline(
    name='training_pipeline',
    model=model,
    num_epochs=NUM_EPOCHS,
)

# Calling the pipeline with the training dictionary returns the trained model.
trained_model = pipe(train_data=train_data)

Evaluation phase using test data

In [None]:
# Run the trained model on the test dictionary; the result is indexed by key
# (its 'recon' entry is what gets plotted in the final cell).
y_hat = trained_model.predict(test_data)

Finally, the noisy input images can be compared visually with the images reconstructed by the autoencoder.

In [None]:
# Pull the reconstructed images out of the prediction result, then show them
# next to the noisy inputs they were generated from.
reconstructions = y_hat['recon']
display_diff(x_test_noisy, reconstructions)