In [None]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

In [None]:
from algorithms.utils import MNISTLoader
from algorithms import softmax, mlp, cnn, auto_encoder

In [None]:
# Notebook-wide MNIST loader. Later cells call it (optionally with batch_size /
# data_type) to obtain an iterator of (images, labels) batches, and read its
# train_size / test_size attributes for progress reporting.
data_loader = MNISTLoader()

# Pretrain AutoEncoder

In [None]:
# Stacked autoencoder configured with [1024, 512, 128] (presumably the hidden-layer
# widths -- confirm against auto_encoder.StackedAutoEncoder) and built for
# single-row inputs of 784 values.
stacked_auto_encoder = auto_encoder.StackedAutoEncoder([1024, 512, 128])
stacked_auto_encoder.build(input_shape=(1, 784))

# Pretrain the layers one after another (train_layer = 0, 1, 2). Every pass uses
# the same hyper-parameters with denoising enabled; only layer 0 is trained with
# sparse=True.
for layer_index, use_sparse in enumerate((True, False, False)):
    stacked_auto_encoder.train(
        data_loader,
        train_layer=layer_index,
        num_epochs=5,
        learning_rate=1e-4,
        batch_size=64,
        sparse=use_sparse,
        denoise=True,
    )

In [None]:
# Take the first image of the first batch the loader yields as the sample to inspect.
X, y = next(data_loader())
img = X[0]
flat_img = img.reshape(1, 784)  # single-row batch, the form passed to the autoencoder

# Reference view: the unmodified input image.
plt.imshow(img.squeeze(), cmap='gray')
plt.show()

# Run the autoencoder with stop_at = 1, 2 and 3 and reshape each output back to 28x28.
img_reconstruct_1, img_reconstruct_2, img_reconstruct_3 = (
    stacked_auto_encoder(flat_img, stop_at=depth).numpy().reshape([28, 28])
    for depth in (1, 2, 3)
)

# Show the three reconstructions in order of increasing stop_at.
for reconstruction in (img_reconstruct_1, img_reconstruct_2, img_reconstruct_3):
    plt.imshow(reconstruction, cmap='gray')
    plt.show()

In [None]:
def train_model(model, num_epochs=5, learning_rate=1e-4, batch_size=64):
    """Train ``model`` as a classifier and evaluate it on the test split after every epoch.

    Batches are drawn from the notebook-level ``data_loader``; each image batch
    is flattened to two dimensions before being passed to the model. Progress is
    printed in place per batch, followed by a one-line summary per epoch.

    Only ``model.trainable_variables`` are optimised, so layers whose
    ``trainable`` flag was set to ``False`` before this call keep their weights.

    Args:
        model: Callable Keras model mapping a flattened image batch to class
            probabilities (the loss is created with its default settings, which
            expect probabilities rather than logits).
        num_epochs: Number of full passes over the training split.
        learning_rate: Learning rate handed to the Adam optimizer.
        batch_size: Batch size requested from ``data_loader`` for both splits.

    Returns:
        None. Results are reported through ``print`` only.
    """
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

    @tf.function
    def train_on_batch(X_batch, y_batch):
        """Run one optimisation step and update the running training metrics."""
        # Differentiate with respect to the trainable variables only. Using
        # model.variables here would also update layers the caller has frozen
        # via `layer.trainable = False`.
        trainable_vars = model.trainable_variables
        with tf.GradientTape() as tape:
            y_pred = model(X_batch)
            # The loss object already reduces over the batch and returns a scalar.
            loss = loss_object(y_true=y_batch, y_pred=y_pred)
        grads = tape.gradient(loss, trainable_vars)
        optimizer.apply_gradients(grads_and_vars=zip(grads, trainable_vars))

        train_loss(loss)
        train_accuracy(y_batch, y_pred)
        return loss

    @tf.function
    def test_on_batch(X_batch, y_batch):
        """Evaluate one batch (no weight update) and update the running test metrics."""
        y_pred = model(X_batch)
        t_loss = loss_object(y_batch, y_pred)

        test_loss(t_loss)
        test_accuracy(y_batch, y_pred)
        return t_loss

    for epoch in range(num_epochs):

        # Metrics accumulate across calls, so start every epoch from zero.
        train_loss.reset_states()
        train_accuracy.reset_states()
        test_loss.reset_states()
        test_accuracy.reset_states()

        # Training
        for batch_index, (X_batch, y_batch) in enumerate(data_loader(batch_size=batch_size, data_type='train')):
            # Flatten each image into a single feature vector.
            X_batch = X_batch.reshape(X_batch.shape[0], X_batch.shape[1]*X_batch.shape[2])
            loss = train_on_batch(X_batch, y_batch)
            template = '[Training] Epoch {}, Batch {}/{}, Loss: {}, Accuracy: {:.2%} '
            # end='\r' rewrites the same console line instead of flooding the output.
            print(template.format(epoch+1,
                                batch_index,
                                data_loader.train_size // batch_size,
                                loss,
                                train_accuracy.result()),
                end='\r')

        # Testing
        for batch_index, (X_batch, y_batch) in enumerate(data_loader(batch_size=batch_size, data_type='test')):
            X_batch = X_batch.reshape(X_batch.shape[0], X_batch.shape[1]*X_batch.shape[2])
            loss = test_on_batch(X_batch, y_batch)
            template = '[Testing] Epoch {}, Batch {}/{}, Loss: {}, Accuracy: {:.2%} '
            print(template.format(epoch+1,
                                batch_index,
                                data_loader.test_size // batch_size,
                                loss,
                                test_accuracy.result()),
                end='\r')

        # Epoch summary built from the accumulated metric values.
        template = 'Epoch {}, Loss: {}, Accuracy: {:.2%}, Test Loss: {}, Test Accuracy: {:.2%} '
        print(template.format(epoch+1,
                            train_loss.result(),
                            train_accuracy.result(),
                            test_loss.result(),
                            test_accuracy.result()))

In [None]:
# Classifier = the autoencoder's encode() applied to a flat 784-value input,
# followed by a 10-unit softmax output layer.
inputs = tf.keras.Input(shape=(784, ))
classifier_head = tf.keras.layers.Dense(10, activation='softmax')
outputs = x = classifier_head(stacked_auto_encoder.encode(inputs))
model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Pretrain Softmax Layer

In [None]:
# Mark every autoencoder yielded by iterating the stack as non-trainable, so this
# phase is meant to fit only the softmax output layer.
# NOTE(review): the freeze only takes effect if train_model restricts its
# gradient step to the model's trainable variables -- confirm.
for ae in stacked_auto_encoder:
    ae.trainable = False
model.summary()

# Train with the autoencoders marked non-trainable: 20 epochs, lr 1e-4, batch size 64.
train_model(model, num_epochs=20, learning_rate=1e-4, batch_size=64)

# Fine Tuning

In [None]:
# Mark every autoencoder in the stack as trainable again so the whole model is
# trained in this phase.
for ae in stacked_auto_encoder:
    ae.trainable = True
model.summary()

# Fine-tune with a smaller learning rate (1e-5) and larger batches (256) for 50 epochs.
train_model(model, num_epochs=50, learning_rate=1e-5, batch_size=256)