# *Modern Deep Learning for Tabular Data*, Chapter 8

**Autoencoders**

This notebook contains the complementary code discussed in Chapter 8 of *Modern Deep Learning for Tabular Data*.

External Kaggle links to datasets used in this notebook:
- [Mouse Protein Expression Dataset](https://www.kaggle.com/datasets/washingtongold/mpempe)
- [Higgs Boson Dataset](https://www.kaggle.com/datasets/mragpavank/higs-bonsons-and-background-process)

You can download these datasets from Kaggle, or import these notebooks into Kaggle and connect them internally.

---

## Importing and Installing Libraries

In [None]:
# data management
import numpy as np                   # for linear algebra
import pandas as pd                  # for tabular data manipulation and processing

# machine learning
import sklearn                       # for data prep and classical ML
import tensorflow as tf              # for deep learning
from tensorflow import keras         # for deep learning
import keras.layers as L             # for easy NN layer access

# data visualization and graphics
import matplotlib.pyplot as plt      # for visualization fundamentals
import seaborn as sns                # for pretty visualizations
import cv2                           # for image manipulation

# misc
from tqdm.notebook import tqdm       # for progress bars
import math                          # for calculation
import sys                           # for system manipulation
import os                            # for file manipulation

---

## Vanilla Autoencoders

Let's begin by loading MNIST data.

In [None]:
# Load MNIST, flatten every image into a 784-long vector and divide by 255.
(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((x_train.shape[0], 784)) / 255
x_valid = x_valid.reshape((x_valid.shape[0], 784)) / 255

Creating an autoencoder on MNIST.

In [None]:
from keras.models import Sequential

# Dense autoencoder 784 -> 256 -> 64 -> 32 -> 64 -> 256 -> 784.
# Hidden layers use ReLU; the output layer uses a sigmoid.
model = Sequential()
model.add(L.Input((784,)))
for hiddenUnits in (256, 64, 32, 64, 256):
    model.add(L.Dense(hiddenUnits, activation='relu'))
model.add(L.Dense(784, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam')

# The target equals the input: the network learns to reconstruct its input.
model.fit(x_train, x_train,
          validation_data=(x_valid, x_valid),
          epochs=1)

Using compartmentalized submodel design.

In [None]:
from keras.models import Model

# Encoder sub-model: 784 -> 256 -> 64 -> 32
encoder = Sequential(name='encoder')
encoder.add(L.Input((784,)))
for units in (256, 64, 32):
    encoder.add(L.Dense(units, activation='relu'))

# Decoder sub-model: 32 -> 64 -> 256 -> 784
decoder = Sequential(name='decoder')
decoder.add(L.Input((32,)))
for units in (64, 256):
    decoder.add(L.Dense(units, activation='relu'))
decoder.add(L.Dense(784, activation='sigmoid'))

# Chain the two sub-models into one trainable autoencoder. Keeping the
# encoder as its own object lets later cells query the latent space directly.
ae_input = L.Input((784,), name='input')
ae_encoder = encoder(ae_input)
ae_decoder = decoder(ae_encoder)
ae = Model(inputs=ae_input, outputs=ae_decoder)

# note that in other situations other losses may be more suitable
ae.compile(loss='binary_crossentropy', optimizer='adam')

history = ae.fit(x_train, x_train,
                 validation_data=(x_valid, x_valid),
                 epochs=1)

In [None]:
# `keras` is the name bound by `from tensorflow import keras`; the bare name
# `tensorflow` is never imported in this notebook (it is aliased as `tf`).
keras.utils.plot_model(ae, show_shapes=True, dpi=400)

Demonstrating the latent space compressions of the autoencoder.

In [None]:
# For the first ten validation digits show, side by side: the input image,
# its 32-value latent code (drawn as an 8x4 grid) and the reconstruction.
plt.set_cmap('gray')
for i in range(10):
    plt.figure(figsize=(10, 5), dpi=400)
    sample = x_valid[i:i+1]  # slice keeps the batch dimension for predict()
    panels = (
        ('Original Input', x_valid[i].reshape((28, 28))),
        ('Latent Space (Reshaped)', encoder.predict(sample).reshape((8, 4))),
        ('Reconstructed', ae.predict(sample).reshape((28, 28))),
    )
    for position, (caption, image) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.imshow(image)
        plt.axis('off')
        plt.title(caption)
    plt.show()

A generic autoencoder creation function.

In [None]:
def buildAutoencoder(inputSize=784, latentSize=32, outActivation='sigmoid'):
    """Build a dense autoencoder whose layer widths are latentSize times powers of two.

    The encoder stacks ReLU layers of width ``latentSize * 2**i`` for
    ``i = depth, ..., 1, 0`` and therefore ends in a layer of width
    ``latentSize``. The decoder mirrors it with ``i = 1, ..., depth`` and
    finishes with a layer of width ``inputSize`` using ``outActivation``.
    ``depth`` is ``floor(log2(inputSize / latentSize))``, clamped at 0.

    Args:
        inputSize: number of input (and reconstructed output) features.
        latentSize: width of the latent (bottleneck) layer.
        outActivation: activation of the final decoder layer.

    Returns:
        dict with keys ``'model'`` (the full autoencoder), ``'encoder'`` and
        ``'decoder'`` (the two sub-models, which share weights with the model).
    """
    # Clamp at zero: when latentSize > inputSize the raw value is negative,
    # which would leave the encoder without any Dense layer (no latent layer
    # at all) and make the encoder/decoder shapes incompatible.
    depth = max(0, int(np.floor(np.log2(inputSize / latentSize))))

    # define architecture components
    encoder = Sequential(name='encoder')
    encoder.add(L.Input((inputSize,)))
    for i in range(depth, -1, -1):
        encoder.add(L.Dense(latentSize * 2**i, activation='relu'))

    decoder = Sequential(name='decoder')
    decoder.add(L.Input((latentSize,)))
    for i in range(1, depth + 1):
        decoder.add(L.Dense(latentSize * 2**i, activation='relu'))
    decoder.add(L.Dense(inputSize, activation=outActivation))

    # define model architecture from components
    ae_input = L.Input((inputSize,), name='input')
    ae_encoder = encoder(ae_input)
    ae_decoder = decoder(ae_encoder)
    ae = Model(inputs = ae_input,
               outputs = ae_decoder)

    return {'model': ae, 'encoder': encoder, 'decoder': decoder}

In [None]:
built_ae = buildAutoencoder(784, 32)['model']
# use the imported `keras` name; `tensorflow` itself is only bound as `tf`
keras.utils.plot_model(built_ae, show_shapes=True, dpi=400)

Varying autoencoder performance by latent space size.

In [None]:
inputSize = 784

earlyStopping = keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=5)

# latentSizes holds exponents p; each trial uses a latent layer of width 2**p
latentSizes = list(range(1, int(np.floor(np.log2(inputSize)))))
validPerf = []
trainHist = []
for latentSize in tqdm(latentSizes):
    model = buildAutoencoder(inputSize, 2**latentSize)['model']
    # Per-pixel reconstruction through a sigmoid output layer: use binary
    # cross-entropy, as the other MNIST autoencoders in this notebook do.
    model.compile(optimizer='adam', loss='binary_crossentropy')
    history = model.fit(x_train, x_train, epochs=50, callbacks=[earlyStopping], verbose=0)
    score = keras.metrics.MeanAbsoluteError()
    # Keras metrics take (y_true, y_pred) in that order
    score.update_state(x_valid, model.predict(x_valid))
    validPerf.append(score.result().numpy())
    trainHist.append(history.history['loss'])

plt.figure(figsize=(15, 7.5), dpi=400)
plt.plot(latentSizes, validPerf, color='red')
plt.ylabel('Validation Performance')
plt.xlabel('Latent Size (power of 2)')
plt.grid()
plt.show()

plt.set_cmap('magma')
plt.figure(figsize=(20, 10), dpi=400)
for size, hist in zip(latentSizes, trainHist):
    # `size` is the exponent, so report the actual layer width in the legend
    plt.plot(hist, label=f'Latent Size {2**size}')
plt.xlabel('Epoch')
plt.ylabel('Training Performance')
plt.legend()
plt.grid()
plt.show()

Visualizing the latent space of an overcomplete model.

In [None]:
# Overcomplete autoencoder: the 2048-unit latent layer is wider than the
# 784-feature input. The encoder and decoder are kept as separate sub-models
# so the latent representation can be extracted for visualisation.
from sklearn.manifold import TSNE  # needed below; not imported in any earlier cell

encoder = Sequential(name='encoder')
encoder.add(L.Input((784,)))
encoder.add(L.Dense(1024, activation='relu'))
encoder.add(L.Dense(2048, activation='relu'))

decoder = Sequential(name='decoder')
decoder.add(L.Input((2048,)))
decoder.add(L.Dense(1024, activation='relu'))
decoder.add(L.Dense(784, activation='sigmoid'))

# The model input width must match x_train, which has 784 features per sample.
inp = L.Input((784,), name='input')
model = Model(inputs=inp, outputs=decoder(encoder(inp)))

model.compile(optimizer='adam', loss='binary_crossentropy')
model.fit(x_train, x_train, epochs=50, callbacks=[earlyStopping], verbose=0)

# Project the latent codes to 2-D and colour each point by its digit label.
transformed = encoder.predict(x_train)
tsne_ = TSNE(n_components=2).fit_transform(transformed)

plt.figure(figsize=(10, 10), dpi=400)
plt.scatter(tsne_[:,0], tsne_[:,1], c=y_train)
plt.show()
plt.close()

Example line reconstruction task.

In [None]:
# Synthetic data: 1024 blank 50x50 canvases, each with one line (value 1,
# thickness 4) drawn between two random points.
x = np.zeros((1024, 50, 50))
for idx in range(x.shape[0]):
    start = [np.random.randint(0, 50), np.random.randint(0, 50)]
    end = [np.random.randint(0, 50), np.random.randint(0, 50)]
    x[idx] = cv2.line(x[idx], start, end, 1, 4)
x = x.reshape((1024, 50 * 50))

# Autoencoder with a 4-unit latent layer trained on the flattened canvases.
modelSet = buildAutoencoder(50 * 50, 4)
model, encoder = modelSet['model'], modelSet['encoder']
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(x, x, validation_split=0.2, epochs=1)

# Compare five inputs (left) with their reconstructions (right).
plt.set_cmap('gray')
for i in range(5):
    pred = model.predict(x[i:i+1]).reshape((50, 50))
    plt.figure(figsize=(10, 5), dpi=400)
    for position, image in enumerate((x[i].reshape((50, 50)), pred), start=1):
        plt.subplot(1, 2, position)
        plt.imshow(image)
        plt.axis('off')
    plt.show()
    plt.close()

Using an autoencoder on a real tabular dataset (Mouse Protein Expression dataset).

In [None]:
# Load the dataset, drop the exported index column and split features/label.
data = pd.read_csv('../input/mpempe/mouse-protein-expression.csv')
data = data.drop('Unnamed: 0', axis=1)
y = data['class']
x = data.drop('class', axis=1)

# Autoencoder with an 8-unit latent layer and a linear output layer.
modelSet = buildAutoencoder(x.shape[1], 8, outActivation='linear')
ae, encoder = modelSet['model'], modelSet['encoder']

ae.compile(loss='mse', metrics=['mae'], optimizer='adam')
history = ae.fit(x, x, epochs=50)

# Training loss per epoch.
plt.figure(figsize=(15, 7.5), dpi=400)
plt.plot(history.history['loss'], color='red')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid()
plt.show()
plt.close()

In [None]:
encoded = encoder.predict(x)
recon = ae.predict(x)

plt.set_cmap('gray')

for i in range(1):

    # (title, values to draw, figure size, annotate cells?)
    views = (
        ('Input Features', np.array(x.iloc[i]).reshape((8, 10)), (10, 8), True),
        ('Reconstruction', np.array(recon[i]).reshape((8, 10)), (10, 8), True),
        ('Latent Space', encoded[i].reshape((1, 8)), (8, 1), False),
    )
    for caption, values, size, annotate in views:
        plt.figure(figsize=size, dpi=400)
        sns.heatmap(values,
                    annot=annotate,
                    cmap='gray',
                    cbar=False,
                    xticklabels=[],
                    yticklabels=[])
        plt.title(caption)
        plt.show()
        plt.close()

    print('-'*50)

# 2-D t-SNE projection of the latent codes, coloured by class label.
from sklearn.manifold import TSNE
transformed = encoder.predict(x)
tsne_ = TSNE(n_components=2).fit_transform(transformed)

plt.figure(figsize=(10, 10), dpi=400)
plt.scatter(tsne_[:,0], tsne_[:,1], cmap='viridis', c=y)
plt.show()
plt.close()

Applying various latent space sizes to the Mouse Protein Expression dataset.

In [None]:
from sklearn.manifold import TSNE

# `x` (features) and `y` (class labels) are the Mouse Protein Expression
# frames loaded earlier in this section.
inputSize = len(x.columns)

earlyStopping = keras.callbacks.EarlyStopping(monitor='loss',
                                              patience=5)

# exponents p; each trial uses a latent layer of width 2**p
latentSizes = list(range(1, int(np.floor(np.log2(inputSize)))))

plt.set_cmap('viridis')

for latentSize in tqdm(latentSizes):

    # Linear output + MSE, matching the earlier autoencoder on this dataset;
    # a sigmoid output with categorical cross-entropy does not fit
    # reconstruction of these tabular features.
    modelSet = buildAutoencoder(inputSize, 2**latentSize, outActivation='linear')
    model = modelSet['model']
    encoder = modelSet['encoder']
    model.compile(optimizer='adam', loss='mse')
    model.fit(x, x, epochs=5, callbacks=[earlyStopping], verbose=0)

    # 2-D t-SNE projection of the latent codes, coloured by class label
    transformed = encoder.predict(x)
    tsne_ = TSNE(n_components=2).fit_transform(transformed)

    plt.figure(figsize=(10, 10), dpi=400)
    plt.scatter(tsne_[:,0], tsne_[:,1], c=y)
    plt.show()
    plt.close()

---

## Autoencoders for Pretraining

Pretraining with the MNIST dataset.

In [None]:
# Reload MNIST; each image becomes a 784-long row divided by 255.
(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()
x_train = x_train.reshape((x_train.shape[0], 784)) / 255
x_valid = x_valid.reshape((x_valid.shape[0], 784)) / 255

In [None]:
# Train an autoencoder on MNIST; its encoder is kept for reuse.
modelSet = buildAutoencoder(784, 32)
encoder = modelSet['encoder']
model = modelSet['model']
model.compile(loss='binary_crossentropy', optimizer='adam')
model.fit(x_train, x_train, epochs=20)

In [None]:
# Freeze the pretrained encoder so only the new classification head learns.
# `trainable` must be set on the layer/model object (`encoder`), not on the
# tensor it returns, and before the model is compiled.
encoder.trainable = False

inp = L.Input((784,))
encoded = encoder(inp)
dense1 = L.Dense(16, activation='relu')(encoded)
dense2 = L.Dense(16, activation='relu')(dense1)
dense3 = L.Dense(10, activation='softmax')(dense2)
task_model = Model(inputs=inp, outputs=dense3)

# Compile and fit the classifier itself (not the autoencoder `model`).
task_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
history = task_model.fit(x_train, y_train, epochs=20)

In [None]:
# Baseline: the same architecture, but the encoder is freshly initialised
# and never trained as an autoencoder.
modelSet = buildAutoencoder(784, 32)
model = modelSet['model']
encoder = modelSet['encoder']

# Frozen as in the pretrained run so both runs train only the head.
# `trainable` is set on the encoder object, before compiling.
encoder.trainable = False

inp = L.Input((784,))
encoded = encoder(inp)
dense1 = L.Dense(16, activation='relu')(encoded)
dense2 = L.Dense(16, activation='relu')(dense1)
dense3 = L.Dense(10, activation='softmax')(dense2)
task_model = Model(inputs=inp, outputs=dense3)

# Compile and fit the classifier itself (not the autoencoder `model`).
task_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
history2 = task_model.fit(x_train, y_train, epochs=20)

# `history` is the run with the pretrained encoder from the previous cell.
plt.figure(figsize=(15, 7.5), dpi=400)
plt.plot(history.history['loss'], color='red', label='With AE Pretraining')
plt.plot(history2.history['loss'], color='blue', label='Without AE Pretraining')
plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

Pretraining with the Higgs Boson dataset.

In [None]:
# Rows containing the '?' placeholder are dropped before casting to float32.
train_df = pd.read_csv('../input/higs-bonsons-and-background-process/train.csv').replace('?', np.nan).dropna()
X_train = train_df.drop(['class', 'id'], axis=1).astype(np.float32)
y_train = train_df['class'].astype(np.float32)

valid_df = pd.read_csv('../input/higs-bonsons-and-background-process/test.csv').replace('?', np.nan).dropna()
X_valid = valid_df.drop(['class', 'id'], axis=1).astype(np.float32)
y_valid = valid_df['class'].astype(np.float32)

# Read the feature count from the data so the encoder input, the decoder
# output and the model inputs always agree.
n_features = len(X_train.columns)

encoder = Sequential()
encoder.add(L.Input((n_features,)))
for units in (28, 28, 28, 16, 16, 16):
    encoder.add(L.Dense(units, activation='relu'))

decoder = Sequential()
decoder.add(L.Input((16,)))
for units in (16, 16, 16, 28, 28):
    decoder.add(L.Dense(units, activation='relu'))
decoder.add(L.Dense(n_features, activation='linear'))

# --- pretraining: train encoder + decoder to reconstruct the features
inp = L.Input((n_features,))
encoded = encoder(inp)
decoded = decoder(encoded)
ae = keras.models.Model(inputs=inp, outputs=decoded)

ae.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = ae.fit(X_train, X_train, epochs=100,
                 validation_data=(X_valid, X_valid))

# --- stage I: classification head on top of the frozen, pretrained encoder
# `trainable` is set on the encoder object (not on its output tensor) and
# before compile(), since compile() fixes which weights are trained.
encoder.trainable = False

inp = L.Input((n_features,))
encoded = encoder(inp)
dense1 = L.Dense(16, activation='relu')(encoded)
dense2 = L.Dense(16, activation='relu')(dense1)
dense3 = L.Dense(16, activation='relu')(dense2)
dense4 = L.Dense(1, activation='sigmoid')(dense3)
task_model = keras.models.Model(inputs=inp, outputs=dense4)
task_model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

history_i = task_model.fit(X_train, y_train, epochs=70,
                           validation_data=(X_valid, y_valid))

# --- stage II: unfreeze the encoder and fine-tune the whole network
# A change to `trainable` only takes effect after compiling again.
encoder.trainable = True
task_model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])
history_ii = task_model.fit(X_train, y_train, epochs=30,
                            validation_data=(X_valid, y_valid))
# One figure per metric. Stage I (epochs 1-70) is drawn solid, stage II
# (epochs 71-100) dashed; training curves are red, validation curves blue.
stageRuns = (
    (range(1, 71), history_i, {}, 'Stage I'),
    (range(71, 101), history_ii, {'linestyle': '--'}, 'Stage II'),
)
curveKinds = (
    ('', 'red', 'Train'),
    ('val_', 'blue', 'Validation'),
)

for metricKey, axisLabel in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.figure(figsize=(15, 7.5), dpi=400)
    for keyPrefix, colour, split in curveKinds:
        for epochSpan, run, lineStyle, stageName in stageRuns:
            plt.plot(epochSpan,
                     run.history[keyPrefix + metricKey],
                     color=colour,
                     label=f'{split} {stageName}',
                     **lineStyle)
    plt.grid()
    plt.xlabel('Epoch')
    plt.ylabel(axisLabel)
    plt.legend()
    plt.show()

## Multitask Autoencoders

Loading MNIST data.

In [None]:
# `keras` comes from `from tensorflow import keras`; the bare name
# `tensorflow` is not bound in this notebook (it is imported as `tf`).
(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(len(x_train),784)/255
x_valid = x_valid.reshape(len(x_valid),784)/255

Apply multitask autoencoding to MNIST.

In [None]:
# Autoencoder parts; the 32-unit latent layer feeds both output branches.
modelSet = buildAutoencoder(784, 32)
model, encoder, decoder = (modelSet[part] for part in ('model', 'encoder', 'decoder'))

# Classification branch: three 16-unit ReLU layers, then a 10-way softmax.
tasker = keras.models.Sequential(name='taskOut')
tasker.add(L.Input((32,)))
for _ in range(3):
    tasker.add(L.Dense(16, activation='relu'))
tasker.add(L.Dense(10, activation='softmax'))

# One input, two outputs: the reconstruction and the class prediction.
inp = L.Input((784,), name='input')
encoded = encoder(inp)
decoded = decoder(encoded)
taskOut = tasker(encoded)

taskModel = Model(outputs=[decoded, taskOut], inputs=inp)

In [None]:
# use the imported `keras` name; `tensorflow` itself is only bound as `tf`
keras.utils.plot_model(taskModel, show_shapes=True)

In [None]:
# Loss keys are the names of the two output sub-models.
taskModel.compile(optimizer='adam',
                  loss = {'decoder':'binary_crossentropy',
                          'taskOut':'sparse_categorical_crossentropy'})

TOTAL_EPOCHS = 100

# The range ends at TOTAL_EPOCHS + 1 so that exactly TOTAL_EPOCHS epochs run.
for epoch in range(1, TOTAL_EPOCHS + 1):

    # Outputs for the first validation digit, taken before this epoch's
    # training step: [reconstruction, class probabilities].
    predictions = taskModel.predict(x_valid[0:1])

    plt.figure(figsize=(15, 7.5), dpi=400)
    plt.subplot(1, 3, 1)
    plt.imshow(x_valid[0].reshape((28, 28)), cmap='gray')
    plt.axis('off')
    plt.ylabel(f'EPOCH {epoch}')
    plt.title('Original Input')
    plt.subplot(1, 3, 2)
    plt.imshow(predictions[0].reshape((28, 28)), cmap='gray')
    plt.axis('off')
    plt.title('Decoder Output')
    plt.subplot(1, 3, 3)
    plt.imshow(predictions[1].reshape((10, 1)), cmap='gray')
    plt.title('Task Output')
    plt.show()

    # train both outputs for a single epoch
    history = taskModel.fit(x_train, {'decoder':x_train, 'taskOut': y_train},
                            epochs=1)


Applying multi-task autoencoding to the Mouse Protein Expression dataset.

In [None]:
from sklearn.model_selection import train_test_split as tts

# Load the dataset and drop the exported index column.
df = pd.read_csv('../input/mpempe/mouse-protein-expression.csv')
df = df.drop('Unnamed: 0', axis=1)

# Features / label, then a fixed-seed 80/20 train/validation split.
mpe_y = df['class']
mpe_x = df.drop('class', axis=1)
X_train, X_valid, y_train, y_valid = tts(mpe_x, mpe_y,
                                         random_state=42,
                                         train_size=0.8)
from keras.models import Sequential

# Encoder/decoder pair with a 32-unit latent layer and a linear output.
# The latent width has to be 32 because the task head defined after this
# block takes a 32-wide input.
modelSet = buildAutoencoder(len(X_train.columns), 32, outActivation='linear')
encoder = modelSet['encoder']
decoder = modelSet['decoder']

# Autoencoder assembled from the shared encoder and decoder.
inp = L.Input((len(X_train.columns),))
encoded = encoder(inp)
decoded = decoder(encoded)
ae = keras.models.Model(inputs=inp, outputs=decoded)

ae.compile(optimizer='adam', loss='mse', metrics=['mae'])
history = ae.fit(X_train, X_train, epochs=100,
                 validation_data=(X_valid, X_valid))


from keras.models import Model

# Classification branch: three 16-unit ReLU layers, then an 8-way softmax.
tasker = keras.models.Sequential(name='taskOut')
tasker.add(L.Input((32,)))
for i in range(3):
    tasker.add(L.Dense(16, activation='relu'))
tasker.add(L.Dense(8, activation='softmax'))

# One input, two outputs: the reconstruction and the class prediction.
inp = L.Input((len(X_train.columns),), name='input')
encoded = encoder(inp)
decoded = decoder(encoded)
taskOut = tasker(encoded)

taskModel = Model(inputs=inp, outputs=[decoded, taskOut])
# use the imported `keras` name; `tensorflow` itself is only bound as `tf`
keras.utils.plot_model(taskModel, show_shapes=True)
# Loss keys are the names of the two output sub-models.
taskModel.compile(optimizer='adam',
                  loss = {'decoder':'mse',
                          'taskOut':'sparse_categorical_crossentropy'})

TOTAL_EPOCHS = 5

X_valid = np.array(X_valid)
y_valid = np.array(y_valid)

# Keyword arguments shared by every heatmap drawn below.
heatmapStyle = dict(annot=True, cmap='gray', cbar=False, vmin=-1.5, vmax=2.5)
blankTicks = ['' for i in range(8)]

# The range ends at TOTAL_EPOCHS + 1 so that exactly TOTAL_EPOCHS epochs run.
for epoch in range(1, TOTAL_EPOCHS + 1):

    # show performance on three samples (before this epoch's training step)
    for index in range(3):
        print('-'*50)
        print(f'Epoch {epoch}')

        # predictions[0] is the reconstruction, predictions[1] the class probabilities
        predictions = taskModel.predict(X_valid[index:index+1])
        original = X_valid[index].reshape((8, 10))
        reconstruction = predictions[0].reshape((8, 10))

        # input, reconstruction and their absolute difference as 8x10 grids
        plt.figure(figsize=(30/4*3, 6), dpi=400)
        panels = (('Original Input', original),
                  ('Decoder Output', reconstruction),
                  ('Recon. Abs. Error', np.abs(reconstruction - original)))
        for position, (title, values) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            sns.heatmap(values, fmt='.1f', **heatmapStyle)
            plt.axis('off')
            if position == 1:
                plt.ylabel(f'EPOCH {epoch}')
            plt.title(title)
        plt.show()

        plt.close()

        # predicted class probabilities, one-hot truth and absolute difference
        predicted = predictions[1][0].reshape((1, 8))
        mat = np.zeros((8, 1))
        # labels are shifted by one to index the 8 outputs, as in the fit call below
        mat[int(y_valid[index]) - 1] = 1
        truth = mat.reshape((1, 8))

        plt.figure(figsize=(10, 4), dpi=400)
        plt.subplot(3, 1, 1)
        sns.heatmap(predicted, fmt='.1f',
                    yticklabels=['Predicted'],
                    xticklabels=blankTicks,
                    **heatmapStyle)
        plt.subplot(3, 1, 2)
        sns.heatmap(truth, fmt='.3f',
                    yticklabels=['Truth'],
                    xticklabels=blankTicks,
                    **heatmapStyle)
        plt.subplot(3, 1, 3)
        sns.heatmap(np.abs(predicted - truth), fmt='.3f',
                    yticklabels=['Abs. Error'],
                    **heatmapStyle)
        plt.show()
        plt.close()

    # fit for one epoch
    taskModel.fit(X_train, {'decoder':X_train, 'taskOut': y_train - 1},
                  epochs=1)

In [None]:
# Fresh multitask model for the tabular data held in X_train / X_valid.
# The code that follows reshapes samples to 8x10 and class outputs to 1x8,
# so the model is sized from the data (not 784 inputs / 10 classes) and
# uses a linear reconstruction output.
numFeatures = X_train.shape[1]
modelSet = buildAutoencoder(numFeatures, 32, outActivation='linear')
model = modelSet['model']
encoder = modelSet['encoder']
decoder = modelSet['decoder']

# Classification branch: three 16-unit ReLU layers, then an 8-way softmax.
tasker = keras.models.Sequential(name='taskOut')
tasker.add(L.Input((32,)))
for i in range(3):
    tasker.add(L.Dense(16, activation='relu'))
tasker.add(L.Dense(8, activation='softmax'))

inp = L.Input((numFeatures,), name='input')
encoded = encoder(inp)
decoded = decoder(encoded)
taskOut = tasker(encoded)

taskModel = Model(inputs=inp, outputs=[decoded, taskOut])

# Illustration of a sigmoid-shaped weighting schedule: the task weight rises
# from about 0.25 to about 0.75 over 100 epochs; the decoder weight is 1 - alpha.
plt.figure(figsize=(15, 7.5), dpi=400)
epochs = np.linspace(1, 100, 100)
alpha = (1/(1 + np.exp(-(epochs-50)/10))) / 2 + (1/4)
plt.plot(epochs, alpha, color='red', label='Task Output Weight')
plt.plot(epochs, 1-alpha, color='blue', label='Decoder Output Weight')
plt.xlabel('Epochs')
plt.ylim([0, 1])
plt.legend()
plt.show()

# Warm up the autoencoder that shares its encoder and decoder with taskModel
# (`model` from modelSet above), so the warm-up actually affects taskModel.
model.compile(optimizer='adam',
              loss='mae')
model.fit(X_train, X_train, epochs=1) # change this!
total_epochs = 1 # change this!

lossParams = {'decoder':'mse',
              'taskOut':'sparse_categorical_crossentropy'}

# per-epoch loss values collected across the separate fit() calls
loss, decoderLoss, taskOutLoss = [], [], []

# Keyword arguments shared by every heatmap drawn below.
heatmapStyle = dict(annot=True, cmap='gray', cbar=False, vmin=-1.5, vmax=2.5)
blankTicks = ['' for i in range(8)]

for epoch in range(1, total_epochs+1):

    # task-loss weight for this epoch: a sigmoid in the epoch number, centred at epoch 50
    alpha = (1/(1 + np.exp(-(epoch-50)/5)))

    # show performance on three samples (before this epoch's training step)
    for index in range(3):

        print('-'*500)
        print(f'Epoch {epoch}')

        # predictions[0] is the reconstruction, predictions[1] the class probabilities
        predictions = taskModel.predict(X_valid[index:index+1])
        original = X_valid[index].reshape((8, 10))
        reconstruction = predictions[0].reshape((8, 10))

        # input, reconstruction and their absolute difference as 8x10 grids
        plt.figure(figsize=(30/4*3, 6), dpi=400)
        panels = (('Original Input', original),
                  ('Decoder Output', reconstruction),
                  ('Recon. Abs. Error', np.abs(reconstruction - original)))
        for position, (title, values) in enumerate(panels, start=1):
            plt.subplot(1, 3, position)
            sns.heatmap(values, fmt='.1f', **heatmapStyle)
            plt.axis('off')
            if position == 1:
                plt.ylabel(f'EPOCH {epoch}')
            plt.title(title)
        plt.show()

        plt.close()

        # predicted class probabilities, one-hot truth and absolute difference
        predicted = predictions[1][0].reshape((1, 8))
        mat = np.zeros((8, 1))
        # Shift the label by one, exactly as the training targets
        # (`y_train - 1`) are shifted, so the truth row lines up with the
        # model's 8 outputs and the largest label stays in range.
        mat[int(y_valid[index]) - 1] = 1
        truth = mat.reshape((1, 8))

        plt.figure(figsize=(10, 4), dpi=400)
        plt.subplot(3, 1, 1)
        sns.heatmap(predicted, fmt='.1f',
                    yticklabels=['Predicted'],
                    xticklabels=blankTicks,
                    **heatmapStyle)
        plt.subplot(3, 1, 2)
        sns.heatmap(truth, fmt='.3f',
                    yticklabels=['Truth'],
                    xticklabels=blankTicks,
                    **heatmapStyle)
        plt.subplot(3, 1, 3)
        sns.heatmap(np.abs(predicted - truth), fmt='.3f',
                    yticklabels=['Abs. Error'],
                    **heatmapStyle)
        plt.show()
        plt.close()

    # Compile again each epoch so this epoch's loss weights are applied.
    # NOTE(review): recompiling with optimizer='adam' presumably starts from
    # fresh optimizer state every epoch - confirm this is acceptable.
    taskModel.compile(optimizer='adam',
                      loss = lossParams,
                      loss_weights = {'taskOut': alpha,
                                      'decoder': 1-alpha},
                      metrics = {'taskOut': ['accuracy']})
    history = taskModel.fit(X_train, {'decoder':X_train, 'taskOut': y_train - 1},
                            epochs = 1, batch_size = 128)
    loss.extend(history.history['loss'])
    decoderLoss.extend(history.history['decoder_loss'])
    taskOutLoss.extend(history.history['taskOut_loss'])

In [None]:
plt.figure(figsize=(15, 7.5), dpi=400)

# The three loss curves collected during training, one point per epoch.
epochAxis = range(1, total_epochs+1)
curves = (
    (decoderLoss, dict(color='red', linestyle='--', label='Reconstruction Loss')),
    (taskOutLoss, dict(color='blue', label='Task Loss')),
    (loss, dict(color='green', linestyle='-.', label='Overall Loss')),
)
for values, lineArgs in curves:
    plt.plot(epochAxis, values, **lineArgs)

# Background shading: thin vertical lines whose opacity follows a sigmoid
# centred at x = 50 (blue) and its complement (red).
def shadeOpacity(position):
    return 1/(1 + np.exp(-(position-50)/7.5))

for position in np.linspace(1, 100, 500):
    plt.axvline(x=position, linestyle='-', linewidth=0.5, color='blue',
                alpha=shadeOpacity(position))
for position in np.linspace(1, 100, 500):
    plt.axvline(x=position, linestyle='-', linewidth=0.5, color='red',
                alpha=1-shadeOpacity(position))

plt.grid()
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

---

## Sparse Autoencoders

Modifying the autoencoder function as a sparse autoencoder function.

In [None]:
from keras.models import Sequential, Model
import keras.layers as L

def buildSparseAutoencoder(inputSize=784,
                           impLatentSize=32,
                           realLatentSize=128,
                           outActivation='sigmoid'):
    """Build a dense autoencoder whose latent layer carries an L1 activity penalty.

    ``impLatentSize`` only sets the hidden widths (``impLatentSize * 2**i``,
    two ReLU layers per width). The latent layer itself has
    ``realLatentSize`` units and an L1 activity regularizer of 0.001.

    Args:
        inputSize: number of input (and reconstructed output) features.
        impLatentSize: base width from which the hidden widths are derived.
        realLatentSize: width of the regularized latent layer.
        outActivation: activation of the final decoder layer.

    Returns:
        dict with keys ``'model'``, ``'encoder'`` and ``'decoder'``.
    """
    topPower = int(np.floor(np.log2(inputSize/impLatentSize)))

    # encoder: widths shrink from impLatentSize * 2**topPower to impLatentSize
    encoder = Sequential(name='encoder')
    encoder.add(L.Input((inputSize,)))
    for power in reversed(range(topPower + 1)):
        width = impLatentSize * 2**power
        for _ in range(2):
            encoder.add(L.Dense(width, activation='relu'))
    encoder.add(L.Dense(realLatentSize,
                        activation='relu',
                        activity_regularizer=keras.regularizers.L1(0.001)))

    # decoder: widths grow from impLatentSize * 2 up to impLatentSize * 2**topPower
    decoder = Sequential(name='decoder')
    decoder.add(L.Input((realLatentSize,)))
    for power in range(1, topPower + 1):
        width = impLatentSize * 2**power
        for _ in range(2):
            decoder.add(L.Dense(width, activation='relu'))
    decoder.add(L.Dense(inputSize, activation=outActivation))

    # full model: input -> encoder -> decoder
    ae_input = L.Input((inputSize,), name='input')
    ae = Model(inputs=ae_input, outputs=decoder(encoder(ae_input)))

    return {'model': ae, 'encoder': encoder, 'decoder': decoder}

Applying sparse autoencoders to the Higgs Boson dataset.

In [None]:
# Rows containing the '?' placeholder are dropped before casting to float32.
train_df = pd.read_csv('../input/higs-bonsons-and-background-process/train.csv').replace('?', np.nan).dropna()
X_train = train_df.drop(['class', 'id'], axis=1).astype(np.float32)
y_train = train_df['class'].astype(np.float32)

valid_df = pd.read_csv('../input/higs-bonsons-and-background-process/test.csv').replace('?', np.nan).dropna()
X_valid = valid_df.drop(['class', 'id'], axis=1).astype(np.float32)
y_valid = valid_df['class'].astype(np.float32)

# Sparse autoencoder: 28 inputs, hidden widths derived from 8, 64 latent units.
modelSet = buildSparseAutoencoder(28, 8, 64, 'linear')
model = modelSet['model']
encoder = modelSet['encoder']
model.compile(optimizer='adam', loss='mse')
history = model.fit(X_train, X_train, epochs=1, # change this!
                    verbose=1,
                    validation_data=(X_valid, X_valid))

In [None]:
import seaborn as sns

X_train = np.array(X_train)

encoded = encoder.predict(X_train)
reconst = model.predict(X_train)

plt.set_cmap('gray')

# First five samples: input (7x4), latent code (8x8) and reconstruction (7x4).
for i in range(5):

    plt.figure(figsize=(15, 5), dpi=400)
    panels = (
        ('Original Image', X_train[i], (7, 4)),
        ('Latent Space', encoded[i], (8, 8)),
        ('Reconstructed Image', reconst[i], (7, 4)),
    )
    for position, (caption, vector, grid) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(caption)
        plt.axis('off')
        plt.imshow(vector.reshape(grid))
    plt.show()

Applying sparse autoencoders to the Mouse Protein Expression dataset.

In [None]:
from sklearn.model_selection import train_test_split as tts

# Load the dataset without the exported index column.
df = pd.read_csv('../input/mpempe/mouse-protein-expression.csv')
df = df.drop('Unnamed: 0', axis=1)

# Separate label and features, then split 80/20 with a fixed seed.
mpe_y = df['class']
mpe_x = df.drop('class', axis=1)
X_train, X_valid, y_train, y_valid = tts(mpe_x, mpe_y,
                                         random_state=42,
                                         train_size=0.8)

In [None]:
# Sparse autoencoder: 80 inputs, hidden widths derived from 16, 64 latent units.
modelSet = buildSparseAutoencoder(80, 16, 64, 'linear')
encoder = modelSet['encoder']
model = modelSet['model']
model.compile(loss='mse', optimizer='adam')
history = model.fit(X_train, X_train,
                    validation_data=(X_valid, X_valid),
                    epochs=100,
                    verbose=1)

In [None]:
import seaborn as sns

X_train = np.array(X_train)

encoded = encoder.predict(X_train)
reconst = model.predict(X_train)

plt.set_cmap('gray')

# First five samples: input (8x10), latent code (8x8) and reconstruction (8x10).
for i in range(5):

    plt.figure(figsize=(15, 5), dpi=400)
    panels = (
        ('Original Image', X_train[i], (8, 10)),
        ('Latent Space', encoded[i], (8, 8)),
        ('Reconstructed Image', reconst[i], (8, 10)),
    )
    for position, (caption, vector, grid) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(caption)
        plt.axis('off')
        plt.imshow(vector.reshape(grid))
    plt.show()

In [None]:
# `keras` comes from `from tensorflow import keras`; the bare name
# `tensorflow` is not bound in this notebook (it is imported as `tf`).
(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()
x_train = x_train.reshape(len(x_train),784)/255
x_valid = x_valid.reshape(len(x_valid),784)/255

In [None]:
# Sparse autoencoder: 784 inputs, hidden widths derived from 64, 256 latent units.
modelSet = buildSparseAutoencoder(784, 64, 256)
encoder = modelSet['encoder']
model = modelSet['model']
model.compile(loss='binary_crossentropy', optimizer='adam')
history = model.fit(x_train, x_train, verbose=1, epochs=50)

In [None]:
import seaborn as sns

# Encoder output and full reconstruction for every training image.
encoded = encoder.predict(x_train)
reconst = model.predict(x_train)

plt.set_cmap('gray')

# For the first five digits, draw input / latent vector / reconstruction side
# by side. The 256 latent values are shown as a 16x16 grid for display only.
for sample in range(5):
    panels = (
        ('Original Image', x_train[sample], (28, 28)),
        ('Latent Space', encoded[sample], (16, 16)),
        ('Reconstructed Image', reconst[sample], (28, 28)),
    )

    plt.figure(figsize=(15, 5), dpi=400)
    for position, (caption, values, grid) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        plt.title(caption)
        plt.axis('off')
        plt.imshow(values.reshape(grid))
    plt.show()

    # Visual separator between consecutive samples in the cell output.
    print('-' * 50)

---

## Denoising Autoencoders

Applying denoising autoencoders to the MNIST dataset.

In [None]:
# Reload MNIST for the denoising experiments. The file imports `keras` (from
# tensorflow) and `tf`, but never binds the bare name `tensorflow`, so the
# dataset is reached through `keras`.
(x_train, y_train), (x_valid, y_valid) = keras.datasets.mnist.load_data()

# Flatten each 28x28 image into a 784-vector and scale pixels to [0, 1].
x_train = x_train.reshape(len(x_train), 784) / 255
x_valid = x_valid.reshape(len(x_valid), 784) / 255

In [None]:
# Preview how increasing amounts of Gaussian noise degrade the MNIST digits.
PREVIEW_COUNT = 25  # images shown per noise level, arranged as a 5x5 grid

for std in [0, 0.1, 0.2, 0.3, 0.5, 0.9]:

    # Only the previewed rows are displayed, so noise is sampled for those
    # rows alone instead of for the entire training set.
    preview = x_train[:PREVIEW_COUNT]
    modified = preview + np.random.normal(0, std, size=preview.shape)
    # Clamp back into the [0, 1] range the pixels were scaled to.
    modified_clipped = np.clip(modified, 0, 1)

    plt.set_cmap('gray')
    plt.figure(figsize=(20, 20), dpi=400)
    for i in range(PREVIEW_COUNT):
        plt.subplot(5, 5, i+1)
        plt.imshow(modified_clipped[i].reshape((28, 28)))
        plt.axis('off')
    # Figure-level title: plt.title at this point would label only the last
    # subplot drawn rather than the whole grid.
    plt.suptitle(f'STD: {std}')
    plt.show()

In [None]:
# Autoencoder for the 784-pixel MNIST vectors, used below as a denoiser.
# NOTE(review): the second argument (32) is presumably the bottleneck width;
# confirm against buildAutoencoder, which is defined elsewhere in the file.
models = buildAutoencoder(784, 32)
model, encoder = (models[part] for part in ('model', 'encoder'))

# Mean squared error between the output and the target image.
model.compile(loss='mse', optimizer='adam')

In [None]:
from sklearn.metrics import mean_absolute_error as mae

std = 0.9  # standard deviation of the Gaussian corruption

TOTAL_EPOCHS = 100
loss = []
for i in tqdm(range(TOTAL_EPOCHS)):
    # Draw fresh noise every epoch so the network never trains on the same
    # corrupted copy twice; the target is always the clean image.
    modified = x_train + np.random.normal(0, std, size=x_train.shape)
    modified_clipped = np.clip(modified, 0, 1)
    history = model.fit(modified_clipped, x_train, epochs=1, verbose=0)
    loss.append(history.history['loss'])

# Corrupt the validation images the same way to measure denoising quality.
modified = x_valid + np.random.normal(0, std, size=x_valid.shape)
modified_clipped = np.clip(modified, 0, 1)

# Denoise once and reuse the result for both the metric and the plots.
denoised = model.predict(modified_clipped)

# Printed explicitly: a bare expression in the middle of a notebook cell is
# evaluated but never displayed.
print('Validation MAE:', mae(x_valid, denoised))

plt.set_cmap('gray')

for i in range(5):

    plt.figure(figsize=(15, 5), dpi=400)

    plt.subplot(1, 3, 1)
    plt.imshow(modified_clipped[i].reshape((28, 28)))
    plt.axis('off')
    plt.title('Noisy Input')

    plt.subplot(1, 3, 2)
    plt.imshow(x_valid[i].reshape((28, 28)))
    plt.axis('off')
    plt.title('True Denoised')

    # Show the model's output for the *noisy* input in the first panel, not
    # for the clean image, so the panel reflects actual denoising.
    plt.subplot(1, 3, 3)
    plt.imshow(denoised[i].reshape((28, 28)))
    plt.axis('off')
    plt.title('Predicted Denoised')

    plt.show()

In [None]:
# Corrupt the training images with Gaussian noise at the current `std`, then
# clamp the result back into the [0, 1] range the pixels were scaled to.
gaussian_noise = np.random.normal(0, std, size=x_train.shape)
modified = x_train + gaussian_noise
modified_clipped = np.clip(modified, 0, 1)

# Show the first 25 noisy digits in a 5x5 grid.
plt.set_cmap('gray')
plt.figure(figsize=(20, 20), dpi=400)
for slot, noisy_digit in enumerate(modified_clipped[:25], start=1):
    plt.subplot(5, 5, slot)
    plt.imshow(noisy_digit.reshape((28, 28)))
    plt.axis('off')
plt.show()

Applying denoising autoencoders to the Mouse Protein Expression dataset.

In [None]:
# Load the Mouse Protein Expression features; the 'Unnamed: 0' column and the
# 'class' label are dropped, leaving only the feature columns.
data = pd.read_csv('../input/mpempe/mouse-protein-expression.csv').drop(['Unnamed: 0', 'class'], axis=1)

# Random 80/20 split by index label, sampled without replacement.
train_indices = np.random.choice(data.index, replace=False, size=round(0.8 * len(data)))
# Vectorised membership test: every label not drawn for training, in the
# original index order (avoids a linear scan of train_indices per label).
valid_indices = np.array(data.index[~data.index.isin(train_indices)])
x_train, x_valid = data.loc[train_indices], data.loc[valid_indices]

# Autoencoder sized to the number of feature columns.
# NOTE(review): the second argument (16) is presumably the bottleneck width;
# confirm against buildAutoencoder, which is defined elsewhere in the file.
models = buildAutoencoder(len(data.columns), 16)
model = models['model']
encoder = models['encoder']

from sklearn.metrics import mean_absolute_error as mae


def _column_noise(frame, scales):
    """Return zero-mean Gaussian noise shaped and labelled like *frame*.

    ``scales`` holds one standard deviation per column of ``frame``; it is
    broadcast across the rows.
    """
    samples = np.random.normal(0, np.asarray(scales), size=frame.shape)
    return pd.DataFrame(samples, index=frame.index, columns=frame.columns)


model.compile(optimizer='adam', loss='mse')
TOTAL_EPOCHS = 100
loss = []

# Per-column noise scale: one fifth of each feature's training-set standard
# deviation, so the corruption is proportional to the feature's spread.
stds = x_train.std()
noise_scales = stds / 5

for i in tqdm(range(TOTAL_EPOCHS)):
    # Fresh noise each epoch; the clean rows are always the target.
    noise = _column_noise(x_train, noise_scales)
    history = model.fit(x_train + noise, x_train, epochs=1, verbose=0)
    loss.append(history.history['loss'])

# Evaluate on validation rows corrupted with the SAME noise scale used in
# training, so the score reflects the task the model was trained on.
noise = _column_noise(x_valid, noise_scales)

mae(x_valid, model.predict(x_valid + noise))