### Data

In [None]:
from __future__ import print_function

import numpy as np

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [None]:
import keras
from keras.datasets import mnist
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout

In [None]:
# Print the installed Keras version so the outputs below can be tied to a release.
# keras can be installed/updated with `pip install -U keras`
print(keras.__version__)

In [None]:
# Shared settings for the Keras models trained in this notebook.
batch_size = 128  # mini-batch size passed to every Keras fit() call below
num_classes = 10  # number of classes used when one-hot encoding the labels
epochs = 2  # epochs for the MLP, autoencoder and denoising-autoencoder fits

In [None]:
# the data, shuffled and split between train and test sets
# load_data() is unpacked into (images, labels) pairs for each split.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Check the array shapes before any preprocessing is applied.
print('x_train shape:', x_train.shape)
print('y_train shape:', y_train.shape)

In [None]:
# Show the first training image using a grayscale colormap.
plt.imshow(x_train[0], cmap='gray')

In [None]:
# Labels of the first five training samples.
print(y_train[:5])

In [None]:
# Raw values of the first training image, before the scaling done in the next cell.
print(x_train[0])

In [None]:
# Flatten every image into a 784-element float32 vector and divide by 255.
# Passing -1 lets NumPy infer the number of samples, so the split sizes
# (60000 / 10000) are no longer hard-coded.
# NOTE: this cell overwrites its own inputs; re-running it without
# reloading the data would divide by 255 a second time.
x_train = x_train.reshape(-1, 784).astype('float32') / 255
x_test = x_test.reshape(-1, 784).astype('float32') / 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

### Ensemble classification

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Seed the forest so that repeated runs of the notebook build the same
# trees and report the same scores.
rf = RandomForestClassifier(random_state=0)

In [None]:
# Fit on the complete training set (this may take a while) and score on the
# test set; fit() hands back the estimator, so the two calls are chained.
rf.fit(x_train, y_train).score(x_test, y_test)

In [None]:
# Arrange the 784 feature importances on the 28x28 image grid and plot them
importance = np.reshape(rf.feature_importances_, (28, 28))
plt.imshow(importance, cmap='gray')

In [None]:
# Repeat the experiment using only the first 600 training samples.
x_train_small, y_train_small = x_train[:600], y_train[:600]
rf.fit(x_train_small, y_train_small).score(x_test, y_test)

In [None]:
# Keep references to the current label arrays; the next cell rebinds
# y_train / y_test to binary class matrices.
y_train_c, y_test_c = y_train, y_test

In [None]:
# convert class vectors to binary class matrices
# Encode from the saved label arrays (y_train_c / y_test_c) rather than from
# y_train / y_test themselves, so that re-running this cell does not feed an
# already converted matrix back into to_categorical.
y_train = keras.utils.to_categorical(y_train_c, num_classes)
y_test = keras.utils.to_categorical(y_test_c, num_classes)

In [None]:
# recall y_train was [5 0 4 1 9]
# The same five labels, now as rows of the binary class matrix.
print(y_train[:5])

### Build and train an MLP model

In [None]:
# Fully connected classifier on the flattened 784-value images: two
# 512-unit ReLU layers, each followed by Dropout(0.2), then a softmax
# output layer.
model = Sequential([
    Dense(512, activation='relu', input_shape=(784,)),
    Dropout(0.2),
    Dense(512, activation='relu'),
    Dropout(0.2),
    # num_classes instead of a literal 10, so the output width always
    # matches the one-hot labels built with the same constant
    Dense(num_classes, activation='softmax'),
])

model.summary()

In [None]:
# Press shift-tab in jupyter to show function parameters
# Configure training: categorical cross-entropy loss, the 'rmsprop'
# optimizer, and accuracy reported as an additional metric.
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [None]:
# Train the MLP on the flattened images and one-hot labels; the test split
# is passed as the validation data.
model.fit(x_train, y_train,
          batch_size=batch_size, epochs=epochs,
          verbose=1, validation_data=(x_test, y_test))

In [None]:
# Evaluate on the test split. With 'accuracy' as the only compiled metric,
# score[0] is printed as the loss and score[1] as the accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

In [None]:
# Print floats in fixed-point notation with 4 decimals and wider lines.
np.set_printoptions(suppress=True, precision=4, linewidth=100)

# Softmax outputs for the test set, shown next to the one-hot targets.
y_pred = model.predict(x_test)
print('y_test:\n', y_test[:5])
print('y_pred:\n', y_pred[:5])

In [None]:
# Reduce targets and predictions to class indices (position of the row maximum)
print('y_test:', y_test[:5].argmax(axis=1))
print('y_pred:', y_pred[:5].argmax(axis=1))

### Build and train an autoencoder

In [None]:
# Width of the bottleneck: how many numbers each image is compressed into.
encoding_dim = 32  # 784 inputs / 32 -> compression factor 24.5
# encoding_dim = 2   # alternative: 784 inputs / 2 -> compression factor 392

# Symbolic input for one flattened image
input_img = Input(shape=(784,))

# Encoder half: a single ReLU layer producing the compressed code
bottleneck = Dense(encoding_dim, activation='relu')
encoded = bottleneck(input_img)

# Decoder half: a sigmoid layer mapping the code back to 784 values,
# i.e. the lossy reconstruction of the input
reconstruct = Dense(784, activation='sigmoid')
decoded = reconstruct(encoded)

# End-to-end model: image in, reconstruction out
autoencoder = Model(inputs=input_img, outputs=decoded)
print("autoencoder model created")

In [None]:
# Stand-alone encoder: same input as the autoencoder, output is the code
encoder = Model(inputs=input_img, outputs=encoded)

# Stand-alone decoder: push a new encoding_dim-sized input through the
# autoencoder's last layer, so the decoder reuses that very layer object
encoded_input = Input(shape=(encoding_dim,))
decoder_layer = autoencoder.layers[-1]
decoder_output = decoder_layer(encoded_input)
decoder = Model(inputs=encoded_input, outputs=decoder_output)

In [None]:
# Print the layer-by-layer summary of the autoencoder.
autoencoder.summary()

In [None]:
# Train the autoencoder to reproduce its own input: x_train is both the
# input and the target, and the test images serve as validation data.
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

history = autoencoder.fit(x_train, x_train,
                          batch_size=batch_size, epochs=epochs,
                          verbose=1, validation_data=(x_test, x_test))

In [None]:
# encode and decode some digits
# note that we take them from the *test* set
# The encoder output is fed straight into the stand-alone decoder.
encoded_imgs = encoder.predict(x_test)
decoded_imgs = decoder.predict(encoded_imgs)

In [None]:
n = 10  # how many digits we will display
# One figure row per entry: originals on top, reconstructions below.
rows = (x_test, decoded_imgs)

fig = plt.figure(figsize=(20, 4))
plt.gray()  # make gray the default colormap once, instead of per image
for r, images in enumerate(rows):
    # loop over n (not a literal 10) so that changing n changes the figure
    for i in range(n):
        ax = plt.subplot(len(rows), n, r * n + i + 1)
        ax.imshow(images[i].reshape(28, 28))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

### Denoising autoencoder

In [None]:
noise_factor = 0.5
# Draw the Gaussian noise from a locally seeded generator so that the noisy
# datasets are identical on every run of the notebook.
noise_rng = np.random.RandomState(0)
x_train_noisy = x_train + noise_factor * noise_rng.normal(loc=0.0, scale=1.0, size=x_train.shape)
x_test_noisy = x_test + noise_factor * noise_rng.normal(loc=0.0, scale=1.0, size=x_test.shape)

# Clip the sums back into the [0, 1] interval.
x_train_noisy = np.clip(x_train_noisy, 0., 1.)
x_test_noisy = np.clip(x_test_noisy, 0., 1.)

In [None]:
# Keep training the same autoencoder, now with noisy images as input and the
# clean images as target (noisy/clean test images as validation data).
history = autoencoder.fit(x_train_noisy, x_train,
                          batch_size=batch_size, epochs=epochs,
                          verbose=1, validation_data=(x_test_noisy, x_test))

In [None]:
# Encodings of the clean test images; kept under this name because the
# random-forest cell further down reads `encoded_imgs`.
encoded_imgs = encoder.predict(x_test)
# The reconstructions shown below must come from the *noisy* inputs that are
# displayed in the top row; decoding the clean images would not show any
# denoising at all.
encoded_noisy_imgs = encoder.predict(x_test_noisy)
decoded_imgs = decoder.predict(encoded_noisy_imgs)

n = 10  # how many digits we will display
# One figure row per entry: noisy inputs on top, their reconstructions below.
rows = (x_test_noisy, decoded_imgs)

fig = plt.figure(figsize=(20, 4))
plt.gray()  # make gray the default colormap once, instead of per image
for r, images in enumerate(rows):
    # loop over n (not a literal 10) so that changing n changes the figure
    for i in range(n):
        ax = plt.subplot(len(rows), n, r * n + i + 1)
        ax.imshow(images[i].reshape(28, 28))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
plt.show()

In [None]:
# recall we ran RF on a subset of the original data
x_train_small = x_train[:600]
y_train_small = y_train_c[:600]
# Report the shape only after the subset has been rebuilt in this cell,
# rather than relying on the value left over from an earlier cell.
print('Classification on data shape:', x_train_small.shape)
rf.fit(x_train_small, y_train_small)
rf.score(x_test, y_test_c)

In [None]:
# let's see if the latent representation is a good proxy
x_train_small_encoded = encoder.predict(x_train_small)
print('Classification on new data shape:', x_train_small_encoded.shape)
# Encode the clean test images right here instead of reusing `encoded_imgs`,
# whose contents depend on which earlier cell happened to run last.
x_test_encoded = encoder.predict(x_test)
# New forest for the encoded features; the seed is fixed so the score is
# reproducible across runs.
rf = RandomForestClassifier(random_state=0)
rf.fit(x_train_small_encoded, y_train_small)
rf.score(x_test_encoded, y_test_c)

In [None]:
# what about something more drastic?
# Reload the unflattened images and build copies in which every pixel
# outside image column 14 is zero.
# NOTE: this also rebinds y_train / y_test to the labels returned by
# load_data(), replacing the one-hot matrices created earlier.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

x_train_one_column = np.zeros(x_train.shape)
x_test_one_column = np.zeros(x_test.shape)

x_train_one_column[:, :, 14] = x_train[:, :, 14]
x_test_one_column[:, :, 14] = x_test[:, :, 14]

# Flatten to 784-value float32 vectors and divide by 255. Passing -1 lets
# NumPy infer the sample count instead of hard-coding 60000 / 10000.
x_train = x_train.reshape(-1, 784).astype('float32') / 255.
x_test = x_test.reshape(-1, 784).astype('float32') / 255.

x_train_one_column = x_train_one_column.reshape(-1, 784).astype('float32') / 255.
x_test_one_column = x_test_one_column.reshape(-1, 784).astype('float32') / 255.

In [None]:
# Train the autoencoder to produce the full images from the single-column
# inputs; note the literal 5 epochs here instead of the shared `epochs`.
autoencoder.fit(x_train_one_column, x_train,
                batch_size=batch_size, epochs=5,
                verbose=1, validation_data=(x_test_one_column, x_test))

In [None]:
encoded_imgs = encoder.predict(x_test_one_column)
decoded_imgs = decoder.predict(encoded_imgs)

n = 10  # how many digits we will display
# One figure row per entry: full originals, the single-column inputs that
# were actually fed to the model, and the resulting reconstructions.
rows = (x_test, x_test_one_column, decoded_imgs)

fig = plt.figure(figsize=(20, 6))
plt.gray()  # make gray the default colormap once, instead of per image
for r, images in enumerate(rows):
    # loop over n (not a literal 10) so that changing n changes the figure
    for i in range(n):
        ax = plt.subplot(len(rows), n, r * n + i + 1)
        ax.imshow(images[i].reshape(28, 28))
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

plt.show()