# CREATING A CONVOLUTIONAL AUTOENCODER

In [1]:
#!pip install opencv-contrib-python

In [32]:
import cv2
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.layers import *

DEFINE THE BUILD_AUTOENCODER() FUNCTION, WHICH INTERNALLY BUILDS THE AUTOENCODER ARCHITECTURE AND RETURNS THE ENCODER, THE DECODER, AND THE AUTOENCODER ITSELF.
    START BY DEFINING THE INPUT AND THE FIRST SET OF 32 CONVOLUTIONAL FILTERS:

In [33]:
def build_autoencoder(input_shape = (28, 28, 1),
                     encoding_size = 32,
                     alpha = 0.2):
    """Build a convolutional autoencoder together with its encoder and decoder.

    The encoder applies two stride-2 convolutions (32 then 64 filters), flattens
    the result and projects it to a vector of length `encoding_size`. The decoder
    mirrors it: a Dense layer restores the flattened volume, two stride-2
    transposed convolutions (64 then 32 filters) upsample it, and a final
    transposed convolution followed by a sigmoid produces the reconstruction.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input image. The last entry is used as the number of
        output channels of the decoder (assumes the Keras default
        channels-last layout). The reconstruction has the same height and width
        as the input only when both survive two stride-2 halvings exactly,
        e.g. multiples of 4 such as the default 28.
    encoding_size : int
        Number of units of the encoder's output Dense layer, which is also the
        length of the decoder's input vector.
    alpha : float
        Slope coefficient handed to every LeakyReLU layer.

    Returns
    -------
    tuple
        `(encoder_model, decoder_model, autoencoder_model)`. The autoencoder
        is the decoder model applied to the encoder model's output, so all
        three share the same layers.
    """
    # ---- ENCODER: FIRST SET OF CONVOLUTIONS (32 FILTERS) ----
    inputs = Input(shape = input_shape)
    encoder = Conv2D(filters = 32,
                     kernel_size = (3, 3),
                     strides = 2,
                     padding = "same") (inputs)
    # THE SLOPE IS PASSED POSITIONALLY: THE KEYWORD IS `alpha` IN KERAS 2 BUT
    # `negative_slope` IN KERAS 3 (WHERE `alpha=` ONLY WORKS WITH A DEPRECATION WARNING).
    encoder = LeakyReLU(alpha) (encoder)
    encoder = BatchNormalization() (encoder)

    # SECOND SET OF CONVOLUTIONS (64 THIS TIME)
    encoder = Conv2D(filters = 64,
                     kernel_size = (3, 3),
                     strides = 2,
                     padding = "same") (encoder)
    encoder = LeakyReLU(alpha) (encoder)
    encoder = BatchNormalization() (encoder)

    # OUTPUT LAYERS OF THE ENCODER. THE PRE-FLATTEN SHAPE IS REMEMBERED SO THE
    # DECODER CAN REBUILD A VOLUME OF THE SAME SIZE.
    encoder_output_shape = encoder.shape
    encoder = Flatten() (encoder)
    encoder_output = Dense(units = encoding_size) (encoder)

    encoder_model = Model(inputs, encoder_output)

    # ---- DECODER: STARTS FROM THE ENCODING AND 64 TRANSPOSED CONVOLUTION FILTERS ----
    decoder_input = Input(shape = (encoding_size,))
    # DROP THE BATCH AXIS; WHAT REMAINS IS THE PER-SAMPLE VOLUME TO RESTORE.
    target_shape = tuple(encoder_output_shape[1:])
    # int() SO THE LAYER RECEIVES A PLAIN PYTHON INTEGER RATHER THAN A NUMPY SCALAR.
    decoder = Dense(int(np.prod(target_shape))) (decoder_input)
    decoder = Reshape(target_shape) (decoder)
    decoder = Conv2DTranspose(filters = 64,
                              kernel_size = (3, 3),
                              strides = 2,
                              padding = "same") (decoder)
    decoder = LeakyReLU(alpha) (decoder)
    decoder = BatchNormalization() (decoder)

    # SECOND SET OF TRANSPOSED CONVOLUTIONS (32 THIS TIME).
    # KERNEL IS (3, 3) LIKE EVERY OTHER LAYER; THE EARLIER (3, 2) WAS A TYPO.
    decoder = Conv2DTranspose(filters = 32,
                              kernel_size = (3, 3),
                              strides = 2,
                              padding = "same") (decoder)
    decoder = LeakyReLU(alpha) (decoder)
    decoder = BatchNormalization() (decoder)

    # OUTPUT LAYER OF THE DECODER: ONE FILTER PER INPUT CHANNEL (1 FOR THE
    # DEFAULT GRAYSCALE INPUT), SQUASHED TO [0, 1] BY THE SIGMOID.
    decoder = Conv2DTranspose(filters = input_shape[-1],
                              kernel_size = (3, 3),
                              padding = "same") (decoder)
    outputs = Activation("sigmoid") (decoder)

    decoder_model = Model(decoder_input, outputs)

    # THE DECODER USES CONV2DTRANSPOSE LAYERS, WHICH EXPAND THEIR INPUTS TO GENERATE LAYER OUTPUT VOLUMES.
    # NOTICE THAT THE FURTHER WE GO INTO THE DECODER, THE FEWER FILTERS THE CONV2DTRANSPOSE LAYERS USE.
    # FINALLY, DEFINE THE AUTOENCODER BY CHAINING THE ENCODER AND THE DECODER:
    encoder_model_output = encoder_model(inputs)
    decoder_model_output = decoder_model(encoder_model_output)
    autoencoder_model = Model(inputs, decoder_model_output)

    return encoder_model, decoder_model, autoencoder_model

Define a function that will plot a sample of generated images against their original
counterparts. This will help us visually assess the autoencoder's performance.

In [34]:
def plot_original_vs_generated(original, generated):
    """Display a mosaic comparing sampled originals with their generated copies.

    Fifteen random indices are drawn (with replacement) and used to pick the
    same positions from both arrays. Each selection is laid out as a grid of
    3 rows by 5 images; the two grids are stacked vertically (originals on
    top), resized to 860x860, converted to BGR, labelled, and shown in an
    OpenCV window until a key is pressed.

    Parameters
    ----------
    original : numpy.ndarray
        Array of single-channel images indexed along the first axis, e.g. the
        uint8 array of shape (N, 28, 28) prepared by this notebook.
    generated : numpy.ndarray
        Array of the same layout holding the corresponding reconstructions.

    Returns
    -------
    None
    """
    num_images = 15
    # ONE SET OF INDICES FOR BOTH ARRAYS, SO EACH ORIGINAL LINES UP WITH ITS COPY.
    sample = np.random.randint(0, len(original),
                               num_images)

    # INNER HELPER THAT STACKS THE SAMPLED IMAGES IN A 3X5 GRID (3 ROWS OF 5 IMAGES):
    def stack(data):
        images = data[sample]
        return np.vstack([np.hstack(images[:5]),
                          np.hstack(images[5:10]),
                          np.hstack(images[10:15])])

    # FINALLY, CREATE A MOSAIC CONTAINING BOTH THE ORIGINAL AND THE GENERATED IMAGES:
    original_grid = stack(original)
    generated_grid = stack(generated)

    mosaic = np.vstack([original_grid,
                        generated_grid])
    mosaic = cv2.resize(mosaic, (860, 860),
                        interpolation = cv2.INTER_AREA)
    # THE LABELS ARE DRAWN WITH 3-COMPONENT COLORS, SO THE GRAYSCALE MOSAIC IS EXPANDED TO BGR FIRST.
    mosaic = cv2.cvtColor(mosaic, cv2.COLOR_GRAY2BGR)

    add_text(mosaic, "original", (50, 100))
    add_text(mosaic, "Generated", (50, 520))

    cv2.imshow("Mosaic", mosaic)
    cv2.waitKey(0)
    # CLOSE THE WINDOW ONCE A KEY IS PRESSED SO IT DOES NOT LINGER AFTER THE CELL FINISHES.
    cv2.destroyAllWindows()


# NEXT, DEFINE A FUNCTION THAT WILL PUT TEXT ON AN IMAGE IN A GIVEN POSITION:
def add_text(image, text, position):
    """Draw `text` in black on a filled white box, modifying `image` in place.

    Parameters
    ----------
    image : numpy.ndarray
        Image to draw on; it is modified directly and nothing is returned.
    text : str
        Label to render.
    position : tuple
        (x, y) point passed to `cv2.putText` as the text origin and used as
        one corner of the background box.
    """
    pt1 = position
    # BOX SIZE: 22 PIXELS PER CHARACTER PLUS 10 OF PADDING, REACHING 45 PIXELS ABOVE THE ORIGIN.
    pt2 = (pt1[0] + 10 + (len(text) * 22), pt1[1] - 45)
    # A THICKNESS OF -1 FILLS THE RECTANGLE.
    cv2.rectangle(image, pt1, pt2, (255, 255, 255),
                  -1)
    cv2.putText(image, text,
                position,
                fontFace = cv2.FONT_HERSHEY_SIMPLEX,
                fontScale = 1.3,
                color = (0, 0, 0),
                thickness = 4)
    

DOWNLOAD (OR LOAD, IF CACHED) FASHION_MNIST.

In [39]:
# ONLY THE IMAGES ARE KEPT; THE LABELS OF BOTH SPLITS ARE DISCARDED VIA `_`.
(x_train, _), (x_test, _) = fashion_mnist.load_data()

NORMALIZE THE IMAGES AND ADD A CHANNEL DIMENSION TO THEM:

In [40]:
# CAST TO FLOAT32 AND DIVIDE BY 255, THEN APPEND A TRAILING CHANNEL AXIS.
# NOTE: THE ARRAYS ARE REBOUND IN PLACE, SO RE-RUNNING THIS CELL WITHOUT
# RELOADING THE DATA WOULD SCALE AND EXPAND THEM A SECOND TIME.
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., np.newaxis]

In [41]:
# BUILD THE MODELS WITH A 256-UNIT ENCODING; ONLY THE THIRD RETURNED MODEL
# (THE FULL AUTOENCODER) IS KEPT, THE FIRST TWO ARE DISCARDED.
_, _, autoencoder = build_autoencoder(encoding_size = 256)
# ADAM OPTIMIZER WITH MEAN SQUARED ERROR AS THE LOSS.
autoencoder.compile(optimizer = "adam", loss = "mse")



In [42]:

# TRAINING HYPERPARAMETERS
EPOCHS = 20
BATCH_SIZE = 512

# THE IMAGES SERVE AS BOTH INPUT AND TARGET, FOR TRAINING AND FOR VALIDATION.
autoencoder.fit(
    x = x_train,
    y = x_train,
    validation_data = (x_test, x_test),
    batch_size = BATCH_SIZE,
    epochs = EPOCHS,
    shuffle = True,
    verbose = 1,
)

Epoch 1/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 695ms/step - loss: 0.0293 - val_loss: 0.1112
Epoch 2/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 632ms/step - loss: 0.0087 - val_loss: 0.0936
Epoch 3/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m74s[0m 631ms/step - loss: 0.0059 - val_loss: 0.0594
Epoch 4/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 632ms/step - loss: 0.0047 - val_loss: 0.0252
Epoch 5/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 660ms/step - loss: 0.0040 - val_loss: 0.0064
Epoch 6/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 644ms/step - loss: 0.0035 - val_loss: 0.0037
Epoch 7/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 643ms/step - loss: 0.0033 - val_loss: 0.0034
Epoch 8/20
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 642ms/step - loss: 0.0031 - val_loss: 0.0031
Epoch 9/20
[1m

<keras.src.callbacks.history.History at 0x1fa33db9050>

USE THE TRAINED AUTOENCODER TO MAKE COPIES (RECONSTRUCTIONS) OF THE TEST SET:

In [43]:
# RUN THE TEST IMAGES THROUGH THE AUTOENCODER; THE OUTPUTS ARE ITS RECONSTRUCTIONS.
predictions = autoencoder.predict(x_test)

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 16ms/step


RESHAPE BOTH THE PREDICTIONS AND THE TEST IMAGES BACK TO 28X28 (NO CHANNEL DIMENSION), AND RESCALE THEM TO 8-BIT PIXEL VALUES:


In [44]:
# (N, 28, 28): THE SAME IMAGES WITHOUT THE TRAILING CHANNEL AXIS.
original_shape = (x_test.shape[0], 28, 28)

# DROP THE CHANNEL AXIS, SCALE BY 255 AND CAST TO uint8.
# NOTE: BOTH NAMES ARE REBOUND, SO THIS CELL IS NOT SAFE TO RUN TWICE IN A ROW.
predictions = (predictions.reshape(original_shape) * 255.0).astype("uint8")
x_test = (x_test.reshape(original_shape) * 255.0).astype("uint8")

GENERATE A COMPARATIVE MOSAIC OF THE ORIGINAL IMAGES AND THE COPIES OUTPUTTED BY THE AUTOENCODER:

In [47]:
# COMPARE THE 8-BIT TEST IMAGES (FIRST ARGUMENT) WITH THEIR RECONSTRUCTIONS (SECOND ARGUMENT).
plot_original_vs_generated(x_test, predictions)