In [None]:
# === Environment Setup ===
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, Markdown, Image
import tensorflow as tf
from tensorflow.keras import layers, models
import os

# --- Configuration ---
# Figures: white-grid theme, larger text, and a 10x6 inch canvas at 150 dpi.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rcParams['font.size'] = 14
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['figure.dpi'] = 150
# Arrays: fixed-point notation (no scientific format), 4 decimals, 120-char rows.
np.set_printoptions(precision=4, linewidth=120, suppress=True)

# --- Utility Functions ---
def note(msg):
    """Show `msg` in the cell output as a Markdown-rendered info alert box."""
    alert_html = f"<div class='alert alert-info'>📝 {msg}</div>"
    display(Markdown(alert_html))
def sec(title):
    """Print an upper-cased section banner framed by two 80-character '=' rules."""
    rule = "=" * 80
    banner = "\n".join(["", rule, "| " + title.upper() + " |", rule])
    print(banner)

# Emit a visible confirmation once the setup statements above have run.
note("Environment initialized for Autoencoders.")

# Chapter 7.11: Autoencoders

---

### Table of Contents

1.  [**Introduction: Data Compression and Feature Learning**](#intro)
2.  [**The Autoencoder Architecture**](#architecture)
3.  [**Code Lab: Building a Simple Autoencoder for Denoising**](#code-lab)
4.  [**Variational Autoencoders (VAEs)**](#vaes)
5.  [**Summary**](#summary)

<a id='intro'></a>
## 1. Introduction: Data Compression and Feature Learning

An **autoencoder** is a type of neural network used for unsupervised learning. Its primary purpose is to learn a compressed, lower-dimensional representation (an **encoding**) of a dataset. The concept dates back to the 1980s and was explored by researchers like Geoffrey Hinton as a method for using neural networks to tackle high-dimensional data.

Think of it like creating a summary of a book. To write a good summary, you must first understand the book's key themes, characters, and plot points. You compress the high-dimensional information of the full text into a low-dimensional summary. Then, someone else could read your summary and reconstruct a general, though not perfectly detailed, idea of the original book.

Similarly, an autoencoder is trained to reconstruct its own input. This seemingly trivial task becomes meaningful because the data must pass through a bottleneck that cannot simply hold a verbatim copy of the input, which forces the network's internal layers to learn the most important latent features of the data. By learning to ignore noise and capture only the essential patterns, it can effectively compress the data and then decompress it back into a form that closely resembles the original.

<a id='architecture'></a>
## 2. The Autoencoder Architecture

An autoencoder consists of two main parts:
- **The Encoder:** This part of the network takes the input data and compresses it into a lower-dimensional latent space. This compressed vector is the "encoding" or the "bottleneck."
- **The Decoder:** This part of the network takes the compressed encoding and reconstructs the data back to its original dimensions.

The network is trained by minimizing the **reconstruction loss**, which is a measure of the difference between the original input and the reconstructed output. The choice of loss function depends on the nature of the input data. For image data with pixel values between 0 and 1, **Binary Cross-Entropy** is often effective: each pixel intensity is treated as the probability parameter of a Bernoulli distribution. For continuous data, **Mean Squared Error (MSE)** is a common choice.

![Autoencoder Architecture](../images/07-Machine-Learning/autoencoder_architecture.png)

<a id='code-lab'></a>
## 3. Code Lab: Building a Simple Autoencoder for Denoising

A powerful application of autoencoders is **denoising**. We can train an autoencoder to reconstruct clean images from noisy ones by feeding it the noisy images as input while using the clean images as the training target. This forces the model to learn the underlying structure of the data, ignoring the noise.

In [None]:
sec("Building and Training a Denoising Autoencoder")

# Fix every random source (Python, NumPy, TensorFlow) so the noise, the weight
# initialisation and the batch shuffling are the same on each Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)
rng = np.random.default_rng(SEED)


def add_gaussian_noise(images, noise_factor, rng):
    """Return a copy of `images` corrupted with additive Gaussian noise.

    Args:
        images: float array of pixel intensities scaled to [0, 1].
        noise_factor: standard deviation of the zero-mean noise that is added.
        rng: `numpy.random.Generator` the noise is drawn from.

    Returns:
        A float32 array with the same shape as `images`, clipped to [0, 1].
    """
    # Drawing the noise directly as float32 avoids silently promoting the whole
    # dataset to float64 (which would double its memory footprint).
    noise = noise_factor * rng.standard_normal(size=images.shape, dtype=np.float32)
    return np.clip(images + noise, 0., 1.).astype(np.float32, copy=False)


# Load MNIST data (the labels are discarded: reconstruction is unsupervised),
# scale pixels to [0, 1] and append the channel axis that Conv2D expects.
(x_train, _), (x_test, _) = tf.keras.datasets.mnist.load_data()
x_train = (x_train.astype('float32') / 255.)[..., np.newaxis]
x_test = (x_test.astype('float32') / 255.)[..., np.newaxis]
assert x_train.shape[1:] == (28, 28, 1) and x_test.shape[1:] == (28, 28, 1)

# Add random noise (train first, then test, both from the same seeded generator)
noise_factor = 0.5
x_train_noisy = add_gaussian_noise(x_train, noise_factor, rng)
x_test_noisy = add_gaussian_noise(x_test, noise_factor, rng)

# --- Model Architecture ---
# Convolutional autoencoder: convolutions suit image data because they pick up
# local spatial patterns and combine them into larger ones layer by layer.
conv_opts = {'activation': 'relu', 'padding': 'same'}
input_img = tf.keras.Input(shape=(28, 28, 1))

# Encoder: two Conv2D + MaxPooling2D stages. Each pooling stage halves the
# height and width (28 -> 14 -> 7), so `encoded` is a 7x7 map with 32 channels.
x = layers.Conv2D(filters=32, kernel_size=(3, 3), **conv_opts)(input_img)
x = layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x)
x = layers.Conv2D(filters=32, kernel_size=(3, 3), **conv_opts)(x)
encoded = layers.MaxPooling2D(pool_size=(2, 2), padding='same')(x)

# Decoder: the encoder in reverse. Each UpSampling2D stage doubles the height
# and width (7 -> 14 -> 28); the final sigmoid convolution emits one channel
# of values in [0, 1], matching the scaled pixel range of the targets.
x = layers.Conv2D(filters=32, kernel_size=(3, 3), **conv_opts)(encoded)
x = layers.UpSampling2D(size=(2, 2))(x)
x = layers.Conv2D(filters=32, kernel_size=(3, 3), **conv_opts)(x)
x = layers.UpSampling2D(size=(2, 2))(x)
decoded = layers.Conv2D(filters=1, kernel_size=(3, 3), activation='sigmoid', padding='same')(x)

# Tie input and output together and train with per-pixel binary cross-entropy.
autoencoder = models.Model(inputs=input_img, outputs=decoded)
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')

# Training hyper-parameters live in one place so that the status message and
# the fit() call cannot drift apart.
EPOCHS = 10
BATCH_SIZE = 128

note(f"Training autoencoder for {EPOCHS} epochs. This may take a few minutes.")
# Inputs are the noisy images and targets are the clean ones, so the model is
# optimised to remove the noise. The test split doubles as validation data here.
# The returned History object is kept so the loss curves can be inspected later.
history = autoencoder.fit(
    x_train_noisy,
    x_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_data=(x_test_noisy, x_test),
)

note("Autoencoder training complete. Now, let's visualize the results.")

# --- Visualize Denoising ---
# Run the noisy test images through the trained model to obtain reconstructions.
decoded_imgs = autoencoder.predict(x_test_noisy)

n = 10  # number of test digits shown per row
# One row per stage, so the denoised output can be compared against both the
# corrupted input it was computed from and the clean ground truth.
panels = [
    ("Original", x_test),
    ("Noisy input", x_test_noisy),
    ("Denoised", decoded_imgs),
]
fig, axes = plt.subplots(len(panels), n, figsize=(20, 2 * len(panels)), squeeze=False)
for row_axes, (label, images) in zip(axes, panels):
    for i, ax in enumerate(row_axes):
        # Passing cmap explicitly avoids plt.gray(), which changes the default
        # colormap for every later figure in the notebook.
        ax.imshow(images[i].reshape(28, 28), cmap='gray')
        ax.axis('off')
    # Label each row once, on its left-most image.
    row_axes[0].set_title(label, loc='left')
fig.tight_layout()
plt.show()

<a id='vaes'></a>
## 4. Variational Autoencoders (VAEs)

**Variational Autoencoders (VAEs)** are a more advanced, generative type of autoencoder. Instead of mapping each input to a single point in the latent space, a VAE's encoder outputs the parameters of a **probability distribution** over the latent space (typically the mean and variance of a Gaussian). This allows us to sample from the latent space to generate new, synthetic data that resembles the original training data. VAEs are a key component of modern generative AI and are discussed in more detail in the **Chapter on Generative Models**.

![VAE Architecture](../images/07-Machine-Learning/VAE_architecture.png)

<a id='summary'></a>
## 5. Summary

Autoencoders are a versatile tool for unsupervised learning. They provide a powerful way to learn compressed representations of data, which can be used for dimensionality reduction, feature learning, and denoising. Their generative extension, the VAE, is a cornerstone of modern generative modeling.