<a href="https://colab.research.google.com/github/MuhammadTaha25/Deep-Learning/blob/main/Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import the Fashion MNIST dataset (70,000 grayscale images of clothing items)
from tensorflow.keras.datasets import fashion_mnist

# Import the Sequential and Model classes to define neural network architectures
from tensorflow.keras.models import Sequential, Model

# Import layers:
#  - Input: to define the input tensor
#  - Reshape: to reshape tensors
#  - Dense: for fully connected layers
#  - Conv2D: for 2D convolutional layers
#  - MaxPooling2D: for downsampling feature maps
#  - UpSampling2D: for upsampling in decoder networks
#  - Flatten: to flatten feature maps into vectors
from tensorflow.keras.layers import Input, Reshape, Dense, Conv2D, MaxPooling2D, UpSampling2D, Flatten

# Import utility to convert integer labels to one-hot encoded vectors
from tensorflow.keras.utils import to_categorical

# Import Cropping2D layer in case you need to crop input images
from tensorflow.keras.layers import Cropping2D

# Matplotlib for plotting images and training curves
import matplotlib.pyplot as plt

# NumPy for numerical operations on arrays
import numpy as np


In [None]:
# Load the Fashion MNIST dataset through the Keras dataset helper.
# load_data() yields two (images, labels) pairs, unpacked here as:
#   • (X, Y): training images (X) and their labels (Y)
#   • (x, y): test images (x) and their labels (y)
# NOTE: the two splits differ only by letter case — upper case is train,
# lower case is test.
(X, Y), (x, y) = fashion_mnist.load_data()


In [None]:
# Conv2D layers expect (samples, height, width, channels) tensors, so append
# an explicit single grayscale channel to both image arrays.
#   • len(...): number of samples in the split (left unchanged)
#   • 28, 28:   image height and width
#   • 1:        single grayscale channel
x = np.reshape(x, (len(x), 28, 28, 1))  # test images
X = np.reshape(X, (len(X), 28, 28, 1))  # training images


In [None]:
# Cast each image array to 32-bit float and scale it into [0, 1].
# The cast has to come first: the in-place division below needs a float
# array to store fractional results in.

# Training images
X = X.astype(np.float32)
X /= 255.0  # 255 is the maximum 8-bit pixel value

# Test images
x = x.astype(np.float32)
x /= 255.0


In [None]:
# One-hot encode the integer class labels (10 clothing categories).
Y = to_categorical(Y, num_classes=10)  # training labels
y = to_categorical(y, num_classes=10)  # test labels


In [None]:
# Sanity check: display the container type of the training images
# (the recorded cell output reports numpy.ndarray).
type(X)

numpy.ndarray

In [None]:
# Define a standalone input tensor:
#   • shape=(28, 28, 1) corresponds to 28×28 pixels with 1 channel (grayscale)
# NOTE(review): no later cell visible here references `input_img`; the
# convolutional autoencoder below declares its own `input_layer_cnv`.
# Confirm whether this tensor is still needed.
input_img = Input(shape=(28, 28, 1))


In [None]:
# Build the convolutional autoencoder graph with the functional API.
# Layer names ("bot", "botnext0" … "botnext7") are part of the contract:
# later cells look layers up by these names, so do not rename them.

# Input: 28×28 pixels, 1 channel (grayscale)
input_layer_cnv = Input(shape=(28, 28, 1))

# ---------------------
# Encoder
# ---------------------
# Spatial size per block: 28×28 → 14×14 → 7×7 → 4×4
# (the last pooling uses "same" padding, so 7 rounds up to 4).

# First convolution + pooling block
# • 32 filters of size 3×3, ReLU activation, He-normal initialisation
# • "same" padding preserves spatial dimensions through the convolution
ae_cnv_en = Conv2D(32, (3, 3), activation="relu", padding="same", kernel_initializer="he_normal")(input_layer_cnv)
ae_cnv_en = MaxPooling2D((2, 2), padding="same")(ae_cnv_en)  # 28×28 → 14×14

# Second convolution + pooling block
# • 32 filters of size 3×3, ReLU activation
ae_cnv_en = Conv2D(32, (3, 3), activation="relu", padding="same")(ae_cnv_en)
ae_cnv_en = MaxPooling2D((2, 2), padding="same")(ae_cnv_en)  # 14×14 → 7×7

# Third convolution + pooling block
# • Only 4 filters: drastically reduces channels for a compact bottleneck
ae_cnv_en = Conv2D(4, (3, 3), activation="relu", padding="same")(ae_cnv_en)
ae_cnv_en = MaxPooling2D((2, 2), padding="same")(ae_cnv_en)  # 7×7 → 4×4

# Flatten the 4×4×4 feature maps into a 64-element vector
# (the "bottleneck" representation)
ae_cnv_en = Flatten(name="bot")(ae_cnv_en)

# ---------------------
# Decoder
# ---------------------
# Each block is convolution FIRST, then upsampling.
# Spatial size: 4×4 → 8×8 → 16×16 → (valid conv) 14×14 → 28×28

# Reshape the 64-element bottleneck vector back into a 4×4×4 feature map.
# No `input_shape` argument here: in the functional API the shape is inferred
# from the incoming tensor, and passing it only triggers a Keras warning.
ae_cnv_de = Reshape((4, 4, 4), name="botnext0")(ae_cnv_en)

# First convolution + upsampling block
# • 4 filters of size 3×3, ReLU activation
# • Upsample by a factor of 2 (4×4 → 8×8)
ae_cnv_de = Conv2D(4, (3, 3), activation="relu", padding="same", name="botnext1")(ae_cnv_de)
ae_cnv_de = UpSampling2D((2, 2), name="botnext2")(ae_cnv_de)

# Second convolution + upsampling block
# • 32 filters of size 3×3, ReLU activation
# • Upsample by 2 again (8×8 → 16×16)
ae_cnv_de = Conv2D(32, (3, 3), activation="relu", padding="same", name="botnext3")(ae_cnv_de)
ae_cnv_de = UpSampling2D((2, 2), name="botnext4")(ae_cnv_de)

# Third convolution + upsampling block
# • 32 filters of size 3×3 with padding="valid": the convolution trims one
#   pixel per border (16×16 → 14×14) ...
# • ... so that the final ×2 upsampling lands exactly on 28×28, matching the
#   input image size (with "same" padding the output would be 32×32).
ae_cnv_de = Conv2D(32, (3, 3), activation="relu", padding="valid", name="botnext5")(ae_cnv_de)
ae_cnv_de = UpSampling2D((2, 2), name="botnext6")(ae_cnv_de)

# Final convolution to map back to a single-channel image
# • 1 filter of size 3×3, sigmoid activation to produce outputs in [0, 1]
ae_cnv_de = Conv2D(1, (3, 3), activation="sigmoid", padding="same", name="botnext7")(ae_cnv_de)

  super().__init__(**kwargs)


In [None]:
# Wrap the encoder/decoder graph into a single trainable model that maps the
# input image tensor to its reconstruction.
Ae_Conv = Model(input_layer_cnv, ae_cnv_de)

# Training configuration:
# • loss: binary crossentropy, evaluated pixel-wise against the input image
# • optimizer: Adam with default settings
# • metric: accuracy is tracked, but for reconstruction the loss curve and
#   visual quality are the more meaningful indicators
Ae_Conv.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

# Alternative optimizer, kept for reference (commented out; it would also
# need `import tensorflow as tf`, which the visible cells do not import):
# Ae_Conv.compile(optimizer=tf.keras.optimizers.Adadelta(0.1, clipvalue=2),
#                 loss='binary_crossentropy', metrics=["accuracy"])

# Show the layer-by-layer architecture and parameter counts
Ae_Conv.summary()

In [None]:
# Create the encoder sub-model: it maps an input image to the flattened
# bottleneck vector produced by the Flatten layer named "bot".
# The layers (and therefore the weights) are shared with Ae_Conv, so the
# encoder reflects whatever Ae_Conv has learned.
# NOTE: the previous lookup used the layer name 'bottleneck_reshape', which
# does not exist in Ae_Conv (layers are named "bot" and "botnext0".."botnext7"),
# so get_layer() failed.
ae_cnv_en = Model(
    inputs=Ae_Conv.input,                     # the 28×28×1 image input
    outputs=Ae_Conv.get_layer('bot').output   # the 64-element bottleneck vector
)


In [None]:
# Print the layer-by-layer summary of the `ae_cnv_en` (encoder) sub-model
ae_cnv_en.summary()

In [None]:
# Create the decoder sub-model: it maps a bottleneck vector to a reconstructed
# 28×28×1 image by re-applying the decoder layers of Ae_Conv ("botnext0"
# through "botnext7") to a fresh input. The layer objects are reused, so the
# decoder shares its weights with Ae_Conv.
# NOTE: the previous version referenced an undefined name `autoencoder` and
# the non-existent layer names 'bottleneck_reshape' / 'decode8'.

# The decoder input has the same per-sample shape as the "bot" layer's output.
decoder_input = Input(shape=Ae_Conv.get_layer('bot').output.shape[1:])

# Chain the eight decoder layers in their original order.
decoder_output = decoder_input
for layer_index in range(8):
    decoder_output = Ae_Conv.get_layer(f'botnext{layer_index}')(decoder_output)

ae_cnv_de = Model(
    inputs=decoder_input,    # bottleneck vector entering the decoder
    outputs=decoder_output   # output of the final layer "botnext7"
)


In [None]:
# Decoder model summary (this prints `ae_cnv_de`, not the encoder)
ae_cnv_de.summary()

In [None]:
# Attach a training configuration to the `ae_cnv_en` sub-model:
#  • optimizer='adam' – adaptive gradient-based optimizer
#  • loss='categorical_crossentropy' – compares predicted and true class distributions
#  • metrics=['accuracy'] – reported during training/evaluation
# NOTE(review): in the visible cells `ae_cnv_en` has no 10-class output, so
# this loss only makes sense once a classification head is added — confirm
# the intent.
ae_cnv_en.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Fit the convolutional autoencoder to reconstruct its own input.
#  • x=X, y=X: the normalized training images serve as both input and target
#  • validation_split=0.1: hold out 10% of the training data for validation
#  • batch_size=1024: samples per gradient update
#  • epochs=10: full passes over the training data
#  • shuffle=True: reshuffle the training samples before every epoch
#  • verbose=2: one summary line per epoch
# The returned History object holds the per-epoch loss/metric values.
history = Ae_Conv.fit(
    x=X,
    y=X,
    validation_split=0.1,
    batch_size=1024,
    epochs=10,
    shuffle=True,
    verbose=2,
)


Epoch 1/10
53/53 - 121s - 2s/step - accuracy: 0.4941 - loss: 0.4760 - val_accuracy: 0.4953 - val_loss: 0.3624
Epoch 2/10


KeyboardInterrupt: 