<a href="https://colab.research.google.com/github/Tekleab15/Regularized_Auto_Encoder/blob/main/RAE_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regularized Autoencoder Implementation
This notebook demonstrates the implementation of a Regularized Autoencoder (RAE) using backpropagation, inspired by the methodologies described in "The Neural Coding Framework for Learning Generative Models".



## Dataset Loading and Preprocessing:
This section loads and binarizes all four datasets used in the paper (MNIST, KMNIST, FMNIST, and CalTech101).

# 1. Preprocessing the datasets

In [None]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
from tensorflow.keras.datasets import mnist


In [46]:
# Normalize and convert to binary
def preprocess_dataset(x_train, x_test):
    """Scale image arrays to [0, 1] and binarize them at a 0.5 threshold.

    Integer-typed arrays, and float arrays containing values above 1, are
    treated as 0-255 pixel intensities and divided by 255. Float (or bool)
    arrays that already lie within [0, 1] are thresholded as-is, so calling
    this function again on its own output returns the same binary images
    instead of collapsing every pixel to zero.

    Args:
        x_train: Array-like of training images.
        x_test: Array-like of test images.

    Returns:
        Tuple ``(x_train, x_test)`` of float32 arrays containing only 0.0 and
        1.0, with the same shapes as the inputs.
    """
    def _binarize(images):
        # Rescale-if-needed, then threshold a single image array.
        images = np.asarray(images)
        # Only raw pixel data is rescaled; dividing already-normalized data
        # by 255 again would push every value below the 0.5 threshold.
        is_raw_pixels = np.issubdtype(images.dtype, np.integer) or (
            images.size > 0 and images.max() > 1.0
        )
        scaled = images.astype('float32')
        if is_raw_pixels:
            scaled = scaled / 255.
        return (scaled > 0.5).astype('float32')

    return _binarize(x_train), _binarize(x_test)

In [48]:
# Load MNIST; Keras returns it already split into training and test sets
mnist_train_split, mnist_test_split = mnist.load_data()
x_train_mnist, y_train_mnist = mnist_train_split
x_test_mnist, y_test_mnist = mnist_test_split


# 2. Loading KMNIST dataset

In [56]:
# Unlike MNIST (loaded through Keras above), KMNIST is loaded through
# TensorFlow Datasets, which returns one dataset object per requested split.
(ds_train_kmnist, ds_test_kmnist), ds_info = tfds.load(
    'kmnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)


def convert_to_numpy(ds):
    """Materialize a dataset of (image, label) pairs as two NumPy arrays.

    Args:
        ds: Dataset accepted by ``tfds.as_numpy`` that yields
            ``(image, label)`` pairs.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays built from the examples in
        iteration order.
    """
    images, labels = [], []
    for img, lbl in tfds.as_numpy(ds):
        images.append(img)
        labels.append(lbl)
    return np.array(images), np.array(labels)


# Convert datasets to numpy arrays
x_train_kmnist, y_train_kmnist = convert_to_numpy(ds_train_kmnist)
x_test_kmnist, y_test_kmnist = convert_to_numpy(ds_test_kmnist)


# 3. Loading FMNIST dataset

In [58]:
# Loading the FMNIST dataset through TensorFlow Datasets.
# The two split datasets get their own ds_* names so that the x_*/y_* names
# only ever refer to the NumPy image/label arrays produced below.
(ds_train_fmnist, ds_test_fmnist), ds_info = tfds.load(
    'fashion_mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

# Convert datasets to numpy arrays
x_train_fmnist, y_train_fmnist = convert_to_numpy(ds_train_fmnist)
x_test_fmnist, y_test_fmnist = convert_to_numpy(ds_test_fmnist)

# print("The dataset size and shape: ", x_train_fmnist.shape)

# Preprocessing for MNIST, KMNIST, FMNIST datasets

In [73]:
# NOTE(review): each call below rebinds the same names it reads, so re-running
# this cell feeds already-binarized arrays back into preprocess_dataset.
# Reload the raw data first unless preprocess_dataset is safe to apply twice.

# Binarize MNIST
x_train_mnist, x_test_mnist = preprocess_dataset(
    x_train=x_train_mnist, x_test=x_test_mnist
)
# Binarize KMNIST
x_train_kmnist, x_test_kmnist = preprocess_dataset(
    x_train=x_train_kmnist, x_test=x_test_kmnist
)
# Binarize FMNIST
x_train_fmnist, x_test_fmnist = preprocess_dataset(
    x_train=x_train_fmnist, x_test=x_test_fmnist
)

# 4. Loading and Preprocessing CalTech101

In [74]:
# Preprocess images: normalize, resize, and threshold to binary
def preprocess_image(image, label, image_size=(16, 16), threshold=0.5):
    """Convert one image to a fixed-size binary float32 tensor.

    Args:
        image: Image tensor for a single example.
        label: Label for the example; returned unchanged.
        image_size: Target ``(height, width)`` passed to ``tf.image.resize``.
        threshold: Pixels strictly greater than this value (after conversion
            to float32 and resizing) become 1.0; all others become 0.0.

    Returns:
        Tuple ``(image, label)`` where ``image`` is a float32 tensor of 0.0
        and 1.0 values resized to ``image_size``.
    """
    image = tf.image.convert_image_dtype(image, tf.float32)  # Normalize
    image = tf.image.resize(image, image_size)  # Resize
    image = tf.cast(image > threshold, tf.float32)  # Threshold to binary
    return image, label

# Load CalTech101 dataset
(ds_train, ds_test), ds_info = tfds.load(
    'caltech101',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

# Apply preprocessing to the dataset. Each element is an (image, label) pair,
# so preprocess_image can be passed to map directly with its default size.
ds_train = ds_train.map(preprocess_image)
ds_test = ds_test.map(preprocess_image)

# Convert to numpy arrays, reusing the convert_to_numpy helper defined in the
# KMNIST section instead of declaring an identical copy here.
x_train_caltech, y_train_caltech = convert_to_numpy(ds_train)
x_test_caltech, y_test_caltech = convert_to_numpy(ds_test)

# Flatten the images for the autoencoder input: one row per image
x_train_caltech = x_train_caltech.reshape((x_train_caltech.shape[0], -1))
x_test_caltech = x_test_caltech.reshape((x_test_caltech.shape[0], -1))

print("CalTech101 train shape:", x_train_caltech.shape)
print("CalTech101 test shape:", x_test_caltech.shape)

CalTech101 train shape: (3060, 768)
CalTech101 test shape: (6084, 768)
