<a href="https://colab.research.google.com/github/Tekleab15/Regularized_Auto_Encoder/blob/main/RAE_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regularized Autoencoder Implementation
This notebook demonstrates the implementation of a Regularized Autoencoder (RAE) using backpropagation, inspired by the methodologies described in "The Neural Coding Framework for Learning Generative Models".



**Dataset Loading and Preprocessing:** all four datasets used in the paper (MNIST, KMNIST, FMNIST, CalTech101)

# 1. Preprocessing the datasets

In [46]:
# Normalize and convert to binary
def _binarize_images(images):
    """Scale pixel intensities to [0, 1] and threshold them at 0.5.

    Input is divided by 255 before thresholding, except when it is a
    floating-point array whose maximum is already <= 1.0. Dividing such an
    array by 255 pushes every value below the 0.5 cut-off and yields an
    all-zero result, so it is thresholded as-is instead.

    Args:
        images: NumPy array of pixel intensities.

    Returns:
        float32 array of the same shape containing only 0.0 and 1.0.
    """
    # `size > 0` guards `.max()`, which raises on an empty array.
    already_unit_range = (
        images.dtype.kind == 'f' and images.size > 0 and images.max() <= 1.0
    )
    scaled = images.astype('float32')
    if not already_unit_range:
        scaled = scaled / 255.
    return (scaled > 0.5).astype('float32')


def preprocess_dataset(x_train, x_test):
    """Binarize a train/test pair of image arrays.

    Each array is scaled to [0, 1] and thresholded at 0.5. The operation is
    idempotent: feeding the returned arrays back in returns the same values,
    so re-running a notebook cell that rebinds its inputs to the outputs
    does not wipe the data.

    Args:
        x_train: Training images as a NumPy array.
        x_test: Test images as a NumPy array.

    Returns:
        Tuple ``(x_train, x_test)`` of float32 arrays with values 0.0 / 1.0.
    """
    return _binarize_images(x_train), _binarize_images(x_test)

In [48]:
# Load the MNIST dataset through the Keras built-in datasets module
from tensorflow.keras.datasets import mnist
# load_data() returns the data already split into training and test sets
(x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = mnist.load_data()


# 2. Loading KMNIST dataset

In [56]:
# Unlike MNIST, KMNIST is loaded through TensorFlow Datasets (tfds) rather than keras.datasets
import tensorflow_datasets as tfds
import numpy as np
# With a list of splits, tfds.load returns one dataset per split, in the same
# order; with_info=True adds the info object, and as_supervised=True makes
# every element an (image, label) pair.
(ds_train_kmnist, ds_test_kmnist), ds_info = tfds.load(
    'kmnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)


def convert_to_numpy(ds):
    """Materialize a dataset of (image, label) pairs as two NumPy arrays.

    Args:
        ds: Dataset whose elements unpack into (image, label); it is
            iterated one element at a time through ``tfds.as_numpy``.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays stacked along a new
        leading axis, in iteration order.
    """
    images, labels = [], []
    for img, lbl in tfds.as_numpy(ds):
        images.append(img)
        labels.append(lbl)
    return np.array(images), np.array(labels)


# Convert datasets to numpy arrays
x_train_kmnist, y_train_kmnist = convert_to_numpy(ds_train_kmnist)
x_test_kmnist, y_test_kmnist = convert_to_numpy(ds_test_kmnist)


In [53]:
# Sanity check: confirm the KMNIST arrays have the expected shapes
# print(f'KMNIST train shape: {x_train_kmnist.shape}, KMNIST test shape: {x_test_kmnist.shape}')

# 3. Loading FMNIST dataset

In [58]:
# Loading the FMNIST dataset
# tfds.load returns one dataset per requested split, in the same order, so the
# two objects are the train and test datasets (not image/label arrays).
(ds_train_fmnist, ds_test_fmnist), ds_info = tfds.load(
    'fashion_mnist',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

# Materialize each split as (images, labels) NumPy arrays
x_train_fmnist, y_train_fmnist = convert_to_numpy(ds_train_fmnist)
x_test_fmnist, y_test_fmnist = convert_to_numpy(ds_test_fmnist)

# print("The dataset size and shape: ", x_train_fmnist.shape)

# Preprocessing for MNIST, KMNIST, FMNIST datasets

In [59]:
# Binarize every train/test image pair. Each call rebinds the same names, so
# the arrays loaded above are replaced by their preprocessed versions and the
# original pixel values are no longer available after this cell runs.
x_train_mnist, x_test_mnist = preprocess_dataset(x_train_mnist, x_test_mnist)
x_train_fmnist, x_test_fmnist = preprocess_dataset(x_train_fmnist, x_test_fmnist)
x_train_kmnist, x_test_kmnist = preprocess_dataset(x_train_kmnist, x_test_kmnist)

# 4. Loading and Preprocessing CalTech101

In [68]:
import tensorflow as tf
# Resize and threshold images to be binary
def resize_and_threshold_images(ds, image_size=(16, 16), threshold=0.5):
    """Resize every image in a dataset and binarize it against a cut-off.

    Args:
        ds: Dataset whose elements unpack into (image, label); it is
            iterated one element at a time through ``tfds.as_numpy``.
            The default cut-off assumes intensities in [0, 1]; in this
            notebook the datasets are mapped through ``preprocess_image``
            before being passed in.
        image_size: Target size handed to ``tf.image.resize``.
        threshold: Values strictly greater than this become 1.0 and all
            others 0.0. Defaults to 0.5.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` is float32
        holding only 0.0 and 1.0. The comparison is element-wise, so each
        channel is thresholded independently and the channel axis is kept.
    """
    resized_images, labels = [], []
    for img, lbl in tfds.as_numpy(ds):
        img_resized = tf.image.resize(img, image_size)
        # Compare on the tensor, then convert the boolean mask to NumPy.
        img_binary = (img_resized > threshold).numpy().astype('float32')
        resized_images.append(img_binary)
        labels.append(lbl)
    return np.array(resized_images), np.array(labels)

# Load the 'caltech101' dataset from TFDS as (image, label) pairs.
# NOTE(review): this was labelled "CalTech 101 Silhouettes", but the shapes
# printed at the end of this cell have 3 channels, so these appear to be
# colour images rather than silhouettes. Confirm this is the intended source.
(ds_train, ds_test), ds_info = tfds.load(
    'caltech101',
    split=['train', 'test'],
    shuffle_files=True,
    as_supervised=True,
    with_info=True
)

def preprocess_image(image, label):
    """Convert an image to float32 and pass its label through unchanged.

    Intended for use with ``Dataset.map``. ``tf.image.convert_image_dtype``
    rescales integer pixel values into [0, 1] when the target dtype is a
    float type, so this is the normalization step.

    Args:
        image: Image tensor to convert.
        label: Label paired with the image; returned as-is.

    Returns:
        Tuple ``(image_as_float32, label)``.
    """
    as_float = tf.image.convert_image_dtype(image, dtype=tf.float32)
    return as_float, label

# Convert both splits to float32 images before resizing and thresholding
ds_train = ds_train.map(preprocess_image)
ds_test = ds_test.map(preprocess_image)

# Materialize each split as NumPy arrays of resized, binarized images
x_train_caltech, y_train_caltech = resize_and_threshold_images(ds_train)
x_test_caltech, y_test_caltech = resize_and_threshold_images(ds_test)
# Report the resulting array shapes
print(f'CalTech101 train shape: {x_train_caltech.shape}, CalTech101 test shape: {x_test_caltech.shape}')


CalTech101 train shape: (3060, 16, 16, 3), CalTech101 test shape: (6084, 16, 16, 3)
