In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

import numpy as np
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

In [2]:
def load_data(root='./data'):
    """Load MNIST via torchvision and return flattened, rescaled pixel arrays.

    The datasets are downloaded into ``root`` if they are not already there.

    Parameters:
        root (str): Directory torchvision uses to store/find the MNIST files.
            Defaults to './data'.

    Returns:
        tuple: ``(x_train, x_test)`` NumPy arrays. Each is the dataset's raw
               ``.data`` tensor reshaped to ``(-1, 784)`` and divided by 255.0.
    """
    # No `transform` is passed: the pixels are read straight from `.data`
    # below, and dataset transforms are only applied when items are indexed,
    # so a transform here would never run.
    train_dataset = datasets.MNIST(root=root, train=True, download=True)
    test_dataset = datasets.MNIST(root=root, train=False, download=True)

    # Flatten each 28x28 image to a 784-vector and rescale by 1/255.
    x_train = train_dataset.data.numpy().reshape(-1, 784) / 255.0
    x_test = test_dataset.data.numpy().reshape(-1, 784) / 255.0

    return x_train, x_test

In [15]:
def relu(x):
    """Rectified linear unit: replace negative entries with zero.

    Parameters:
        x (np.ndarray): Values to rectify.

    Returns:
        np.ndarray: Element-wise ``max(0, x)`` as computed by ``np.maximum``.
    """
    floor = 0
    rectified = np.maximum(floor, x)
    return rectified

def sigmoid(x):
    """Applies the Sigmoid activation function element-wise.

    Uses a numerically stable formulation so that inputs with a large
    magnitude do not trigger floating-point overflow in ``np.exp``.

    Parameters:
        x (np.ndarray): Input array (array-likes and scalars are accepted).

    Returns:
        np.ndarray: Array with Sigmoid applied to each element; a NumPy
                    scalar when ``x`` is a scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow, whereas the
    # naive exp(-x) overflows for large negative x.
    decay = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + exp(-x)).  For x < 0 the algebraically identical
    # form exp(x) / (1 + exp(x)) is used, where exp(x) == decay.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # n-d arrays unchanged, matching what the naive expression returns.
    return result[()]

def lin_combination(x, weights, bias):
    """Affine map: ``np.dot(x, weights)`` shifted by ``bias``.

    Parameters:
        x (np.ndarray): Input array (left operand of the dot product).
        weights (np.ndarray): Weight matrix (right operand of the dot product).
        bias (np.ndarray): Bias added to the product; NumPy broadcasting applies.

    Returns:
        np.ndarray: The dot product of ``x`` and ``weights`` plus ``bias``.
    """
    projected = np.dot(x, weights)
    shifted = projected + bias
    return shifted

def mse_loss(x, x_reconstructed):
    """Mean squared error between an input and its reconstruction.

    Parameters:
        x (np.ndarray): Reference values.
        x_reconstructed (np.ndarray): Values compared against ``x``; the two
            are subtracted element-wise, so NumPy broadcasting rules apply.

    Returns:
        The mean over all elements of the squared difference.
    """
    residual = x - x_reconstructed
    squared_error = residual ** 2
    return np.mean(squared_error)

In [None]:
# 1. Load data: load_data() returns a (train, test) pair of image arrays,
#    unpacked here into the module-level names X_train and X_test.
X_train, X_test = load_data()

# Initialize weights and biases (lowercase names) for the
# 784 -> 512 -> 256 -> 128 -> 256 -> 512 -> 784 autoencoder.
# Weights are small Gaussian values (standard normal * 0.01); biases are zero.
# A seeded generator makes the initial weights identical on every
# Restart & Run All, so training runs can be compared.
SEED = 42
rng = np.random.default_rng(SEED)

# Encoder
w1 = rng.standard_normal((784, 512)) * 0.01
b1 = np.zeros(512)

w2 = rng.standard_normal((512, 256)) * 0.01
b2 = np.zeros(256)

w3 = rng.standard_normal((256, 128)) * 0.01
b3 = np.zeros(128)

# Decoder (mirror of the encoder)
w4 = rng.standard_normal((128, 256)) * 0.01
b4 = np.zeros(256)

w5 = rng.standard_normal((256, 512)) * 0.01
b5 = np.zeros(512)

w6 = rng.standard_normal((512, 784)) * 0.01
b6 = np.zeros(784)

def forward(x):
    """Run one forward pass through the six-layer autoencoder.

    Every layer is ``relu(lin_combination(previous, w, b))``. The first three
    layers (w1/b1 .. w3/b3) form the encoder and the last three
    (w4/b4 .. w6/b6) form the decoder. The parameters are read from the
    module-level variables at call time.

    Parameters:
        x (np.ndarray): Input data fed to the first layer.

    Returns:
        tuple: ``(z.T, x_reconstructed.T)`` — the transposed output of the
               third (encoded) layer and the transposed output of the sixth
               (decoded) layer.
    """
    encoder_params = ((w1, b1), (w2, b2), (w3, b3))
    decoder_params = ((w4, b4), (w5, b5), (w6, b6))

    # Encoder: feed each layer's activation into the next one.
    activation = x
    for weights, bias in encoder_params:
        activation = relu(lin_combination(activation, weights, bias))
    latent = activation

    # Decoder: start from the latent code and apply the remaining layers.
    for weights, bias in decoder_params:
        activation = relu(lin_combination(activation, weights, bias))
    reconstruction = activation

    return latent.T, reconstruction.T

# 6. Implement backpropagation and weight updates - Students need to implement
def backward(x, h1, h2, z, h3, h4, x_reconstructed, lr=0.001):
    """Placeholder for backpropagation and parameter updates (not implemented).

    The body is currently a no-op: calling it changes nothing and returns None.

    Parameters:
        x: presumably the input batch given to the forward pass — TODO confirm.
        h1, h2: presumably the two encoder hidden activations — TODO confirm.
        z: presumably the latent (encoded) representation — TODO confirm.
        h3, h4: presumably the two decoder hidden activations — TODO confirm.
        x_reconstructed: presumably the network's reconstruction of ``x`` —
            TODO confirm.
        lr (float): presumably the learning rate for the updates; defaults
            to 0.001.
    """
    # Compute gradients and update weights and biases
    pass

# 7. Implement training loop - Students need to implement
# def train(x_train, epochs=100, lr=0.001):
#     # Train the network and collect loss and latent codes
#     return losses, latent_codes, reconstructed_data

# Train the model (Uncomment once students complete the train function)
# losses, latent_codes, reconstructed_data = train(X_train)

# 8. Plot the loss curve
def plot_loss_curve(losses):
    """Plot the given loss values as a line chart and display the figure.

    Parameters:
        losses: Sequence of loss values, plotted in order against their index
            (the x-axis is labelled 'Epoch').
    """
    _, ax = plt.subplots(figsize=(8, 6))
    ax.plot(losses, label='MSE Loss')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Loss')
    ax.set_title('Loss Curve')
    ax.legend()
    plt.show()

# Uncomment after training to plot the loss curve
# plot_loss_curve(losses)

# 9. Visualize latent codes using PCA
def visualize_latent_space(latent_codes):
    """Project latent codes to two PCA components and show them as a scatter plot.

    Parameters:
        latent_codes: Data passed to ``PCA.fit_transform``; rows are treated
            as samples and columns as features.
    """
    # Fit the PCA before creating the figure so a failing fit leaves no
    # empty figure behind.
    projection = PCA(n_components=2).fit_transform(latent_codes)
    first_component = projection[:, 0]
    second_component = projection[:, 1]

    _, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(first_component, second_component, s=5, alpha=0.6)
    ax.set_title('Latent Space Visualization (PCA)')
    ax.set_xlabel('Component 1')
    ax.set_ylabel('Component 2')
    plt.show()

# Uncomment after training to visualize latent space
# visualize_latent_space(latent_codes)

# 10. Show original and reconstructed images side by side
def show_images(original, reconstructed, n=10):
    """Show original images (top row) above their reconstructions (bottom row).

    Parameters:
        original: Indexable collection of images; ``original[i]`` must support
            ``.reshape(28, 28)``.
        reconstructed: Indexable collection of reconstructions, indexed and
            reshaped the same way as ``original``.
        n (int): Maximum number of image pairs to display. If fewer than ``n``
            images are available in either collection, only the available
            pairs are shown instead of raising ``IndexError``.
    """
    # Never index past the end of either collection.
    n = min(n, len(original), len(reconstructed))

    fig = plt.figure(figsize=(20, 4))
    for i in range(n):
        # Top row: the i-th original image.
        top_ax = fig.add_subplot(2, n, i + 1)
        top_ax.imshow(original[i].reshape(28, 28), cmap='gray')
        top_ax.axis('off')

        # Bottom row: the matching reconstruction, directly underneath.
        bottom_ax = fig.add_subplot(2, n, i + 1 + n)
        bottom_ax.imshow(reconstructed[i].reshape(28, 28), cmap='gray')
        bottom_ax.axis('off')
    plt.show()

# Uncomment after training to show images
# show_images(X_train, reconstructed_data)