<a href="https://colab.research.google.com/github/PCBZ/CS6140/blob/main/HW2B_problem1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

class AutoEncoder:
    """
    Single-hidden-layer autoencoder trained with full-batch gradient descent.

    Architecture (default sizes):
    Input (8) -> Hidden (3) -> Output (8)
    Both layers use the sigmoid activation function, and the loss is the
    mean squared error between the target and the reconstruction.
    """
    def __init__(self, learning_rate=0.5, input_size=8, hidden_size=3):
        """
        Create the network with randomly initialized weights and zero biases.
        learning_rate: step size used by update_weights
        input_size: number of input (and output) units, 8 by default
        hidden_size: number of hidden units, 3 by default
        """
        self.learning_rate = learning_rate
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Initialize weights with small random values (nontrivial initialization)
        # First layer: (input_size, hidden_size)
        self.weights1 = np.random.randn(input_size, hidden_size) * 0.5
        self.bias1 = np.zeros((1, hidden_size))

        # Second layer: (hidden_size, input_size)
        self.weights2 = np.random.randn(hidden_size, input_size) * 0.5
        self.bias2 = np.zeros((1, input_size))

        # Values cached by forward_pass and consumed by backward_pass
        self.input = None
        self.hidden_input = None
        self.hidden_output = None
        self.final_input = None
        self.final_output = None

    def sigmoid(self, x):
        """
        Element-wise logistic function.
        The argument is clipped to [-500, 500] before np.exp is applied.
        """
        return 1 / (1 + np.exp(-np.clip(x, -500, 500)))

    def sigmoid_derivative(self, x):
        """
        Derivative of the sigmoid expressed through its output.
        x: the sigmoid *output* (activation), not the pre-activation value
        """
        return x * (1 - x)

    def mse_loss(self, y_true, y_pred):
        """
        Mean squared error averaged over every element of the arrays.
        """
        return np.mean((y_true - y_pred) ** 2)

    def forward_pass(self, X):
        """
        Forward pass through the network
        X: input (n, input_size) data
        Returns: output (n, input_size)
        Every intermediate value is stored on the instance for backward_pass.
        """
        self.input = X

        # First Layer: Input -> Hidden
        self.hidden_input = self.input @ self.weights1 + self.bias1

        # Apply sigmoid activation to get hidden output, shape (n, hidden_size)
        self.hidden_output = self.sigmoid(self.hidden_input)

        # Second Layer: Hidden -> Output
        self.final_input = self.hidden_output @ self.weights2 + self.bias2

        # Apply sigmoid activation to get final output, shape (n, input_size)
        self.final_output = self.sigmoid(self.final_input)

        return self.final_output

    def backward_pass(self, X, y):
        """
        Backward pass through the network
        X: input (n, input_size) data; the gradients are computed from the
           values cached by the most recent forward_pass, so X should be
           the batch that was passed to it
        y: target (n, input_size) data
        Returns: tuple of (d_weights1, d_bias1, d_weights2, d_bias2)
        Raises: RuntimeError if forward_pass has not been called yet
        """
        if self.final_output is None:
            raise RuntimeError("forward_pass must be called before backward_pass")

        # Derivative of MSE loss w.r.t final output; the loss is a mean over
        # every element, hence the division by the total element count
        total_elements = self.final_output.size
        d_loss_d_final_output = 2 * (self.final_output - y) / total_elements

        # Chain through the output sigmoid to get the second-layer delta
        d_loss_d_final_input = (
            d_loss_d_final_output * self.sigmoid_derivative(self.final_output)
        )

        # Gradients for second layer weights and bias
        d_loss_d_weights2 = self.hidden_output.T @ d_loss_d_final_input
        d_loss_d_bias2 = np.sum(d_loss_d_final_input, axis=0, keepdims=True)

        # Propagate gradient back to hidden layer
        d_loss_d_hidden_output = d_loss_d_final_input @ self.weights2.T

        # Chain through the hidden sigmoid to get the first-layer delta
        d_loss_d_hidden_input = (
            d_loss_d_hidden_output * self.sigmoid_derivative(self.hidden_output)
        )

        # Gradients for first layer weights and bias
        d_loss_d_weights1 = self.input.T @ d_loss_d_hidden_input
        d_loss_d_bias1 = np.sum(d_loss_d_hidden_input, axis=0, keepdims=True)

        return d_loss_d_weights1, d_loss_d_bias1, d_loss_d_weights2, d_loss_d_bias2

    def update_weights(self, gradient):
        """
        Update weights and biases in place with one gradient-descent step
        gradient: tuple of (d_weights1, d_bias1, d_weights2, d_bias2)
        """
        d_weights1, d_bias1, d_weights2, d_bias2 = gradient

        self.weights1 -= self.learning_rate * d_weights1
        self.bias1 -= self.learning_rate * d_bias1
        self.weights2 -= self.learning_rate * d_weights2
        self.bias2 -= self.learning_rate * d_bias2

    def train(self, X, epochs=1000, target_mse=0.001):
        """
        Train the network to reconstruct X
        X: input (n, input_size) data, also used as the target
        epochs: maximum number of epochs
        target_mse: training stops as soon as the loss drops below this value
        Returns: list with the loss measured at the start of each epoch
        """
        loss_history = []

        for epoch in range(epochs):
            # Forward pass
            reconstructed = self.forward_pass(X)

            # Calculate loss
            loss = self.mse_loss(X, reconstructed)
            loss_history.append(loss)

            # Stop before updating once the target has been reached
            if loss < target_mse:
                print(f"Converged at epoch {epoch} with loss {loss}")
                break

            # Backward pass
            gradients = self.backward_pass(X, X)

            # Update weights
            self.update_weights(gradients)

        return loss_history

    def get_encoding(self, X):
        """
        Get the hidden layer representation (encoding) of input X
        Runs a full forward pass, so the cached activations are overwritten.
        """
        self.forward_pass(X)
        return self.hidden_output

def main():
    """
    Train the autoencoder on the eight one-hot patterns and print a report.

    The report lists, for every pattern, the input bits, the hidden-layer
    encoding and the reconstructed output, followed by the accuracy of the
    reconstruction thresholded at 0.5 and the final mean squared error.
    """
    X = np.eye(8)

    print("Training Data (8-bit identity patterns):")
    print(X)
    print("\n" + "="*60 + "\n")

    auto_encoder = AutoEncoder(learning_rate=2.0)

    print("Training Autoencoder...")
    print("Target: MSE < 0.001")
    print("="*60)

    # The per-epoch loss history returned by train() is not used in the report
    auto_encoder.train(X, epochs=40000, target_mse=0.001)

    print("\n" + "="*60)
    print("Reconstruction Results:")
    print("="*60)

    reconstructed = auto_encoder.forward_pass(X)
    encoding = auto_encoder.get_encoding(X)

    print("\nInput -> Encoding (Hidden Layer) -> Reconstructed Output")
    print("-"*60)

    # The three arrays are row-aligned: one row per training pattern
    for pattern, code, output in zip(X, encoding, reconstructed):
        input_str = ''.join(str(int(bit)) for bit in pattern)
        encoding_str = ' '.join(f"{h:.3f}" for h in code)
        output_str = ' '.join(f"{o:.3f}" for o in output)

        print(f"{input_str} -> [{encoding_str}] -> [{output_str}]")

    # Binary reconstruction (threshold at 0.5)
    binary_reconstructed = (reconstructed > 0.5).astype(int)
    accuracy = np.mean(binary_reconstructed == X) * 100

    print(f"\nReconstruction Accuracy: {accuracy:.2f}%")
    print(f"Final MSE: {auto_encoder.mse_loss(X, reconstructed):.6f}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()




Training Data (8-bit identity patterns):
[[1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1.]]


Training Autoencoder...
Target: MSE < 0.001
Converged at epoch 25878 with loss 0.0009999665769880478

Reconstruction Results:

Input -> Encoding (Hidden Layer) -> Reconstructed Output
------------------------------------------------------------
10000000 -> [0.939 0.973 0.010] -> [0.942 0.027 0.000 0.057 0.029 0.000 0.000 0.000]
01000000 -> [0.914 0.026 0.018] -> [0.042 0.951 0.031 0.000 0.000 0.000 0.044 0.000]
00100000 -> [0.015 0.011 0.063] -> [0.000 0.040 0.942 0.000 0.042 0.000 0.000 0.033]
00010000 -> [0.989 0.985 0.984] -> [0.028 0.000 0.000 0.913 0.000 0.031 0.027 0.000]
00001000 -> [0.011 0.885 0.017] -> [0.044 0.000 0.029 0.000 0.946 0.043 0.000 0.000]
00000100 -> [0.013 0.985 0.969] -> [0.000 0.000 0.000 0.056 0.027 0.94

# Explanation of the purpose of the training algorithm
The network has 8 inputs → 3 hidden neurons → 8 outputs: the encoder compresses each 8-value input into 3 hidden values, and the decoder expands those 3 values back into 8.

## What Training Accomplishes
Through the iterations, backpropagation teaches the network to:
1. **Discover its own compression scheme** - like learning to create a ZIP file
2. **Find essential patterns** - what information is crucial to preserve?
3. **Minimize reconstruction error** - get as close to the original as possible

## Potential Real-World Applications
- **Data compression** - learned automatically, not hard-coded
- **Noise removal** - reconstruct clean data from noisy input
- **Anomaly detection** - unusual inputs won't reconstruct well

Instead of manually designing a compression algorithm, the network learns one from examples through training, potentially discovering more efficient schemes than humans would devise.