In [4]:
import numpy as np
import pandas as pd

# Generate synthetic data for demonstration (e.g., a simple 2D Gaussian blob)
np.random.seed(42)
data = pd.DataFrame(np.random.randn(1000, 2), columns=["x1", "x2"])
print(data.head())
data = data.values  # Convert DataFrame to NumPy array

# Define helper functions
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def relu(x):
    return np.maximum(0, x)

# Initialize weights and biases for encoder and decoder
input_dim = 2       # Input features
hidden_dim = 4      # Hidden layer size
latent_dim = 2      # Latent space size

# Encoder weights and biases
W_enc1 = np.random.randn(input_dim, hidden_dim) * 0.1
b_enc1 = np.zeros((1, hidden_dim))
W_mu = np.random.randn(hidden_dim, latent_dim) * 0.1
b_mu = np.zeros((1, latent_dim))
W_logvar = np.random.randn(hidden_dim, latent_dim) * 0.1
b_logvar = np.zeros((1, latent_dim))

# Decoder weights and biases
W_dec1 = np.random.randn(latent_dim, hidden_dim) * 0.1
b_dec1 = np.zeros((1, hidden_dim))
W_out = np.random.randn(hidden_dim, input_dim) * 0.1
b_out = np.zeros((1, input_dim))

# Hyperparameters
learning_rate = 0.01
epochs = 1000

# Training loop
for epoch in range(epochs):
    # Forward pass
    # 1. Encoder
    hidden_enc = relu(data @ W_enc1 + b_enc1)
    mu = hidden_enc @ W_mu + b_mu
    logvar = hidden_enc @ W_logvar + b_logvar

    # 2. Reparameterization trick
    std = np.exp(0.5 * logvar)
    eps = np.random.randn(*std.shape)  # Random noise
    z = mu + eps * std  # Sampled latent vector

    # 3. Decoder
    hidden_dec = relu(z @ W_dec1 + b_dec1)
    reconstructed = sigmoid(hidden_dec @ W_out + b_out)

    # Compute the losses
    # Reconstruction loss (MSE)
    reconstruction_loss = np.mean(np.sum((data - reconstructed)**2, axis=1))

    # KL divergence loss
    kl_loss = -0.5 * np.mean(np.sum(1 + logvar - mu**2 - np.exp(logvar), axis=1))

    # Total loss
    total_loss = reconstruction_loss + kl_loss

    # Backward pass (manual gradient calculation)
    # Gradients for decoder
    d_reconstructed = 2 * (reconstructed - data) / data.shape[0]  # Derivative of MSE
    d_hidden_dec = d_reconstructed @ W_out.T * (hidden_dec > 0)
    d_W_out = hidden_dec.T @ d_reconstructed
    d_b_out = np.sum(d_reconstructed, axis=0, keepdims=True)

    # Gradients for decoder weights
    d_W_dec1 = z.T @ d_hidden_dec
    d_b_dec1 = np.sum(d_hidden_dec, axis=0, keepdims=True)

    # Gradients for encoder
    d_z = d_hidden_dec @ W_dec1.T
    d_mu = d_z
    d_logvar = d_z * eps * 0.5 * np.exp(0.5 * logvar)

    d_hidden_enc = (d_mu @ W_mu.T + d_logvar @ W_logvar.T) * (hidden_enc > 0)
    d_W_mu = hidden_enc.T @ d_mu
    d_b_mu = np.sum(d_mu, axis=0, keepdims=True)
    d_W_logvar = hidden_enc.T @ d_logvar
    d_b_logvar = np.sum(d_logvar, axis=0, keepdims=True)

    # Update weights for encoder
    W_mu -= learning_rate * d_W_mu
    b_mu -= learning_rate * d_b_mu
    W_logvar -= learning_rate * d_W_logvar
    b_logvar -= learning_rate * d_b_logvar
    W_enc1 -= learning_rate * (data.T @ d_hidden_enc)
    b_enc1 -= learning_rate * np.sum(d_hidden_enc, axis=0, keepdims=True)

    # Update weights for decoder
    W_out -= learning_rate * d_W_out
    b_out -= learning_rate * d_b_out
    W_dec1 -= learning_rate * d_W_dec1
    b_dec1 -= learning_rate * d_b_dec1

    # Print loss
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {total_loss:.4f}, Reconstruction Loss: {reconstruction_loss:.4f}, KL Loss: {kl_loss:.4f}")

# Final output
print("\nTraining complete!")


         x1        x2
0  0.496714 -0.138264
1  0.647689  1.523030
2 -0.234153 -0.234137
3  1.579213  0.767435
4 -0.469474  0.542560
Epoch 0, Loss: 2.3696, Reconstruction Loss: 2.3692, KL Loss: 0.0004
Epoch 100, Loss: 2.1065, Reconstruction Loss: 2.1057, KL Loss: 0.0008
Epoch 200, Loss: 2.0183, Reconstruction Loss: 2.0158, KL Loss: 0.0024
Epoch 300, Loss: 1.9871, Reconstruction Loss: 1.9805, KL Loss: 0.0066
Epoch 400, Loss: 1.9826, Reconstruction Loss: 1.9675, KL Loss: 0.0152
Epoch 500, Loss: 1.9879, Reconstruction Loss: 1.9542, KL Loss: 0.0338
Epoch 600, Loss: 2.0618, Reconstruction Loss: 1.9369, KL Loss: 0.1250
Epoch 700, Loss: nan, Reconstruction Loss: nan, KL Loss: nan
Epoch 800, Loss: nan, Reconstruction Loss: nan, KL Loss: nan


  return 1 / (1 + np.exp(-x))
  kl_loss = -0.5 * np.mean(np.sum(1 + logvar - mu**2 - np.exp(logvar), axis=1))
  logvar = hidden_enc @ W_logvar + b_logvar
  hidden_dec = relu(z @ W_dec1 + b_dec1)
  reconstructed = sigmoid(hidden_dec @ W_out + b_out)
  kl_loss = -0.5 * np.mean(np.sum(1 + logvar - mu**2 - np.exp(logvar), axis=1))
  d_W_out = hidden_dec.T @ d_reconstructed
  d_W_dec1 = z.T @ d_hidden_dec
  d_z = d_hidden_dec @ W_dec1.T
  d_logvar = d_z * eps * 0.5 * np.exp(0.5 * logvar)
  d_W_mu = hidden_enc.T @ d_mu
  d_W_mu = hidden_enc.T @ d_mu
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Epoch 900, Loss: nan, Reconstruction Loss: nan, KL Loss: nan

Training complete!
