# Library

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Base Layer Class


In [None]:
class Layer:
    """Abstract base for every building block of the network.

    Subclasses are expected to override both :meth:`forward` and
    :meth:`backward`. The base versions only raise, naming the concrete
    class so a missing override is easy to locate.
    """

    def forward(self, x):
        """Compute the layer output for input ``x``.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement forward()"
        )

    def backward(self, grad):
        """Propagate the upstream gradient ``grad`` back through the layer.

        Raises:
            NotImplementedError: always, unless a subclass overrides it.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement backward()"
        )


# Dense Layer


In [None]:
class Dense(Layer):
    """Fully connected (affine) layer computing ``x @ W + b``."""

    def __init__(self, input_dim, output_dim):
        """Create a weight matrix of shape (input_dim, output_dim) and a zero bias row."""
        # He-style scaling: standard-normal draws multiplied by sqrt(2 / fan_in).
        scale = np.sqrt(2.0 / input_dim)
        self.W = np.random.randn(input_dim, output_dim) * scale
        # The bias is kept as a (1, output_dim) row so it broadcasts across the batch.
        self.b = np.zeros((1, output_dim))

    def forward(self, x):
        """Remember ``x`` for the backward pass and return the affine output."""
        self.x = x
        return np.matmul(x, self.W) + self.b

    def backward(self, grad):
        """Store dL/dW and dL/db, then return dL/dx.

        ``grad`` is the incoming gradient with respect to this layer's
        output (dL/dZ).
        """
        cached_input = self.x

        # Parameter gradients, read later by the optimizer.
        self.db = np.sum(grad, axis=0, keepdims=True)
        self.dW = np.matmul(cached_input.T, grad)

        # Gradient handed to the preceding layer.
        return np.matmul(grad, self.W.T)


# Activation

In [None]:
# Activation: Tanh
class Tanh(Layer):
    """Element-wise hyperbolic-tangent activation."""

    def forward(self, x):
        """Apply tanh and cache the result for the backward pass."""
        activated = np.tanh(x)
        self.out = activated
        return activated

    def backward(self, grad):
        """Scale ``grad`` by the local derivative ``1 - tanh(x)^2``."""
        cached = self.out
        local_slope = 1 - cached * cached
        return grad * local_slope


# Activation: Sigmoid
class Sigmoid(Layer):
    """Element-wise logistic sigmoid activation."""

    def forward(self, x):
        """Apply the sigmoid in a numerically stable way and cache the result.

        ``exp(-|x|)`` never exceeds 1, so it cannot overflow. The naive
        ``1 / (1 + exp(-x))`` overflows inside ``exp`` for large negative
        inputs and emits RuntimeWarnings.
        """
        z = np.exp(-np.abs(x))
        # x >= 0: 1 / (1 + e^-x)        (z == e^-x)
        # x <  0: e^x / (1 + e^x)       (z == e^x)
        self.out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
        return self.out

    def backward(self, grad):
        """Scale ``grad`` by the local derivative ``s * (1 - s)``, s being the cached output."""
        return grad * (self.out * (1 - self.out))


# Loss: Mean Squared Error


In [None]:
def mse_loss(y_true, y_pred):
    """Return the mean of the squared element-wise residuals."""
    residual = y_true - y_pred
    return np.mean(residual * residual)


def mse_grad(y_true, y_pred):
    """Return the gradient of :func:`mse_loss` with respect to ``y_pred``.

    The loss averages over every element, so the gradient is
    ``2 * (y_pred - y_true) / n_elements``. Taking the element count from
    the residual itself, rather than ``shape[0] * shape[1]``, keeps this
    correct for inputs of any dimensionality (1-D, 2-D, ...).
    """
    diff = np.subtract(y_pred, y_true)
    return (2.0 / diff.size) * diff



# Optimizer: SGD


In [None]:
class SGD:
    """Plain stochastic gradient descent with a fixed learning rate."""

    def __init__(self, lr=0.1):
        """Remember the step size ``lr``."""
        self.lr = lr

    def step(self, layers):
        """Update, in place, every layer that exposes a ``W`` attribute.

        Such layers are expected to also carry ``b``, ``dW`` and ``db``.
        Layers without ``W`` (e.g. activations) are skipped.
        """
        for lyr in layers:
            if not hasattr(lyr, "W"):
                continue
            lyr.W -= self.lr * lyr.dW
            lyr.b -= self.lr * lyr.db



# Model Class


In [None]:
class Sequential:
    """Container that chains layers: forward in order, backward in reverse."""

    def __init__(self, layers):
        """Store ``layers``, a list of objects exposing ``forward``/``backward``."""
        self.layers = layers

    def forward(self, x):
        """Feed ``x`` through every layer in order and return the final output."""
        for l in self.layers:
            x = l.forward(x)
        return x

    def backward(self, grad):
        """Back-propagate ``grad`` through the layers in reverse order.

        Returns:
            The gradient with respect to the model input. Returning it
            follows the same contract as the individual layers'
            ``backward`` methods, so a model can be nested inside another
            one or used for input-gradient checks.
        """
        for l in reversed(self.layers):
            grad = l.backward(grad)
        return grad

class ReLU(Layer):
    """Rectified linear unit: keeps positive entries, zeroes the rest."""

    def __init__(self):
        # Input of the most recent forward pass; filled in by forward().
        self._cache = None

    def forward(self, X):
        """Cache ``X`` and return ``max(0, X)`` element-wise."""
        self._cache = X
        return np.maximum(0, X)

    def backward(self, d_out):
        """Let the gradient through only where the cached input was > 0."""
        pass_mask = (self._cache > 0).astype(float)
        return d_out * pass_mask


# Part 1: XOR Example


In [None]:
def train_xor(epochs=100000, lr=0.1, log_every=10000):
    """Train a 2-4-1 Tanh/Sigmoid network on XOR with full-batch SGD.

    Args:
        epochs: number of full-batch updates to run.
        lr: learning rate passed to the SGD optimizer.
        log_every: print the loss every this many epochs; a falsy value
            (0 or None) disables progress logging.

    Returns:
        The trained ``Sequential`` model. The final predictions are also
        printed, both raw and thresholded at 0.5.
    """
    X = np.array([[0., 0.],
                  [0., 1.],
                  [1., 0.],
                  [1., 1.]])
    Y = np.array([[0.], [1.], [1.], [0.]])

    model = Sequential([
        Dense(2, 4),
        Tanh(),
        Dense(4, 1),
        Sigmoid(),
    ])
    opt = SGD(lr=lr)

    for ep in range(1, epochs + 1):
        y_pred = model.forward(X)
        model.backward(mse_grad(Y, y_pred))
        opt.step(model.layers)

        # The loss is only needed for reporting, so compute it on demand.
        # It is evaluated on the pre-update predictions of this epoch.
        if log_every and ep % log_every == 0:
            loss = mse_loss(Y, y_pred)
            print(f"XOR Epoch {ep}/{epochs} - loss: {loss:.6f}")

    preds = model.forward(X)
    print("XOR final predictions (raw):")
    print(preds)
    print("XOR final predictions (rounded):")
    print((preds > 0.5).astype(int))

    return model

# Part 2: Autoencoder + SVM


In [None]:

# MNIST loader
def load_mnist_flat():
    """Load MNIST via Keras and return flattened, rescaled images with labels.

    Each image array is reshaped to rows of 784 values, cast to float32 and
    divided by 255. Labels are returned untouched.

    Returns:
        ``(x_train, y_train, x_test, y_test)``.
    """
    from tensorflow.keras.datasets import mnist

    def _as_flat_rows(images):
        # One row per image, float32, divided by 255.
        return images.reshape(-1, 784).astype(np.float32) / 255.0

    train_split, test_split = mnist.load_data()
    x_train, y_train = train_split
    x_test, y_test = test_split
    return _as_flat_rows(x_train), y_train, _as_flat_rows(x_test), y_test


def build_autoencoder(latent_dim=32):
    """Build a 784 -> latent_dim -> 784 dense autoencoder.

    Returns:
        ``(encoder, decoder, autoencoder)``. The autoencoder is assembled
        from the very same layer objects as the encoder and decoder, so
        training it also trains the two halves.
    """
    # Encoder: 784 -> 512 -> 128 -> latent_dim, with a linear final layer.
    encoder_stack = [
        Dense(784, 512),
        ReLU(),
        Dense(512, 128),
        ReLU(),
        Dense(128, latent_dim),
    ]
    encoder = Sequential(encoder_stack)

    # Decoder mirrors the encoder and ends with a sigmoid output.
    decoder_stack = [
        Dense(latent_dim, 128),
        ReLU(),
        Dense(128, 512),
        ReLU(),
        Dense(512, 784),
        Sigmoid(),
    ]
    decoder = Sequential(decoder_stack)

    autoencoder = Sequential(encoder.layers + decoder.layers)
    return encoder, decoder, autoencoder


def train_autoencoder(epochs=5, batch_size=256, lr=0.1, latent_dim=32):
    """Train the dense autoencoder on MNIST with mini-batch SGD and MSE loss.

    Args:
        epochs: number of passes over the training images.
        batch_size: number of images per gradient step.
        lr: learning rate for the SGD optimizer.
        latent_dim: size of the bottleneck representation.

    Returns:
        ``(encoder, decoder, autoencoder, X_test, y_test, losses)``, where
        ``losses`` holds one sample-weighted mean MSE per epoch.
    """
    X_train, _, X_test, y_test = load_mnist_flat()
    encoder, decoder, autoencoder = build_autoencoder(latent_dim)

    opt = SGD(lr=lr)
    N = X_train.shape[0]
    # Ceil-division so the trailing partial batch is trained on as well.
    steps = max(1, -(-N // batch_size))

    losses = []
    print("\nTraining autoencoder:")
    for epoch in range(1, epochs + 1):
        # Shuffle through an index permutation instead of re-copying (and
        # permanently reordering) the whole training array every epoch.
        perm = np.random.permutation(N)
        weighted_loss = 0.0

        for step in range(steps):
            start = step * batch_size
            batch_idx = perm[start:start + batch_size]
            xb = X_train[batch_idx]

            y_pred = autoencoder.forward(xb)

            # Weight each batch by its size so a shorter final batch does
            # not skew the epoch average.
            weighted_loss += float(mse_loss(xb, y_pred)) * len(batch_idx)

            autoencoder.backward(mse_grad(xb, y_pred))
            opt.step(autoencoder.layers)

        epoch_loss = weighted_loss / N
        print(f"Epoch {epoch}/{epochs} - loss: {epoch_loss:.6f}")
        losses.append(epoch_loss)

    return encoder, decoder, autoencoder, X_test, y_test, losses


def extract_latent_features(encoder, X):
    """Return the encoder's forward-pass output for ``X``."""
    latent = encoder.forward(X)
    return latent


def train_svm(latent_train, y_train, latent_test, y_test):
    """Fit an RBF-kernel SVC on latent features and report test metrics.

    Prints accuracy, confusion matrix and classification report.

    Returns:
        ``(accuracy, confusion_matrix, classification_report)``.
    """
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

    classifier = SVC(kernel="rbf")
    classifier.fit(latent_train, y_train)
    predicted = classifier.predict(latent_test)

    acc = accuracy_score(y_test, predicted)
    cm = confusion_matrix(y_test, predicted)
    cr = classification_report(y_test, predicted, digits=4)

    print("\nSVM Results:")
    report_lines = (
        ("Accuracy:", acc),
        ("Confusion matrix:\n", cm),
        ("Classification report:\n", cr),
    )
    for label, value in report_lines:
        print(label, value)

    return acc, cm, cr

# Gradient Checking


In [None]:
def gradient_check():
    """Compare Dense's analytical dL/dW with a central-difference estimate.

    A fixed seed makes the check reproducible. The Frobenius norm of the
    difference is printed together with a verdict.

    Returns:
        The norm ``||analytical - numerical||``, so callers can assert on
        it programmatically.
    """
    np.random.seed(0)

    X = np.random.randn(5, 3)
    Y = np.random.randn(5, 2)

    layer = Dense(3, 2)

    # Analytical gradient via one forward/backward pass.
    grad_out = mse_grad(Y, layer.forward(X))
    layer.backward(grad_out)
    grad_analytic = layer.dW.copy()

    # Numerical gradient: perturb one weight at a time by +/- eps.
    eps = 1e-5
    grad_numeric = np.zeros_like(layer.W)

    for idx in np.ndindex(*layer.W.shape):
        w_orig = layer.W[idx]

        layer.W[idx] = w_orig + eps
        loss_plus = mse_loss(Y, layer.forward(X))

        layer.W[idx] = w_orig - eps
        loss_minus = mse_loss(Y, layer.forward(X))

        grad_numeric[idx] = (loss_plus - loss_minus) / (2 * eps)
        layer.W[idx] = w_orig  # restore the weight before moving on

    diff = np.linalg.norm(grad_analytic - grad_numeric)
    print("\nGradient Checking:")
    print("||Analytical - Numerical|| =", diff)

    if diff < 1e-6:
        print("Backpropagation is CORRECT")
    else:
        print("Backpropagation may be incorrect")

    return diff

def plot_loss(losses):
    """Draw the per-epoch autoencoder loss curve in a new figure and show it."""
    plt.figure()
    plt.plot(losses)

    # Labelling and cosmetics.
    plt.title("Autoencoder Training Loss")
    plt.ylabel("MSE Loss")
    plt.xlabel("Epoch")
    plt.grid()

    plt.show()


def visualize_reconstruction(autoencoder, X_test, n=5):
    """Show up to ``n`` test images above their autoencoder reconstructions.

    Args:
        autoencoder: model exposing ``forward``.
        X_test: array whose rows are reshaped to 28x28 for display.
        n: number of image pairs to draw. It is clamped to the number of
            rows actually available, so asking for more samples than
            ``X_test`` holds no longer raises ``IndexError``.
    """
    samples = X_test[:n]
    n = min(n, len(samples))
    preds = autoencoder.forward(samples)

    plt.figure(figsize=(10, 4))
    for i in range(n):
        # Row 0 holds the originals, row 1 the reconstructions.
        panels = ((samples[i], "Original"), (preds[i], "Reconstructed"))
        for row, (image, label) in enumerate(panels):
            plt.subplot(2, n, row * n + i + 1)
            plt.imshow(image.reshape(28, 28), cmap="gray")
            plt.axis("off")
            if i == 0:
                # Only the first column carries the row title.
                plt.title(label)

    plt.show()


# Run both parts


In [None]:
# Script entry point: runs the XOR demo, the gradient check, and then the
# autoencoder + SVM pipeline.
if __name__ == "__main__":
    print("=== Part 1: XOR ===")
    train_xor()
    gradient_check()
    print("\n=== Part 2: Autoencoder + SVM ===")
    # train_autoencoder returns only the MNIST *test* split alongside the models.
    encoder, decoder, autoencoder, X_test, y_test,losses = train_autoencoder(
        epochs=200, batch_size=256, lr=0.02, latent_dim=64
    )

    # The SVM is fitted on latent codes of test rows 0-4999 and evaluated on
    # rows 5000-9999; no autoencoder training images are used for the SVM.
    latent_train = extract_latent_features(encoder, X_test[:5000])
    latent_test  = extract_latent_features(encoder, X_test[5000:10000])

    # Labels sliced with the same ranges as the features above.
    y_train_small = y_test[:5000]
    y_test_small  = y_test[5000:10000]

    train_svm(latent_train, y_train_small, latent_test, y_test_small)
    plot_loss(losses)
    # Uses the function's default number of samples.
    visualize_reconstruction(autoencoder, X_test)