# Library

In [None]:

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Make float64 the default float type for Keras layers and variables.
# NOTE(review): presumably chosen so the finite-difference gradient check in
# gradient_check_xor is not dominated by float32 round-off — confirm.
tf.keras.backend.set_floatx('float64')

# PART 1: XOR NETWORK


In [None]:
# Architecture:
# Dense(2→4) → Tanh → Dense(4→1) → Sigmoid

def train_xor_keras():
    """Fit a small 2-4-1 dense network to the XOR truth table.

    After training, the model is handed to ``gradient_check_xor`` and its
    raw and thresholded (> 0.5) predictions on the four inputs are printed.
    """
    print("\n=== XOR (Keras) ===")

    # The four XOR input pairs and their targets, one sample per row.
    inputs = np.array(
        [[0., 0.], [0., 1.], [1., 0.], [1., 1.]],
        dtype=np.float32,
    )
    targets = np.array([[0.], [1.], [1.], [0.]], dtype=np.float32)

    hidden_layer = tf.keras.layers.Dense(
        4,
        input_shape=(2,),
        activation="tanh",
        kernel_initializer=tf.keras.initializers.GlorotUniform(),
    )
    output_layer = tf.keras.layers.Dense(1, activation="sigmoid")
    net = tf.keras.Sequential([hidden_layer, output_layer])

    sgd = tf.keras.optimizers.SGD(learning_rate=0.1)
    net.compile(optimizer=sgd, loss="mse")

    # Train silently, then verify the gradients on the trained weights.
    net.fit(inputs, targets, epochs=10000, verbose=0)
    gradient_check_xor(net, inputs, targets)

    outputs = net.predict(inputs)
    print("XOR raw predictions:")
    print(outputs)
    print("XOR rounded predictions:")
    print((outputs > 0.5).astype(int))



def gradient_check_xor(model, X, Y, epsilon=1e-4, tolerance=1e-1):
    """Compare TensorFlow's gradient with a central-difference estimate.

    The check runs on the first trainable variable of ``model`` using the
    mean-squared-error loss between ``Y`` and ``model(X)``. Both gradients,
    their relative error and a PASSED/FAILED verdict are printed.

    Args:
        model: Callable Keras model exposing ``trainable_variables``.
        X: Inputs fed to ``model``.
        Y: Targets compared against the model output.
        epsilon: Step size of the central difference
            ``(L(w + eps) - L(w - eps)) / (2 * eps)``.
        tolerance: The check passes when the relative error is strictly
            below this value.
    """
    print("\n=== Gradient Checking (XOR) ===")

    # Use MSE loss
    loss_fn = tf.keras.losses.MeanSquaredError()

    # Only ONE weight tensor is tested: the first trainable variable.
    weights = model.trainable_variables[0]

    # Analytical gradient from TensorFlow's autodiff.
    with tf.GradientTape() as tape:
        loss = loss_fn(Y, model(X))
    grad_analytical = tape.gradient(loss, weights).numpy()

    original = weights.numpy()
    grad_numerical = np.zeros_like(original)

    try:
        # Perturb one entry at a time; np.ndindex walks every index of the
        # variable regardless of its rank.
        for idx in np.ndindex(*original.shape):
            perturbed = original.copy()

            # W + epsilon
            perturbed[idx] = original[idx] + epsilon
            weights.assign(perturbed)
            loss_plus = loss_fn(Y, model(X)).numpy()

            # W - epsilon
            perturbed[idx] = original[idx] - epsilon
            weights.assign(perturbed)
            loss_minus = loss_fn(Y, model(X)).numpy()

            grad_numerical[idx] = (loss_plus - loss_minus) / (2 * epsilon)
    finally:
        # Always put the unperturbed weights back, even if a forward pass
        # raised part-way through the loop.
        weights.assign(original)

    # Compare gradients
    diff = np.linalg.norm(grad_analytical - grad_numerical)
    norm = np.linalg.norm(grad_analytical) + np.linalg.norm(grad_numerical)

    # Two all-zero gradients agree exactly; avoid evaluating 0/0 (nan),
    # which would otherwise be reported as a failure.
    relative_error = diff / norm if norm > 0 else 0.0

    print("Analytical gradient:\n", grad_analytical)
    print("Numerical gradient:\n", grad_numerical)
    print("Relative error:", relative_error)

    if relative_error < tolerance:
        print("Gradient check PASSED")
    else:
        print("Gradient check FAILED")


# PART 2: AUTOENCODER


In [None]:
# Architecture:
# Encoder: 784→512→128→latent
# Decoder: latent→128→512→784

def load_mnist_flat(test_limit=5000):
    """Load MNIST with images flattened to 784-element float32 rows.

    Args:
        test_limit: Number of leading test samples (and labels) to keep.
            ``None`` keeps the whole test split. Defaults to 5000.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)``. Image arrays are
        reshaped to ``(-1, 784)``, cast to float32 and divided by 255.0;
        labels are returned as loaded.

    Raises:
        ValueError: If ``test_limit`` is negative.
    """
    if test_limit is not None and test_limit < 0:
        raise ValueError("test_limit must be non-negative or None")

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Slicing with None keeps everything, so one expression covers both cases.
    x_test = x_test[:test_limit]
    y_test = y_test[:test_limit]

    def _flatten(images):
        # One row per image, pixel values divided by 255.0.
        return images.reshape(-1, 784).astype("float32") / 255.0

    return _flatten(x_train), _flatten(x_test), y_train, y_test


def build_autoencoder(latent_dim=64):
    """Create the dense encoder, decoder and the autoencoder chaining them.

    Encoder: 784 -> 512 -> 128 -> ``latent_dim``; the two hidden layers use
    ReLU and the latent layer is created without an activation argument.
    Decoder: ``latent_dim`` -> 128 -> 512 -> 784; the two hidden layers use
    ReLU and the output layer uses a sigmoid.

    Args:
        latent_dim: Width of the encoder output / decoder input.

    Returns:
        Tuple ``(encoder, decoder, autoencoder)`` of Sequential models, where
        ``autoencoder`` applies ``encoder`` followed by ``decoder``.
    """

    def relu_dense(units, **extra):
        # Hidden layers all share ReLU activation and He-normal kernels.
        return tf.keras.layers.Dense(
            units,
            activation="relu",
            kernel_initializer="he_normal",
            **extra,
        )

    encoder = tf.keras.Sequential()
    encoder.add(relu_dense(512, input_shape=(784,)))
    encoder.add(relu_dense(128))
    encoder.add(tf.keras.layers.Dense(latent_dim))

    decoder = tf.keras.Sequential()
    decoder.add(relu_dense(128, input_shape=(latent_dim,)))
    decoder.add(relu_dense(512))
    decoder.add(tf.keras.layers.Dense(784, activation="sigmoid"))

    autoencoder = tf.keras.Sequential([encoder, decoder])
    return encoder, decoder, autoencoder


def train_autoencoder(latent_dim=64, epochs=200, batch_size=256,
                      learning_rate=0.02):
    """Train the MNIST autoencoder with SGD and an MSE reconstruction loss.

    The data comes from ``load_mnist_flat`` and the model from
    ``build_autoencoder``; the test split returned by the loader is used as
    validation data during fitting.

    Args:
        latent_dim: Size of the latent code passed to ``build_autoencoder``.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.
        learning_rate: Learning rate of the SGD optimizer.

    Returns:
        Tuple ``(encoder, autoencoder, x_train, x_test, y_train, y_test,
        history)`` where ``history`` is the object returned by ``fit``.
    """
    print("\n=== Autoencoder (Keras) ===")

    x_train, x_test, y_train, y_test = load_mnist_flat()
    encoder, decoder, autoencoder = build_autoencoder(latent_dim=latent_dim)

    autoencoder.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
        loss="mse",
    )

    # Inputs double as targets: the network learns to reproduce its input.
    history = autoencoder.fit(
        x_train,
        x_train,
        epochs=epochs,
        batch_size=batch_size,
        shuffle=True,
        validation_data=(x_test, x_test),
    )

    return encoder, autoencoder, x_train, x_test, y_train, y_test, history

def train_svm_on_latent(encoder, X_train, y_train, X_test, y_test):
    """
    Fit a linear-kernel SVM on encoder outputs and print its test metrics.

    Both splits are passed through ``encoder.predict``; the classifier is
    fitted on the training codes and scored on the test codes. Accuracy,
    the confusion matrix and the classification report are printed.
    """
    # Encode both splits into latent features.
    train_codes = encoder.predict(X_train)
    test_codes = encoder.predict(X_test)

    # fit() returns the estimator itself, so construction and fitting chain.
    classifier = SVC(kernel='linear').fit(train_codes, y_train)
    predicted = classifier.predict(test_codes)

    # Compute every metric before printing anything.
    accuracy = accuracy_score(y_test, predicted)
    matrix = confusion_matrix(y_test, predicted)
    summary = classification_report(y_test, predicted, digits=4)

    print("\n=== SVM on Autoencoder Latent Features ===")
    print(f"Test Accuracy: {accuracy:.4f}")
    sections = (
        ("\nConfusion Matrix:", matrix),
        ("\nClassification Report:", summary),
    )
    for heading, content in sections:
        print(heading)
        print(content)



# VISUALIZATION


In [None]:

def plot_loss(history):
    """Plot the per-epoch training and validation loss curves.

    Args:
        history: Object whose ``history`` mapping holds ``"loss"`` and
            ``"val_loss"`` sequences, as returned by ``fit``.
    """
    plt.figure()

    # (history key, legend label) for each curve, drawn in this order.
    series = (("loss", "Train"), ("val_loss", "Validation"))
    for key, legend_label in series:
        plt.plot(history.history[key], label=legend_label)

    plt.xlabel("Epoch")
    plt.ylabel("MSE Loss")
    plt.title("Autoencoder Training Loss (Keras)")
    plt.legend()
    plt.grid()
    plt.show()


def visualize_reconstruction(autoencoder, X_test, n=5):
    """Show the first ``n`` test images above their reconstructions.

    Args:
        autoencoder: Model whose ``predict`` output is reshaped to 28x28.
        X_test: Flattened images; each row is reshaped to 28x28 for display.
        n: Number of image columns to draw.
    """
    reconstructed = autoencoder.predict(X_test[:n])

    plt.figure(figsize=(10, 4))

    # Top row holds the inputs, bottom row the model outputs.
    rows = ((X_test, "Original"), (reconstructed, "Reconstructed"))
    for col in range(n):
        for row, (images, row_title) in enumerate(rows):
            # Subplot indices are 1-based and run row by row.
            plt.subplot(2, n, row * n + col + 1)
            plt.imshow(images[col].reshape(28, 28), cmap="gray")
            plt.axis("off")
            # Only the first column carries the row title.
            if col == 0:
                plt.title(row_title)
    plt.show()



# MAIN


In [None]:
if __name__ == "__main__":
    # Part 1: train the XOR network (this also runs its gradient check).
    train_xor_keras()

    # Part 2: train the autoencoder, then plot, visualise and classify.
    (
        encoder,
        autoencoder,
        X_train,
        X_test,
        y_train,
        y_test,
        history,
    ) = train_autoencoder()
    plot_loss(history=history)
    visualize_reconstruction(autoencoder=autoencoder, X_test=X_test)
    train_svm_on_latent(
        encoder=encoder,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )