In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.datasets import mnist

# Import your custom neural-network library
from lib.layers import Dense
from lib.activations import ReLU, Sigmoid, Tanh
from lib.losses import MSELoss
from lib.optimizer import SGD
from lib.network import Sequential

# Seed NumPy's global RNG so that code drawing from np.random (e.g. the
# per-epoch shuffling in the autoencoder training loop) is repeatable.
# NOTE(review): presumably this also fixes the lib's weight initialisation,
# if it draws from np.random — confirm in lib.layers.
np.random.seed(0)



In [None]:
def plot_loss(losses, title="Loss Curve"):
    """Draw a line chart of loss values and display it.

    Args:
        losses: Sequence of loss values; each is plotted against its index,
            and the x-axis is labelled "Epoch".
        title: Text shown above the chart.
    """
    _, ax = plt.subplots(figsize=(6, 4))
    ax.plot(losses)
    ax.set_title(title)
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    # Argument-less grid() toggles the grid lines, exactly like plt.grid().
    ax.grid()
    plt.show()


In [None]:
def numerical_gradient(model, X, Y, loss_fn, epsilon=1e-5):
    """
    Performs a gradient check on the first Dense layer of ``model`` only.

    Every entry of that layer's ``W`` and ``b`` is perturbed by ``+epsilon``
    and ``-epsilon``, the loss is re-evaluated each time, and the resulting
    central-difference slope is compared with the gradient the layer stored
    during ``model.backward``.

    Args:
        model: Network exposing ``layers``, ``forward(X)`` and ``backward(grad)``.
        X: Input batch passed to ``model.forward``.
        Y: Targets passed to ``loss_fn``.
        loss_fn: Loss exposing ``forward(Y, y_pred)`` and ``backward(Y, y_pred)``.
        epsilon: Size of the finite-difference step.

    Returns:
        The largest relative error over all checked entries, or ``0.0`` when
        the layer has no parameter entries to check.

    Raises:
        ValueError: If ``model.layers`` contains no Dense layer.
    """
    dense = next((layer for layer in model.layers if isinstance(layer, Dense)), None)
    if dense is None:
        raise ValueError("numerical_gradient requires a model with at least one Dense layer")

    # Forward + backward to get analytical gradients.
    # NOTE(review): assumes model.backward(grad) called without lr leaves the
    # parameters unchanged; otherwise the analytical and numerical gradients
    # would be evaluated at different weights — confirm in lib.network.
    y_pred = model.forward(X)
    loss_fn.forward(Y, y_pred)
    model.backward(loss_fn.backward(Y, y_pred))

    # Snapshot the analytical gradients so the perturbation passes below can
    # never alter the reference values.
    checks = (
        (dense.W, np.copy(dense.grad_W)),
        (dense.b, np.copy(dense.grad_b)),
    )

    errors = []
    for param, analytical_grad in checks:
        for idx in np.ndindex(param.shape):
            old_val = param[idx]
            try:
                # +epsilon
                param[idx] = old_val + epsilon
                loss_pos = loss_fn.forward(Y, model.forward(X))

                # -epsilon
                param[idx] = old_val - epsilon
                loss_neg = loss_fn.forward(Y, model.forward(X))
            finally:
                # Always restore the entry, even if a forward pass raised,
                # so the model is never left with a perturbed weight.
                param[idx] = old_val

            numerical = (loss_pos - loss_neg) / (2 * epsilon)
            analytical = analytical_grad[idx]

            # The 1e-12 term keeps the ratio finite when both gradients are zero.
            rel_error = abs(numerical - analytical) / (abs(numerical) + abs(analytical) + 1e-12)
            errors.append(rel_error)

    return np.max(errors) if errors else 0.0


In [None]:
# Gradient check on a tiny 2 → 3 → 1 network, using a single sample.
X = np.array([[0.1, -0.2]])   # one input row with two features
Y = np.array([[1]])           # its target

model = Sequential([Dense(2, 3), Tanh(), Dense(3, 1), Sigmoid()])
loss_fn = MSELoss()

# Value returned by the gradient check for the first Dense layer;
# leaving it as the last expression displays it as the cell output.
max_error = numerical_gradient(model, X, Y, loss_fn)
max_error


In [None]:
# XOR truth table: each row of X_xor is one (a, b) input pair.
X_xor = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1],
])
# Targets are a XOR b, kept as a column so each row lines up with its input pair.
Y_xor = X_xor[:, :1] ^ X_xor[:, 1:]


In [None]:
# 2 → 4 → 1 network for XOR: tanh hidden layer, sigmoid output.
xor_layers = [
    Dense(2, 4),
    Tanh(),
    Dense(4, 1),
    Sigmoid(),
]
xor_model = Sequential(xor_layers)

loss_fn = MSELoss()
# NOTE(review): the XOR training loop in this notebook passes lr directly to
# xor_model.backward() and never references `optimizer` — confirm whether
# this object is still needed.
optimizer = SGD(lr=0.1)


In [None]:
epochs = 5000
losses = []

# Full-batch training: every epoch pushes the whole XOR table through the
# network once and records the resulting loss.
for epoch in range(epochs):
    y_pred = xor_model.forward(X_xor)
    loss = loss_fn.forward(Y_xor, y_pred)
    losses.append(loss)

    # NOTE(review): backward() receives the learning rate, so it presumably
    # applies the parameter update too (no separate optimizer step follows) —
    # confirm in lib.network.
    grad = loss_fn.backward(Y_xor, y_pred)
    xor_model.backward(grad, lr=0.1)

plot_loss(losses, "XOR Training Loss")
print("Predictions:", xor_model.forward(X_xor), sep="\n")


In [None]:
# The training labels returned by load_data() are dropped (bound to `_`);
# only the test labels are kept, as y_test_labels.
(x_train, _), (x_test, y_test_labels) = mnist.load_data()

# Convert to float32 scaled by 1/255, then flatten each 28×28 image → 784.
x_train = (x_train.astype("float32") / 255.0).reshape(-1, 784)
x_test = (x_test.astype("float32") / 255.0).reshape(-1, 784)


In [None]:
encoder_dim = 32   # width of the latent layer

# Encoder half: 784 → 256 → encoder_dim
encoder_layers = [
    Dense(784, 256),
    ReLU(),
    Dense(256, encoder_dim),   # Latent
    ReLU(),
]

# Decoder half: encoder_dim → 256 → 784, with a sigmoid on the output
decoder_layers = [
    Dense(encoder_dim, 256),
    ReLU(),
    Dense(256, 784),
    Sigmoid(),
]

# Layers are constructed in the same order in which they are applied.
autoencoder = Sequential(encoder_layers + decoder_layers)

loss_fn = MSELoss()


In [None]:
epochs = 10
batch_size = 256
losses = []

for epoch in range(epochs):
    # Visit the training images in a fresh random order every epoch.
    perm = np.random.permutation(len(x_train))
    x_train_shuffled = x_train[perm]

    epoch_loss = 0
    num_batches = 0
    for i in range(0, len(x_train), batch_size):
        # The final slice may hold fewer than batch_size rows.
        Xb = x_train_shuffled[i:i+batch_size]

        # The autoencoder is trained to reproduce its own input,
        # so Xb serves as both input and target.
        y_pred = autoencoder.forward(Xb)
        loss = loss_fn.forward(Xb, y_pred)
        grad = loss_fn.backward(Xb, y_pred)
        autoencoder.backward(grad, lr=0.01)

        epoch_loss += loss
        num_batches += 1

    # Average over the batches that actually ran. Dividing by
    # len(x_train)//batch_size would leave out the trailing partial batch
    # (overstating the mean) and would be zero for data smaller than one batch.
    losses.append(epoch_loss / max(num_batches, 1))
    print(f"Epoch {epoch+1}/{epochs} — Loss: {losses[-1]}")

plot_loss(losses, "Autoencoder Training Loss")


In [None]:
n = 10
samples = x_test[:n]
recons = autoencoder.forward(samples)

# Two rows of n panels: originals on top, reconstructions underneath.
fig, axes = plt.subplots(2, n, figsize=(12, 4), squeeze=False)
for i in range(n):
    for ax, image in ((axes[0, i], samples[i]), (axes[1, i], recons[i])):
        ax.imshow(image.reshape(28, 28), cmap='gray')
        ax.axis("off")
plt.show()


In [None]:
def encode(model, X, num_layers=4):
    """
    Runs only the encoder part of ``model``: its first ``num_layers`` layers.

    With the default of 4 and the autoencoder built in this notebook this is
    Dense(784→256) → ReLU → Dense(256→32) → ReLU

    Args:
        model: Object whose ``layers`` can be indexed and whose layers
            expose ``forward``.
        X: Input passed to the first layer.
        num_layers: How many leading layers make up the encoder. Defaults to
            4 to match the architecture above; pass a different value for a
            deeper or shallower encoder.

    Returns:
        The output of layer ``num_layers - 1`` (``X`` itself when
        ``num_layers`` is 0).

    Raises:
        IndexError: If ``model.layers`` is a list holding fewer than
            ``num_layers`` layers.
    """
    out = X
    for index in range(num_layers):
        out = model.layers[index].forward(out)
    return out

# Project both splits through the encoder half of `autoencoder`
# (training split first, then test split).
X_train_latent, X_test_latent = (
    encode(autoencoder, images) for images in (x_train, x_test)
)

print("Latent shape:", X_train_latent.shape)


In [None]:
# The classifier must be fitted on labels from the *training* split: the
# features come from x_train, so test labels would not correspond to them
# (and the two arrays do not even have the same number of rows).
# The data-loading cell keeps only the test labels, so the training labels
# are fetched here. x_train is never reordered in place, so row i of
# X_train_latent still belongs to label i.
y_train_labels = mnist.load_data()[0][1]

n_svm_train = 20000   # train on subset for speed
clf = svm.SVC(kernel="rbf")
clf.fit(X_train_latent[:n_svm_train], y_train_labels[:n_svm_train])

# Evaluate on the full test split; the bare `acc` displays the accuracy.
y_pred = clf.predict(X_test_latent)
acc = accuracy_score(y_test_labels, y_pred)
acc


In [None]:
cm = confusion_matrix(y_test_labels, y_pred)

# Show the matrix as a colour-mapped image with a colour bar beside it.
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(cm)
ax.set_title("Confusion Matrix")
fig.colorbar(im, ax=ax)
plt.show()
