In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


In [None]:
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score


# =========================================================
# Helper functions (activations, one-hot encoding, minibatching)
# =========================================================
def sigmoid(z):
    """Element-wise logistic function ``1 / (1 + exp(-z))``.

    Evaluated in a numerically stable form: only ``exp(-|z|)`` is ever
    computed, so large-magnitude inputs cannot overflow ``np.exp``.

    Parameters
    ----------
    z : scalar or array_like
        Pre-activation values.

    Returns
    -------
    NumPy scalar or ndarray
        Logistic activations with the same shape as ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never exceeds 1, so it cannot overflow
    # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- algebraically equal.
    out = np.where(z >= 0, 1. / (1. + decay), decay / (1. + decay))
    return out[()]  # unwrap 0-d results so scalar input still yields a scalar

def softmax(z):
    """Row-wise softmax of a 2-D array of scores.

    Each row's maximum is subtracted before exponentiating, which leaves the
    result unchanged mathematically but keeps ``np.exp`` from overflowing.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    numerators = np.exp(z - row_peak)
    denominators = np.sum(numerators, axis=1, keepdims=True)
    return numerators / denominators

def int_to_onehot(y, num_labels):
    """One-hot encode a 1-D sequence of integer class labels.

    Parameters
    ----------
    y : array_like of int, shape (n,)
        Class indices; each must be a valid column index for ``num_labels``
        columns (an out-of-range label raises ``IndexError``).
    num_labels : int
        Number of classes, i.e. the width of the encoding.

    Returns
    -------
    ndarray of float, shape (n, num_labels)
        All zeros except a single 1 per row at the label's column.
    """
    labels = np.asarray(y)
    ary = np.zeros((labels.shape[0], num_labels))
    # Guard the empty case: an empty default-dtype (float) array is not a
    # valid index array, while there is nothing to set anyway.
    if labels.size:
        # One fancy-indexed assignment instead of a Python loop per sample.
        ary[np.arange(labels.shape[0]), labels] = 1
    return ary

def minibatch_generator(X, y, minibatch_size):
    """Yield shuffled ``(X_batch, y_batch)`` pairs of exactly ``minibatch_size`` rows.

    The row order is shuffled with NumPy's global random state
    (``np.random.shuffle``). Only full batches are produced: any trailing
    rows that do not fill a complete minibatch are skipped for that pass.
    """
    order = np.arange(X.shape[0])
    np.random.shuffle(order)

    # Largest start offset that still leaves room for a full minibatch.
    last_start = order.shape[0] - minibatch_size
    for offset in range(0, last_start + 1, minibatch_size):
        chosen = order[offset:offset + minibatch_size]
        yield X[chosen], y[chosen]


# =========================================================
# Neural network with TWO hidden layers (500, 500)
# =========================================================
class NeuralNetMLP2:
    """Multilayer perceptron with two sigmoid hidden layers and a softmax output.

    Every weight matrix is stored as ``(units_out, units_in)``, so a batch of
    row-vector inputs is propagated with ``np.dot(inputs, weight.T) + bias``.
    """

    def __init__(self, num_features, num_hidden1, num_hidden2, num_classes, random_seed=1):
        """Initialise weights from a seeded normal distribution and zero the biases.

        Weights are drawn with mean 0.0 and standard deviation 0.1 from
        ``np.random.RandomState(random_seed)``.
        """
        self.num_classes = num_classes

        generator = np.random.RandomState(random_seed)

        # (attribute suffix, units_out, units_in). Weights are drawn in this
        # order, so reordering the rows would change the initial values.
        layer_specs = (
            ('h1', num_hidden1, num_features),
            ('h2', num_hidden2, num_hidden1),
            ('out', num_classes, num_hidden2),
        )
        for suffix, units_out, units_in in layer_specs:
            setattr(self, f'weight_{suffix}',
                    generator.normal(loc=0.0, scale=0.1, size=(units_out, units_in)))
            setattr(self, f'bias_{suffix}', np.zeros(units_out))

    def forward(self, x):
        """Propagate a batch ``x`` and return ``(a_h1, a_h2, a_out)``.

        ``a_h1`` and ``a_h2`` are the sigmoid activations of the two hidden
        layers; ``a_out`` is the softmax output.
        """
        hidden1 = sigmoid(np.dot(x, self.weight_h1.T) + self.bias_h1)
        hidden2 = sigmoid(np.dot(hidden1, self.weight_h2.T) + self.bias_h2)
        probas = softmax(np.dot(hidden2, self.weight_out.T) + self.bias_out)
        return hidden1, hidden2, probas

    @staticmethod
    def _layer_grads(delta, layer_input):
        """Return (weight gradient, bias gradient) for one layer's error term."""
        return np.dot(delta.T, layer_input), np.sum(delta, axis=0)

    def backward(self, x, a_h1, a_h2, a_out, y):
        """Backpropagate the squared-error loss for one batch.

        The loss differentiated here is the squared error between ``a_out``
        and the one-hot targets, summed over classes and divided by the
        batch size.

        Parameters
        ----------
        x : input batch that produced the activations.
        a_h1, a_h2, a_out : activations returned by ``forward(x)``.
        y : integer class labels for the batch.

        Returns
        -------
        tuple
            ``(grad_W_out, grad_b_out, grad_W_h2, grad_b_h2, grad_W_h1, grad_b_h1)``.
        """
        batch_size = y.shape[0]
        targets = int_to_onehot(y, self.num_classes)

        # Derivative of the loss with respect to the softmax outputs.
        loss_grad = 2. * (a_out - targets) / batch_size

        # Push the gradient through the softmax:
        # delta_j = a_j * (g_j - sum_k g_k * a_k), evaluated row by row.
        row_dot = np.sum(loss_grad * a_out, axis=1, keepdims=True)
        err_out = a_out * (loss_grad - row_dot)

        # Hidden-layer error terms; a * (1 - a) is the sigmoid derivative.
        err_h2 = np.dot(err_out, self.weight_out) * a_h2 * (1. - a_h2)
        err_h1 = np.dot(err_h2, self.weight_h2) * a_h1 * (1. - a_h1)

        return (
            *self._layer_grads(err_out, a_h2),
            *self._layer_grads(err_h2, a_h1),
            *self._layer_grads(err_h1, x),
        )


# =========================================================
# Evaluation function (MSE and accuracy, computed in minibatches)
# =========================================================
def compute_mse_and_acc(model, X, y, minibatch_size=100):
    """Compute mean squared error and accuracy of ``model`` over all of ``(X, y)``.

    The data is processed in consecutive slices of at most ``minibatch_size``
    rows to bound memory use. Every sample is evaluated exactly once: no
    shuffling is done and a final partial slice is included, so the result
    does not depend on the batch size or on random state.

    Parameters
    ----------
    model : object providing ``forward(features)`` (whose third return value
        holds the class probabilities) and a ``num_classes`` attribute.
    X : array of input rows.
    y : array of integer class labels, one per row of ``X``.
    minibatch_size : int, default 100
        Maximum number of rows per forward pass; must be at least 1.

    Returns
    -------
    (mse, acc)
        ``mse`` is the mean of the squared differences between the one-hot
        targets and the predicted probabilities over all samples and
        classes; ``acc`` is the fraction of correctly classified samples.

    Raises
    ------
    ValueError
        If ``X`` is empty or ``minibatch_size`` is smaller than 1.
    """
    num_examples = X.shape[0]
    if num_examples == 0:
        raise ValueError('cannot evaluate on an empty dataset')
    if minibatch_size < 1:
        raise ValueError('minibatch_size must be at least 1')

    squared_error, element_count, correct = 0., 0, 0

    for start in range(0, num_examples, minibatch_size):
        features = X[start:start + minibatch_size]
        targets = y[start:start + minibatch_size]

        _, _, probas = model.forward(features)
        onehot = int_to_onehot(targets, model.num_classes)

        # Accumulate sums rather than per-batch means so that a shorter
        # final slice is weighted by its actual size.
        squared_error += np.sum((onehot - probas) ** 2)
        element_count += probas.size
        correct += (np.argmax(probas, axis=1) == targets).sum()

    mse = squared_error / element_count
    acc = correct / num_examples

    return mse, acc


# =========================================================
# Training function (no validation split; reports train and test accuracy per epoch)
# =========================================================
def train(model, X_train, y_train, X_test, y_test, num_epochs,
          learning_rate=0.1, minibatch_size=100):
    """Train ``model`` with minibatch gradient descent, logging accuracy per epoch.

    For every minibatch the gradients returned by ``model.backward`` are
    scaled by ``learning_rate`` and subtracted from the matching parameter.
    After each epoch the model is evaluated on the training and the test
    data with ``compute_mse_and_acc`` and a summary line is printed.

    Returns
    -------
    (epoch_train_acc, epoch_test_acc)
        Two lists with one accuracy value per epoch.
    """
    train_history = []
    test_history = []

    for epoch_idx in range(num_epochs):

        for features, targets in minibatch_generator(X_train, y_train, minibatch_size):

            hidden1, hidden2, probas = model.forward(features)

            (step_w_out, step_b_out,
             step_w_h2, step_b_h2,
             step_w_h1, step_b_h1) = model.backward(features, hidden1, hidden2,
                                                    probas, targets)

            # Parameter attribute -> gradient for this minibatch.
            updates = {
                'weight_out': step_w_out,
                'bias_out': step_b_out,
                'weight_h2': step_w_h2,
                'bias_h2': step_b_h2,
                'weight_h1': step_w_h1,
                'bias_h1': step_b_h1,
            }
            for attr, gradient in updates.items():
                # Same read / in-place subtract / write-back sequence as
                # ``model.<attr> -= learning_rate * gradient``.
                current = getattr(model, attr)
                current -= learning_rate * gradient
                setattr(model, attr, current)

        train_mse, train_acc = compute_mse_and_acc(model, X_train, y_train)
        _, test_acc = compute_mse_and_acc(model, X_test, y_test)

        train_history.append(train_acc)
        test_history.append(test_acc)

        summary = (f'Epoch {epoch_idx+1:02d}/{num_epochs} | '
                   f'Train MSE: {train_mse:.4f} | '
                   f'Train Acc: {train_acc*100:.2f}% | '
                   f'Test Acc: {test_acc*100:.2f}%')
        print(summary)

    return train_history, test_history


# =========================================================
# Load MNIST + 70/30 split
# =========================================================
# Request plain NumPy arrays explicitly (as_frame=False) so the code does not
# depend on fetch_openml's default return container or on pandas accessors.
X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False)
X = X.astype(np.float32)
y = y.astype(int)  # labels are later used as integer column indices

# Rescale to [-1, 1], assuming 8-bit pixel intensities in [0, 255].
X = ((X / 255.0) - .5) * 2

# 70/30 split, stratified so both parts keep the class proportions of y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=1, stratify=y
)


# =========================================================
# Run training
# =========================================================
# minibatch_generator shuffles with NumPy's global RNG (np.random.shuffle);
# seed it so that repeated runs visit the minibatches in the same order and
# the reported accuracies are reproducible.
np.random.seed(1)

model = NeuralNetMLP2(
    num_features=784,
    num_hidden1=500,
    num_hidden2=500,
    num_classes=10
)

# Per-epoch training and test accuracies, kept for plotting later on.
epoch_train_acc, epoch_test_acc = train(
    model,
    X_train,
    y_train,
    X_test, y_test,
    num_epochs=50,
    learning_rate=0.1,
    minibatch_size=100
)


# =========================================================
# Test evaluation (EXACT book style)
# =========================================================
# Final metrics of the trained model on the held-out test split.
test_mse, test_acc = compute_mse_and_acc(model, X_test, y_test)
print(
    f'Test accuracy: {test_acc*100:.2f}%',
    f'Test MSE: {test_mse:.6f}',
    sep='\n',
)


# =========================================================
# Macro AUC (OvR)
# =========================================================
# Class probabilities for the whole test split in a single forward pass.
_, _, p_test = model.forward(X_test)

# One-vs-rest ROC AUC per class, averaged with equal weight per class.
macro_auc = roc_auc_score(
    y_test,
    p_test,
    average="macro",
    multi_class="ovr",
)
print(f'Macro AUC: {macro_auc:.6f}')

Epoch 01/50 | Train MSE: 0.0248 | Train Acc: 85.15% | Test Acc: 85.06%
Epoch 02/50 | Train MSE: 0.0177 | Train Acc: 88.95% | Test Acc: 88.82%
Epoch 03/50 | Train MSE: 0.0154 | Train Acc: 90.23% | Test Acc: 90.24%
Epoch 04/50 | Train MSE: 0.0134 | Train Acc: 91.56% | Test Acc: 91.32%
Epoch 05/50 | Train MSE: 0.0129 | Train Acc: 91.72% | Test Acc: 91.48%
Epoch 06/50 | Train MSE: 0.0124 | Train Acc: 92.11% | Test Acc: 91.80%
Epoch 07/50 | Train MSE: 0.0113 | Train Acc: 92.81% | Test Acc: 92.55%
Epoch 08/50 | Train MSE: 0.0109 | Train Acc: 93.22% | Test Acc: 92.83%
Epoch 09/50 | Train MSE: 0.0103 | Train Acc: 93.51% | Test Acc: 93.03%
Epoch 10/50 | Train MSE: 0.0102 | Train Acc: 93.57% | Test Acc: 93.26%
Epoch 11/50 | Train MSE: 0.0098 | Train Acc: 93.76% | Test Acc: 93.20%
Epoch 12/50 | Train MSE: 0.0093 | Train Acc: 94.24% | Test Acc: 93.70%
Epoch 13/50 | Train MSE: 0.0087 | Train Acc: 94.61% | Test Acc: 93.98%
Epoch 14/50 | Train MSE: 0.0085 | Train Acc: 94.76% | Test Acc: 94.12%
Epoch 

In [None]:
# Re-evaluate the trained model on the test split.
test_mse, test_acc = compute_mse_and_acc(
    model, X_test, y_test, minibatch_size=100
)

# Accuracy expressed as a percentage.
test_acc_percent = 100 * test_acc

In [None]:
import matplotlib.pyplot as plt

# Per-epoch accuracy curves; needs epoch_train_acc / epoch_test_acc from the
# training cell, so that cell must have finished running first.
fig, ax = plt.subplots(figsize=(8, 5))

for label, history in (('Training', epoch_train_acc),
                       ('Test', epoch_test_acc)):
    epochs = range(1, len(history) + 1)  # epochs are numbered from 1
    ax.plot(epochs, history, label=label)

ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Training vs Test Accuracy')
ax.legend(loc='lower right')
ax.grid(alpha=0.3)

plt.show()




NameError: name 'epoch_train_acc' is not defined

<Figure size 800x500 with 0 Axes>