In [None]:
import numpy as np
import tensorflow as tf

# Fetch the MNIST train/test splits through the Keras dataset helper.
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image into a single row and rescale pixel values by 1/255.
X_train_full = X_train_full.reshape(len(X_train_full), -1) / 255.0
X_test = X_test.reshape(len(X_test), -1) / 255.0

# Keep the first 80% of the training rows for fitting and hold out the
# remaining 20% for validation (rows are taken in order, without shuffling).
train_size = int(0.8 * len(X_train_full))
X_train, y_train = X_train_full[:train_size], y_train_full[:train_size]
X_val, y_val = X_train_full[train_size:], y_train_full[train_size:]

# Activation functions
class ReLU:
    """Rectified linear unit activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Cache ``inputs`` and store the rectified values in ``self.output``."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Store the gradient w.r.t. the inputs in ``self.dinputs``.

        The upstream gradient ``dvalues`` passes through unchanged where the
        cached input was positive and is zeroed where it was ``<= 0``.
        """
        blocked = self.inputs <= 0
        grad = dvalues.copy()  # never modify the caller's array in place
        grad[blocked] = 0
        self.dinputs = grad

class Softmax:
    """Row-wise softmax activation for a 2-D batch of scores."""

    def forward(self, inputs):
        """Store the row-wise softmax of ``inputs`` in ``self.output``.

        Each row's maximum is subtracted before exponentiating so that large
        scores cannot overflow ``np.exp``; the shift cancels in the ratio.
        """
        shifted = inputs - np.max(inputs, axis=1, keepdims=True)
        exp_values = np.exp(shifted)
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Store the gradient w.r.t. the softmax inputs in ``self.dinputs``.

        For one sample with output ``s`` and upstream gradient ``d`` the
        Jacobian is ``diag(s) - s s^T``, hence
        ``J @ d = s * (d - sum(s * d))``.  Evaluating that closed form for the
        whole batch avoids building one Jacobian matrix per sample, and the
        result is floating point regardless of the dtype of ``dvalues``.
        """
        # Per-row dot product s . d, kept as a column so it broadcasts below.
        row_dot = np.sum(dvalues * self.output, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - row_dot)

# Layer class
class Layer:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_in, n_out):
        """Create a layer mapping ``n_in`` features to ``n_out`` units.

        Weights are sampled uniformly within ``+/- sqrt(6 / (n_in + n_out))``
        (Xavier/Glorot uniform initialization); biases start at zero.
        """
        fan_total = n_in + n_out
        bound = np.sqrt(6 / fan_total)
        self.weights = np.random.uniform(low=-bound, high=bound, size=(n_in, n_out))
        self.biases = np.zeros((1, n_out))

    def forward(self, inputs):
        """Cache ``inputs`` and store the affine transform in ``self.output``."""
        self.inputs = inputs
        projected = np.dot(inputs, self.weights)
        self.output = projected + self.biases

    def backward(self, dvalues):
        """Compute gradients from ``dvalues``, the gradient w.r.t. this layer's output.

        Sets ``self.dweights`` and ``self.dbiases`` (parameter gradients) and
        ``self.dinputs`` (gradient handed to the preceding layer).
        """
        cached_inputs = self.inputs
        self.dinputs = np.dot(dvalues, self.weights.T)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.dweights = np.dot(cached_inputs.T, dvalues)

# Loss function with optional label smoothing
class CategoricalCrossEntropy:
    """Categorical cross-entropy loss with optional label smoothing.

    Labels may be given as a 1-D array of integer class indices or as a 2-D
    array of per-class targets.  When ``label_smoothing`` is greater than
    zero, the targets are mixed with a uniform distribution in both
    ``forward`` and ``backward`` and for both label formats, so the reported
    loss and the gradient describe the same objective.
    """

    def __init__(self, num_classes, label_smoothing=0.0):
        """Store the class count (used to one-hot encode integer labels) and the smoothing factor."""
        self.num_classes = num_classes
        self.label_smoothing = label_smoothing

    def _soft_targets(self, y_true):
        """Return ``y_true`` as a (samples, classes) float target matrix with smoothing applied."""
        y_true = np.asarray(y_true)
        if y_true.ndim == 1:  # Integer labels -> one-hot rows
            y_true = np.eye(self.num_classes)[y_true]
        if self.label_smoothing > 0.0:
            y_true = (1 - self.label_smoothing) * y_true + self.label_smoothing / y_true.shape[1]
        return y_true

    def forward(self, y_pred, y_true):
        """Return the per-sample loss ``-sum_k target_k * log(pred_k)``.

        Args:
            y_pred: predicted class probabilities, one row per sample.
            y_true: integer class indices (1-D) or per-class targets (2-D).

        Returns:
            1-D array with one loss value per sample.
        """
        y_true = np.asarray(y_true)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)  # Clip probabilities to avoid log(0)
        if y_true.ndim == 1 and not self.label_smoothing > 0.0:
            # Hard integer labels: only the true-class probability contributes.
            picked = y_pred_clipped[np.arange(len(y_pred_clipped)), y_true]
            return -np.log(picked)
        targets = self._soft_targets(y_true)
        # Proper cross-entropy for soft targets; -log(sum(p * q)) would only
        # be correct when every target row is exactly one-hot.
        return -np.sum(targets * np.log(y_pred_clipped), axis=1)

    def backward(self, dvalues, y_true):
        """Store the batch-averaged gradient ``(dvalues - targets) / samples`` in ``self.dinputs``.

        ``dvalues`` is expected to hold the softmax probabilities.  The stored
        value is then the gradient of the mean loss with respect to the
        softmax *inputs* (the softmax and cross-entropy derivatives combined).
        """
        samples = len(dvalues)
        targets = self._soft_targets(y_true)
        self.dinputs = (dvalues - targets) / samples

# Neural Network Class
class NeuralNetwork:
    """Two-layer classifier (Layer -> ReLU -> Layer -> Softmax) trained with mini-batch SGD."""

    def __init__(self, input_size, hidden_size, output_size, learning_rate):
        """Build the layers, the loss (label smoothing 0.1) and store the SGD step size."""
        self.layer1 = Layer(input_size, hidden_size)
        self.relu = ReLU()
        self.layer2 = Layer(hidden_size, output_size)
        self.softmax = Softmax()
        self.loss = CategoricalCrossEntropy(num_classes=output_size, label_smoothing=0.1)
        self.learning_rate = learning_rate
        # Mean training loss of each completed epoch, appended by train().
        self.loss_history = []

    def forward(self, X):
        """Run ``X`` through the network and return the softmax class probabilities."""
        self.layer1.forward(X)
        self.relu.forward(self.layer1.output)
        self.layer2.forward(self.relu.output)
        self.softmax.forward(self.layer2.output)
        return self.softmax.output

    def backward(self, X, y_true):
        """Backpropagate from the most recent forward pass and apply one SGD step.

        ``X`` is accepted for interface compatibility; the activations cached
        by ``forward`` are what is actually used.
        """
        # The loss object's backward step computes (softmax_output - targets) / samples,
        # which is already the gradient w.r.t. the layer-2 scores (softmax and
        # cross-entropy combined).  It therefore goes straight into layer2;
        # routing it through Softmax.backward as well would apply the softmax
        # Jacobian a second time and distort the gradient.
        self.loss.backward(self.softmax.output, y_true)
        self.layer2.backward(self.loss.dinputs)
        self.relu.backward(self.layer2.dinputs)
        self.layer1.backward(self.relu.dinputs)

        # Update weights and biases
        for layer in (self.layer2, self.layer1):
            layer.weights -= self.learning_rate * layer.dweights
            layer.biases -= self.learning_rate * layer.dbiases

    def train(self, X, y, epochs, batch_size=32):
        """Train for ``epochs`` passes over ``(X, y)`` in shuffled mini-batches.

        The mean per-sample training loss of every epoch is appended to
        ``self.loss_history``.
        """
        n_samples = X.shape[0]
        for _ in range(epochs):
            # Shuffle the dataset
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X_shuffled, y_shuffled = X[indices], y[indices]

            loss_total = 0.0
            # Mini-batch gradient descent
            for start in range(0, n_samples, batch_size):
                stop = start + batch_size
                X_batch = X_shuffled[start:stop]
                y_batch = y_shuffled[start:stop]

                y_pred = self.forward(X_batch)
                loss_total += float(np.sum(self.loss.forward(y_pred, y_batch)))
                self.backward(X_batch, y_batch)

            if n_samples:  # nothing to average for an empty dataset
                self.loss_history.append(loss_total / n_samples)

    def evaluate(self, X, y):
        """Return the fraction of rows in ``X`` whose arg-max prediction equals the label in ``y``."""
        y_pred = self.forward(X)
        predictions = np.argmax(y_pred, axis=1)  # Get predicted class labels
        accuracy = np.mean(predictions == y)     # Compare with true labels
        return accuracy

# Network dimensions
input_size = X_train.shape[1]            # Features per sample (784 for MNIST)
hidden_size = 128                        # Width of the hidden layer
output_size = np.unique(y_train).size    # Distinct labels (10 for MNIST)

# Build the model and fit it with mini-batch SGD.
nn = NeuralNetwork(
    input_size,
    hidden_size,
    output_size,
    learning_rate=0.01,
)
nn.train(X_train, y_train, epochs=10, batch_size=32)

# Accuracy on the held-out validation split
val_accuracy = nn.evaluate(X_val, y_val)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

# Accuracy on the test split
test_accuracy = nn.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
11490434/11490434 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step
Validation Accuracy: 92.34%
Test Accuracy: 92.19%
