<a href="https://colab.research.google.com/github/NeoZ666/classroom_DL_EXP/blob/main/DL_Exp2_25.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random
import json

# the sigmoid function
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of *z*.

    Accepts a scalar or a NumPy array (applied element-wise).  The
    exponential is only evaluated at a non-positive argument, so inputs of
    large magnitude cannot overflow ``np.exp``.
    """
    # exp(-|z|) always lies in (0, 1], whatever the sign or size of z.
    decay = np.exp(-np.abs(z))
    # z >= 0:  1 / (1 + exp(-z))
    # z <  0:  exp(z) / (1 + exp(z))   (the same value, written without exp(+big))
    return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

# the derivative of the sigmoid function
def sigmoid_prime(z):
    """Return the derivative of the sigmoid at *z*: ``s * (1 - s)`` with ``s = sigmoid(z)``."""
    activation = sigmoid(z)
    return activation * (1 - activation)

# sigmoid vectors
# sigmoid and sigmoid_prime are built from NumPy array operations and are
# therefore already element-wise; calling them on the whole array avoids the
# per-element Python loop that np.vectorize performs.
def sigmoid_vector(z):
    """Apply ``sigmoid`` element-wise to the array-like *z*."""
    return sigmoid(np.asarray(z))


def sigmoid_prime_vector(z):
    """Apply ``sigmoid_prime`` element-wise to the array-like *z*."""
    return sigmoid_prime(np.asarray(z))

# A class that implements stochastic gradient descent learning algorithm for a feedforward neural network
class NN:
    """Feedforward sigmoid network trained with mini-batch gradient descent.

    The update rule is selected by ``optimizer``: ``"momentum"`` (classical
    momentum), ``"nesterov"`` (Nesterov look-ahead momentum); any other
    value, including the default ``"sgd"``, selects plain SGD.
    """

    def __init__(self, sizes, optimizer="sgd", momentum=0.9):
        """Create a randomly initialised network.

        Args:
            sizes: Number of neurons in each layer, input layer first.
            optimizer: ``"momentum"`` or ``"nesterov"``; anything else means
                plain SGD.
            momentum: Velocity decay factor used by the two momentum rules.
        """
        self.num_layers = len(sizes)
        self.sizes = sizes

        # the biases and weights for the network are initialized randomly, using a Gaussian distribution with mean 0, and variance 1
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]

        # Initialize velocity for momentum and Nesterov
        self.vb = [np.zeros(b.shape) for b in self.biases]
        self.vw = [np.zeros(w.shape) for w in self.weights]

        self.optimizer = optimizer
        self.momentum = momentum

    # feedforward function - return the output of the network
    def feedforward(self, inputs):
        """Propagate *inputs* through every layer and return the output activations."""
        for b, w in zip(self.biases, self.weights):
            inputs = sigmoid_vector(np.dot(w, inputs) + b)
        return inputs

    # train function - train the neural network using mini-batch stochastic gradient descent
    # the training_data is a list of tuples "(x, y)" representing the training inputs and the desired outputs
    # if test_data is provided then the network will be evaluated against the test data after each epoch
    def train(self, training_data, epochs, mini_batch_size, eta, test_data=None):
        """Run *epochs* passes of mini-batch training over *training_data*.

        Args:
            training_data: List of ``(x, y)`` tuples.  It is shuffled in
                place at the start of every epoch.
            epochs: Number of passes over the training data.
            mini_batch_size: Number of samples per parameter update.
            eta: Learning rate.
            test_data: Optional list of ``(x, y)`` tuples; when given, the
                number of correct predictions is printed after each epoch.
        """
        if test_data:
            n_test = len(test_data)

        n = len(training_data)
        for j in range(epochs):
            random.shuffle(training_data)

            mini_batches = [training_data[k:k + mini_batch_size] for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batches:
                self.update_mini_batch(mini_batch, eta)

            if test_data:
                print(f"Epoch {j}: {self.evaluate(test_data)} / {n_test}")
            else:
                print(f"Epoch {j} complete")

    # update_mini_batch function - update the network's weights and biases by applying gradient descent using backpropagation
    # to a single mini batch
    # the mini_batch is a list of tuples "(x, y)" and eta is the learning rate
    def update_mini_batch(self, mini_batch, eta):
        """Apply one optimizer step computed from the samples in *mini_batch*.

        An empty mini-batch leaves the network untouched.
        """
        batch_size = len(mini_batch)
        if batch_size == 0:
            # Nothing to average over; also avoids dividing by zero below.
            return

        # Sum the per-sample gradients over the whole mini-batch.
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        for x, y in mini_batch:
            delta_nabla_b, delta_nabla_w = self.backpropagate(x, y)
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]

        self._apply_gradients(nabla_b, nabla_w, eta, batch_size)

    # _apply_gradients function - turn summed mini-batch gradients into a parameter update
    def _apply_gradients(self, nabla_b, nabla_w, eta, batch_size):
        """Update weights, biases and velocities from summed gradients.

        Args:
            nabla_b: Per-layer bias gradients summed over the mini-batch.
            nabla_w: Per-layer weight gradients summed over the mini-batch.
            eta: Learning rate.
            batch_size: Number of samples that were summed.
        """
        # Every optimizer works on the mini-batch *average*, so eta means the
        # same thing regardless of which update rule is selected.
        step = eta / batch_size
        mu = self.momentum

        if self.optimizer in ("momentum", "nesterov"):
            # v <- mu * v + step * g   (shared by both momentum variants)
            self.vb = [mu * vb + step * nb for vb, nb in zip(self.vb, nabla_b)]
            self.vw = [mu * vw + step * nw for vw, nw in zip(self.vw, nabla_w)]

            if self.optimizer == "nesterov":
                # Look-ahead step: apply the momentum once more on top of the
                # freshly updated velocity, plus the current gradient step.
                self.weights = [w - (mu * vw + step * nw)
                                for w, vw, nw in zip(self.weights, self.vw, nabla_w)]
                self.biases = [b - (mu * vb + step * nb)
                               for b, vb, nb in zip(self.biases, self.vb, nabla_b)]
            else:
                # Classical momentum: move by the velocity itself.
                self.weights = [w - vw for w, vw in zip(self.weights, self.vw)]
                self.biases = [b - vb for b, vb in zip(self.biases, self.vb)]

        else:  # Standard SGD
            self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
            self.biases = [b - step * nb for b, nb in zip(self.biases, nabla_b)]

    # backpropagate function - returns a tuple "(nabla_b, nabla_w)" representing the gradient for the cost function C_x
    # nabla_b and nabla_w are layer-by-layer lists of numpy arrays, similar to self.biases and self.weights
    def backpropagate(self, x, y):
        """Return ``(nabla_b, nabla_w)``, the cost gradient for one sample ``(x, y)``."""
        nabla_b = [np.zeros(b.shape) for b in self.biases]
        nabla_w = [np.zeros(w.shape) for w in self.weights]

        # feedforward
        activation = x
        activations = [x]  # list to store all of the activations, layer by layer
        zs = []  # list to store all of the z vectors, layer by layer

        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = sigmoid_vector(z)
            activations.append(activation)

        # backward pass: output-layer error first
        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime_vector(zs[-1])
        nabla_b[-1] = delta
        nabla_w[-1] = np.dot(delta, activations[-2].transpose())

        # l counts layers from the end: l == 2 is the last hidden layer.
        for l in range(2, self.num_layers):
            z = zs[-l]
            spv = sigmoid_prime_vector(z)
            delta = np.dot(self.weights[-l + 1].transpose(), delta) * spv
            nabla_b[-l] = delta
            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())

        return (nabla_b, nabla_w)

    # evaluate function - return the number of test inputs for which the neural network outputs the correct result
    def evaluate(self, test_data):
        """Count the samples whose arg-max output matches the arg-max of the target."""
        test_results = [(np.argmax(self.feedforward(x)), np.argmax(y)) for (x, y) in test_data]
        return sum(int(predicted == actual) for (predicted, actual) in test_results)

    # cost_derivative function - return the vector of partial derivatives for the output activations
    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``."""
        return (output_activations - y)

    # save function - save the neural network to filename
    def save(self, filename):
        """Write sizes, weights, biases and optimizer settings to *filename* as JSON.

        The velocity buffers are not stored.
        """
        data = {
            "sizes": self.sizes,
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases],
            # Stored so a reloaded network can keep the same update rule.
            "optimizer": self.optimizer,
            "momentum": self.momentum,
        }

        with open(filename, "w") as handle:
            json.dump(data, handle)

# load function - load a neural network from the file filename
# returns a network instance
def load(filename):
    """Rebuild an ``NN`` from the JSON file *filename*.

    The file must contain ``"sizes"``, ``"weights"`` and ``"biases"``.
    Optional ``"optimizer"`` and ``"momentum"`` entries are passed to the
    constructor; when they are absent the constructor defaults (``"sgd"``,
    ``0.9``) are used.  Velocity buffers start at zero.

    Raises:
        KeyError: If one of the three required entries is missing.
    """
    with open(filename, "r") as handle:
        data = json.load(handle)

    network = NN(
        data["sizes"],
        optimizer=data.get("optimizer", "sgd"),
        momentum=data.get("momentum", 0.9),
    )
    # Replace the random initial parameters with the stored ones.
    network.weights = [np.array(w) for w in data["weights"]]
    network.biases = [np.array(b) for b in data["biases"]]

    return network

# Example usage with MNIST dataset
from keras.datasets import mnist
from keras.utils import to_categorical

# Fetch MNIST as (images, labels) pairs for the train and test splits
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Flatten every image to a 784-vector of float32 and divide by 255
train_images = train_images.reshape(60000, 784).astype("float32") / 255
test_images = test_images.reshape(10000, 784).astype("float32") / 255

# One-hot encode the labels
train_labels = to_categorical(train_labels)
test_labels = to_categorical(test_labels)

# NN expects a list of (x, y) tuples with x of shape (784, 1) and y of shape (10, 1)
training_data = list(zip(
    (image.reshape(784, 1) for image in train_images),
    (label.reshape(10, 1) for label in train_labels),
))
test_data = list(zip(
    (image.reshape(784, 1) for image in test_images),
    (label.reshape(10, 1) for label in test_labels),
))

# 784 input neurons, one hidden layer of 30 neurons, 10 output neurons
nn = NN([784, 30, 10], optimizer="nesterov")  # You can also use "momentum"

# Train and report test accuracy after every epoch
# NOTE(review): the recorded output of this cell stays at 1009 / 10000 for
# all ten epochs, i.e. the network did not learn with these settings.
nn.train(
    training_data,
    epochs=10,
    mini_batch_size=10,
    eta=3.0,
    test_data=test_data,
)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 0: 1009 / 10000
Epoch 1: 1009 / 10000
Epoch 2: 1009 / 10000
Epoch 3: 1009 / 10000
Epoch 4: 1009 / 10000
Epoch 5: 1009 / 10000
Epoch 6: 1009 / 10000
Epoch 7: 1009 / 10000
Epoch 8: 1009 / 10000
Epoch 9: 1009 / 10000
