**Disclaimer**

Used Youtube channel 'Sentdex' to accomplish this code. 

In [1]:
import numpy as np
np.random.seed(2305)

# Input "layer"
class Layer_Input:
    # Forward pass
    def forward(self, inputs, training):
        pass

In [2]:
# Dense layer
class Layer_Dense:
    # Initialize weights and biases
        
    # Layer initialization
    def __init__(self, n_inputs, n_neurons):
        # Initialize weights and biases
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
    
    # Forward pass
    def forward(self, inputs):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs, weights and biases
        self.output = np.dot(inputs, self.weights) + self.biases
    
    # Backward pass
    def backward(self, dvalues):
        # Gradients on parameters
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        # Gradient on values
        self.dinputs = np.dot(dvalues, self.weights.T)

    # Retrieve layer parameters
    def get_parameters(self):
        pass

    # Set weights and biases in a layer instance
    def set_parameters(self, weights, biases):
        pass

In [3]:
# ReLU activation

class Activation_ReLU:

    # Forward pass
    def forward(self, inputs):
        # Remember input values
        self.inputs = inputs
        # Calculate output values from inputs
        self.output = np.maximum(0, inputs)
    
    # Backward pass
    def backward(self, dvalues):
        # Since we need to modify original variable,
        # letâ€™s make a copy of values first
        self.dinputs = dvalues.copy()
        # Zero gradient where input values were negative
        self.dinputs[self.inputs <= 0] = 0

    # Calculate predictions for outputs
    def predictions(self, outputs):
        pass

In [4]:
# Softmax activation

class Activation_Softmax:

# Forward pass
    def forward(self, inputs):
        # Remember input values
        self.inputs = inputs
        # Get unnormalized probabilities
        exp_values = np.exp(inputs - np.max(inputs, axis=1,
        keepdims=True))
        # Normalize them for each sample
        probabilities = exp_values / np.sum(exp_values, axis=1,
        keepdims=True)
        self.output = probabilities
    
    # Backward pass
    def backward(self, dvalues):
        # Create uninitialized array
        self.dinputs = np.empty_like(dvalues)
        
        # Enumerate outputs and gradients
        for index, (single_output, single_dvalues) in enumerate(zip(self.output, dvalues)):
            # Flatten output array
            single_output = single_output.reshape(-1, 1)
            # Calculate Jacobian matrix of the output
            jacobian_matrix = np.diagflat(single_output) - np.dot(single_output, single_output.T)
            # Calculate sample-wise gradient
            # and add it to the array of sample gradients
            self.dinputs[index] = np.dot(jacobian_matrix, single_dvalues)

    # Calculate predictions for outputs
    def predictions(self, outputs):
        pass

In [5]:
# SGD optimizer
class Optimizer_SGD:

    # Initialize optimizer - set settings,
    # learning rate of 1. is default for this optimizer
    def __init__(self, learning_rate=1., decay=0., momentum=0.):
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.momentum = momentum

    # Call once before any parameter updates
    def pre_update_params(self):
        if self.decay:
            self.current_learning_rate = self.learning_rate * \
                (1. / (1. + self.decay * self.iterations))

    # Update parameters
    def update_params(self, layer):

        # If we use momentum
        if self.momentum:

            # If layer does not contain momentum arrays, create them
            # filled with zeros
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                # If there is no momentum array for weights
                # The array doesn't exist for biases yet either.
                layer.bias_momentums = np.zeros_like(layer.biases)

            # Build weight updates with momentum - take previous
            # updates multiplied by retain factor and update with
            # current gradients
            weight_updates = \
                self.momentum * layer.weight_momentums - \
                self.current_learning_rate * layer.dweights
            layer.weight_momentums = weight_updates

            # Build bias updates
            bias_updates = \
                self.momentum * layer.bias_momentums - \
                self.current_learning_rate * layer.dbiases
            layer.bias_momentums = bias_updates

        # Vanilla SGD updates (as before momentum update)
        else:
            weight_updates = -self.current_learning_rate * \
                             layer.dweights
            bias_updates = -self.current_learning_rate * \
                           layer.dbiases

        # Update weights and biases using either
        # vanilla or momentum updates
        layer.weights += weight_updates
        layer.biases += bias_updates


    # Call once after any parameter updates
    def post_update_params(self):
        self.iterations += 1

In [6]:
# Common accuracy class
class Accuracy:

    # Calculates an accuracy given predictions and ground truth values
    def calculate(self, predictions, y):

        # Get comparison results
        

        # Calculate an accuracy
        

        # Add accumulated sum of matching values and sample count
        

        # Return accuracy
        pass

    # Calculates accumulated accuracy
    def calculate_accumulated(self):

        # Calculate an accuracy
        

        # Return the data and regularization losses
        pass

    # Reset variables for accumulated accuracy
    def new_pass(self):
        pass

In [7]:
class Accuracy_Categorical(Accuracy):

    def __init__(self, *, binary=False):
        # Binary mode?
        pass

    # No initialization is needed
    def init(self, y):
        pass

    # Compares predictions to the ground truth values
    def compare(self, predictions, y):
        # check if in binary mode
        if not self.binary and len(y.shape) == 2:
            # compare with maximum
            pass
        pass

In [8]:
# Common loss class
class Loss:

    # Calculates the data and regularization losses given model output and ground truth values
    def calculate(self, output, y):

        # Calculate sample losses
        sample_losses = self.forward(output, y)
        # Calculate mean loss
        data_loss = np.mean(sample_losses)
        # Return loss
        return data_loss

    # Calculates accumulated loss
    def calculate_accumulated(self):
        # Calculate mean loss
        
        # Return the data loss
        pass

    # Reset variables for accumulated loss
    def new_pass(self):
        pass

    # Set/remember trainable layers
    def remember_trainable_layers(self, trainable_layers):
        pass

In [9]:
# Cross-entropy loss
class Loss_CategoricalCrossentropy(Loss):

    # Forward pass
    def forward(self, y_pred, y_true):

        # Number of samples in a batch
        samples = len(y_pred)
        # Clip data to prevent division by 0
        # Clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probabilities for target values -
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[
                range(samples),
                y_true
            ]

        # Mask values - only for one-hot encoded labels
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(
                y_pred_clipped * y_true,
                axis=1
            )

        # Losses
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

    # Backward pass
    def backward(self, dvalues, y_true):

        # Number of samples
        samples = len(dvalues)
        # Number of labels in every sample
        # We'll use the first sample to count them
        labels = len(dvalues[0])

        # If labels are sparse, turn them into one-hot vector
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # Calculate gradient
        self.dinputs = -y_true / dvalues
        # Normalize gradient
        self.dinputs = self.dinputs / samples


In [10]:
class Activation_Softmax_Loss_CategoricalCrossentropy():
    # Creates activation and loss function objects
    def __init__(self):
        self.activation = Activation_Softmax()
        self.loss = Loss_CategoricalCrossentropy()

    # Forward pass
    def forward(self, inputs, y_true):
        # Output layer's activation function
        self.activation.forward(inputs)
        # Set the output
        self.output = self.activation.output
        # Calculate and return loss value
        return self.loss.calculate(self.output, y_true)
    
    # Backward pass
    def backward(self, dvalues, y_true):

        # Number of samples
        samples = len(dvalues)

        # If labels are one-hot encoded, turn them into discrete values
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)

        # Copy so we can safely modify
        self.dinputs = dvalues.copy()

        # Calculate gradient
        self.dinputs[range(samples), y_true] -= 1

        # Normalize gradient
        self.dinputs = self.dinputs / samples

In [11]:
# # Model class
# class Model:

#     def __init__(self):
#         # Create a list of network objects
        
#         # Softmax classifier's output object
#         pass

#     # Add objects to the model
#     def add(self, layer):
#         pass

#     # Set loss, optimizer and accuracy
#     def set(self, *, loss=None, optimizer=None, accuracy=None):
#         if loss is not None:
#             pass
#         if optimizer is not None:
#             pass
#         if accuracy is not None:
#             pass

#     # Finalize the model
#     def finalize(self):
#         # Create and set the input layer
#         self.input_layer = Layer_Input()

#         # Count all the objects
#         layer_count = len(self.layers)

#         # Initialize a list containing trainable layers:
#         self.trainable_layers = []

#         # Iterate the objects
#         for i in range(layer_count):

#             # If it's the first layer, the previous layer object is the input layer
#             if i == 0:
#                 self.layers[i].prev = self.input_layer
#                 self.layers[i].next = self.layers[i+1]

#             # All layers except for the first and the last
#             elif i < layer_count - 1:
#                 self.layers[i].prev = self.layers[i-1]
#                 self.layers[i].next = self.layers[i+1]

#             # The last layer - the next object is the loss
#             # Also let's save aside the reference to the last object whose output is the model's output
#             else:
#                 self.layers[i].prev = self.layers[i-1]
#                 self.layers[i].next = self.loss
#                 self.output_layer_activation = self.layers[i]

#             # If layer contains an attribute called "weights", it's a trainable layer - add it to the list of trainable layers
#             # We don't need to check for biases - checking for weights is enough
#             if hasattr(self.layers[i], 'weights'):
#                 self.trainable_layers.append(self.layers[i])

#         # Update loss object with trainable layers
#         if self.loss is not None:
#             self.loss.remember_trainable_layers(self.trainable_layers)

#         # If output activation is Softmax and loss function is Categorical Cross-Entropy
#         # create an object of combined activation and loss function containing faster gradient calculation
#         if isinstance(self.layers[-1], Activation_Softmax) and isinstance(self.loss, Loss_CategoricalCrossentropy):
#             # Create an object of combined activation and loss functions
#             self.softmax_classifier_output = Activation_Softmax_Loss_CategoricalCrossentropy()

#     # Performs forward pass
#     def forward(self, X, training):

#         # Call forward method on the input layer this will set the output property that the first layer in "prev" object is expecting
        

#         # Call forward method of every object in a chain, pass output of the previous object as a parameter
#         for layer in self.layers:
#             pass

#         # "layer" is now the last object from the list, return its output
#         return layer.output

#     # Train the model
#     def train(self, X, y, *, epochs=1, batch_size=None, print_every=1, validation_data=None):

#         # Initialize accuracy object
        

#         # Default value if batch size is not being set
        

#         # If there is validation data passed, set default number of steps for validation as well
#         if validation_data is not None:
            
#             # For better readability split validation data into X and y values
#             pass

#         # Calculate number of steps
#         if batch_size is not None:
#             # calculate the number of training steps according to data and batch size
            

#             # Dividing rounds down. If there are some remaining data, but not a full batch, this won't include it
#             # Add `1` to include this not full batch
#             if train_steps * batch_size < len(X):
#                 pass
            
#             # if there is validation data split it like the training data into batches
#             if validation_data is not None:
                

#                 # Dividing rounds down. If there are some remaining data, but nor full batch, this won't include it
#                 # Add `1` to include this not full batch
#                 if validation_steps * batch_size < len(X_val):
#                     pass

#         # Main training loop
#         for epoch in range(1, epochs+1):

#             # Print epoch number
            

#             # Reset accumulated values in loss and accuracy objects
            

#             # Iterate over steps
#             for step in range(train_steps):
#                 # If batch size is not set - train using one step and full dataset
#                 if batch_size is None:
#                     pass

#                 # Otherwise slice a batch
#                 else:
#                     pass

#                 # Perform the forward pass
                

#                 # Calculate loss
                

#                 # Get predictions and calculate an accuracy
                

#                 # Perform backward pass
                

#                 # Optimize (update parameters)
#                 # first call pre-update
#                 self.optimizer.pre_update_params()
#                 # now update params for each layer
#                 for layer in self.trainable_layers:
#                     pass
#                 # now call post-update params
                

#                 # Print a summary
#                 if not step % print_every or step == train_steps - 1:
#                     pass

#             # Get and print epoch loss and accuracy
#             pass

#             # If there is the validation data
#             if validation_data is not None:

#                 # Evaluate the model:
#                 pass

#     # Performs backward pass
#     def backward(self, output, y):

#         # If softmax classifier
#         if self.softmax_classifier_output is not None:

#             # First call backward method on the combined activation/loss
#             # this will set dinputs property
            

#             # Since we'll not call backward method of the last layer which is Softmax activation
#             # as we used combined activation/loss object, let's set dinputs in this object
            

#             # Call backward method going through all the objects but last
#             # in reversed order passing dinputs as a parameter
#             for layer in reversed(self.layers[:-1]):
#                 pass
#             return

#         # First call backward method on the loss this will set dinputs property that the last
#         # layer will try to access shortly
#         pass

#         # Call backward method going through all the objects
#         # in reversed order passing dinputs as a parameter
#         for layer in reversed(self.layers):
#             pass

#     # Evaluates the model using passed-in dataset
#     def evaluate(self, X_val, y_val, *, batch_size=None):

#         # Default value if batch size is not being set
        

#         # Calculate number of steps
#         if batch_size is not None:
            
#             # calculate validation steps
            

#             # Dividing rounds down. If there are some remaining data, but not a full batch, this won't include it
#             # Add `1` to include this not full batch
#             if validation_steps * batch_size < len(X_val):
#                 pass

#         # Reset accumulated values in loss and accuracy objects (new_pass)
#         pass

#         # Iterate over steps
#         for step in range(validation_steps):

#             # If batch size is not set - train using one step and full dataset
#             if batch_size is None:
#                 pass

#             # Otherwise slice a batch
#             else:
#                 pass

#             # Perform the forward pass
            

#             # Calculate the loss
            

#             # Get predictions and calculate an accuracy
#             pass

#         # Get and print validation loss and accuracy
        

#         # Print a summary
#         pass

#     # Retrieves and returns parameters of trainable layers
#     def get_parameters(self):

#         # Create a list for parameters
        

#         # Iterable trainable layers and get their parameters
#         for layer in self.trainable_layers:
#             pass

#         # Return a list
#         pass

#     # Updates the model with new parameters
#     def set_parameters(self, parameters):

#         # Iterate over the parameters and layers and update each layers with each set of the parameters
#         for parameter_set, layer in zip(parameters, self.trainable_layers):
#             layer.set_parameters(*parameter_set)

#     # Predicts on the samples

#     def predict(self, X, *, batch_size=None):

#         # Default value if batch size is not being set
        

#         # Calculate number of steps
#         if batch_size is not None:
            
#             # calculate prediction steps according to the batch size
            

#             # Dividing rounds down. If there are some remaining data, but not a full batch, this won't include it
#             # Add `1` to include this not full batch
#             if prediction_steps * batch_size < len(X):
#                 pass

#         # Model outputs - create empty array
        

#         # Iterate over steps
#         for step in range(prediction_steps):

#             # If batch size is not set - train using one step and full dataset
#             if batch_size is None:
#                 pass

#             # Otherwise slice a batch
#             else:
#                 batch_X = X[step*batch_size:(step+1)*batch_size]

#             # Perform the forward pass
#             pass

#             # Append batch prediction to the list of predictions
            

#         # Stack and return results
#         return np.vstack(output)

# # MNIST dataset (train + test)

# def download_minist_dataset():
#     # downloads and unzips the mnist dataset to the computer
    
#     import os
#     import urllib.request
#     import urllib 
#     from zipfile import ZipFile
#     URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
#     FILE = 'fashion_mnist_images.zip'
#     FOLDER = 'fashion_mnist_images'
#     if not os.path.isfile(FILE):
#         print(f'Downloading {URL} and saving as {FILE}...')
#         urllib.request.urlretrieve(URL, FILE)
#     print('Unzipping images...')
#     with ZipFile(FILE) as zip_images:
#         zip_images.extractall(FOLDER)
#     print('Done!')

# def load_mnist_dataset(dataset, path):
#     # loads and preprocesses the mnist dataset

#     import cv2 # to install: pip install opencv-python
#     import os
#     # Scan all the directories and create a list of labels
#     labels = os.listdir(os.path.join(path, dataset))
#     # Create lists for samples and labels
#     X = []
#     y = []
#     # For each label folder
#     for label in labels:
#         # And for each image in given folder
#         for file in os.listdir(os.path.join(path, dataset, label)):
#             # Read the image
#             image = cv2.imread(os.path.join(
#                 path, dataset, label, file), cv2.IMREAD_UNCHANGED)
#             # And append it and a label to the lists
#             X.append(image)
#             y.append(label)
#     # Convert the data to proper numpy arrays and return
#     return np.array(X), np.array(y).astype('uint8')

# def create_data_mnist(path):
#     # Load both sets separately
#     X, y = load_mnist_dataset('train', path)
#     X_test, y_test = load_mnist_dataset('test', path)
#     # And return all the data
#     return X, y, X_test, y_test

# print("we here 1")
# ## Main code to run everything

# # Create dataset
# download_minist_dataset()
# X, y, X_test, y_test = create_data_mnist('fashion_mnist_images')

# # Shuffle the training dataset
# pass

# # Scale and reshape samples
# pass

# # Instantiate the model
# model = Model()
# # Add layers

# # make sure you have the right output size that matches the number of classes in the trainingset
# # model.add(Layer_Dense(X.shape[1],))
# # model.add(Activation_ReLU())
# # model.add(Layer_Dense())
# # model.add(Activation_ReLU())
# # model.add(Layer_Dense(, 10))
# # model.add(Activation_Softmax())

# # Set loss, optimizer and accuracy objects - remember to change the values to your needs
# model.set(loss=Loss_CategoricalCrossentropy(),
#           optimizer=Optimizer_SGD(
#               learning_rate=0,
#               decay=0,
#               momentum=0),
#           accuracy=Accuracy_Categorical())
# print("we here 2")
# # Finalize the model
# # model.finalize()
# # Train the model
# print("we here 3")
# model.train(X,
#             y,
#             validation_data=(X_test,
#                              y_test),
#             epochs=0,
#             batch_size=0,
#             print_every=0)

In [83]:

# # Rectified Linear Activation Function
# class Activation_ReLU:
#     # Forward pass
#     def forward(self, inputs):
#         self.output = np.maximum(0, inputs) 
#         # if input > 0 then output is input otherwise output is 0 
        
# Softmax activation
# # In softmax activation we will take the normalized exponential values of the values given
# class Activation_Softmax:
#     # Forward pass
#     def forward(self, inputs):
#         # Get unnormalized probabilities
#         exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
#         # we are substracting the max value to prevent the overflow error
#         # axis = 1 makes the operation rowwise
#         # keepdims is Keep dimensions so the output will have same dimension as input
        
#         # Normalize them for each sample
#         probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)
#         self.output = probabilities

# MNIST dataset (train + test)

def download_minist_dataset():
    # downloads and unzips the mnist dataset to the computer    
    import os
    import urllib.request
    import urllib 
    from zipfile import ZipFile
    URL = 'https://nnfs.io/datasets/fashion_mnist_images.zip'
    FILE = 'fashion_mnist_images.zip'
    FOLDER = 'fashion_mnist_images'
    if not os.path.isfile(FILE):
        print(f'Downloading {URL} and saving as {FILE}...')
        urllib.request.urlretrieve(URL, FILE)
    print('Unzipping images...')
    with ZipFile(FILE) as zip_images:
        zip_images.extractall(FOLDER)
    print('Done!')

def load_mnist_dataset(dataset, path):
    # loads and preprocesses the mnist dataset
    import cv2 # to install: pip install opencv-python
    import os
    # Scan all the directories and create a list of labels
    labels = os.listdir(os.path.join(path, dataset))
    # Create lists for samples and labels
    X = []
    y = []
    counter = 0
    # For each label folder
    for label in labels:
        
        
        # And for each image in given folder
        for file in os.listdir(os.path.join(path, dataset, label)):
            counter += 1
            if dataset == 'train' and counter >= 20_000:
                break
            if dataset == 'test' and counter >= 4_000:
                break
                        # Read the image
            image = cv2.imread(os.path.join(
                path, dataset, label, file), cv2.IMREAD_UNCHANGED)
            # And append it and a label to the lists
            X.append(image)
            y.append(label)
    # Convert the data to proper numpy arrays and return
    print(counter)
    return np.array(X), np.array(y).astype('uint8')

def create_data_mnist(path):
    # Load both sets separately
    X, y = load_mnist_dataset('train', path)
    X_test, y_test = load_mnist_dataset('test', path)
    # And return all the data
    return X, y, X_test, y_test


## Main code to run everything

In [86]:
# Scale and reshape samples
X = (X.reshape(X.shape[0], -1).astype(np.float32) - 127.5) / 127.5
X_test = (X_test.reshape(X_test.shape[0], -1).astype(np.float32) -
             127.5) / 127.5

# Create Dense layer with 2 input features and 64 output values
dense1 = Layer_Dense(784, 64) # input layer

# Create ReLU activation (to be used with Dense layer):
activation1 = Activation_ReLU() 

# Create Dense layer with 2 input features and 64 output values
dense2 = Layer_Dense(64, 64)

# Create ReLU activation (to be used with Dense layer):
activation2 = Activation_ReLU()

# Create second Dense layer with 64 input features (as we take output
# of previous layer here) and 3 output values (output values)
dense3 = Layer_Dense(64, 10)
# Create Softmax classifier's combined loss and activation
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

# Create optimizer
optimizer = Optimizer_SGD(learning_rate=0.25)

# Train in loop
for epoch in range(701):

    # Perform a forward pass of our training data through this layer
    dense1.forward(X)

    # Perform a forward pass through activation function
    # takes the output of first dense layer here
    activation1.forward(dense1.output)
    
    dense2.forward(activation1.output)
    
    activation2.forward(dense2.output)
    # Perform a forward pass through second Dense layer
    # takes outputs of activation function of first layer as inputs
    dense3.forward(activation2.output)

    # Perform a forward pass through the activation/loss function
    # takes the output of second dense layer here and returns loss
    loss = loss_activation.forward(dense3.output, y)

    # Calculate accuracy from output of activation2 and targets
    # calculate values along first axis
    predictions = np.argmax(loss_activation.output, axis=1)
    if len(y.shape) == 10:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions==y)

    if not epoch % 100:
        print(f'epoch: {epoch}, ' +
              f'acc: {accuracy:.3f}, ' +
              f'loss: {loss:.3f}, ' +
              f'lr: {optimizer.current_learning_rate}')
    
    # Backward pass
    loss_activation.backward(loss_activation.output, y)
#     print(loss_activation.dinputs.T.shape)
    dense3.backward(loss_activation.dinputs)
    activation2.backward(dense3.dinputs)
    dense2.backward(activation2.dinputs)
    activation1.backward(dense2.dinputs)
    dense1.backward(activation1.dinputs)

    # Update weights and biases
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

epoch: 0, acc: 0.001, loss: 2.303, lr: 0.25
epoch: 100, acc: 0.781, loss: 0.884, lr: 0.25
epoch: 200, acc: 0.852, loss: 0.419, lr: 0.25
epoch: 300, acc: 0.890, loss: 0.333, lr: 0.25
epoch: 400, acc: 0.908, loss: 0.290, lr: 0.25
epoch: 500, acc: 0.917, loss: 0.262, lr: 0.25
epoch: 600, acc: 0.922, loss: 0.243, lr: 0.25
epoch: 700, acc: 0.926, loss: 0.230, lr: 0.25


In [87]:
# Perform a forward pass of our training data through this layer
dense1.forward(X_test)

# Perform a forward pass through activation function
# takes the output of first dense layer here
activation1.forward(dense1.output)

dense2.forward(activation1.output)

activation2.forward(dense2.output)
# Perform a forward pass through second Dense layer
# takes outputs of activation function of first layer as inputs
dense3.forward(activation2.output)

# Perform a forward pass through the activation/loss function
# takes the output of second dense layer here and returns loss
loss = loss_activation.forward(dense3.output, y_test)

# Calculate accuracy from output of activation2 and targets
# calculate values along first axis
predictions = np.argmax(loss_activation.output, axis=1)
if len(y_test.shape) == 10:
    y_test = np.argmax(y_test, axis=1)
accuracy = np.mean(predictions==y_test)
print(accuracy)

0.8877219304826206
