In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nnfs.datasets import spiral_data
import nnfs
nnfs.init()

DENSE LAYER CLASS

In [10]:
class Layer_Dense:
    """Fully connected layer with optional L1/L2 penalties on weights and biases.

    ``forward`` stores ``inputs`` and ``output``; ``backward`` stores
    ``dweights``, ``dbiases`` and ``dinputs`` on the instance.
    """

    def __init__(self, n_inputs, n_neurons,
                 weight_regularizer_l1=0, weight_regularizer_l2=0,
                 bias_regularizer_l1=0, bias_regularizer_l2=0):
        """Create the layer parameters.

        Args:
            n_inputs: number of input features (rows of the weight matrix).
            n_neurons: number of neurons (columns of the weight matrix).
            weight_regularizer_l1: L1 strength on weights, applied only when > 0.
            weight_regularizer_l2: L2 strength on weights, applied only when > 0.
            bias_regularizer_l1: L1 strength on biases, applied only when > 0.
            bias_regularizer_l2: L2 strength on biases, applied only when > 0.
        """
        # Small random weights drawn from NumPy's global RNG, zero biases.
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))

        # Regularization strengths, read again in backward().
        self.weight_regularizer_l1 = weight_regularizer_l1
        self.weight_regularizer_l2 = weight_regularizer_l2
        self.bias_regularizer_l1 = bias_regularizer_l1
        self.bias_regularizer_l2 = bias_regularizer_l2

    @staticmethod
    def _l1_direction(params):
        """Return an array shaped like ``params``: -1 where it is negative, +1 elsewhere."""
        direction = np.ones_like(params)
        direction[params < 0] = -1
        return direction

    def forward(self, inputs):
        """Compute ``inputs @ weights + biases`` and keep ``inputs`` for backward()."""
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        """Compute gradients w.r.t. weights, biases and inputs.

        Args:
            dvalues: gradient of the loss with respect to this layer's output.
        """
        # Gradients of the data loss with respect to the parameters.
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)

        # Weight penalties: L1 first, then L2.
        if self.weight_regularizer_l1 > 0:
            self.dweights += self.weight_regularizer_l1 * self._l1_direction(self.weights)
        if self.weight_regularizer_l2 > 0:
            self.dweights += 2 * self.weight_regularizer_l2 * self.weights

        # Bias penalties: L1 first, then L2.
        if self.bias_regularizer_l1 > 0:
            self.dbiases += self.bias_regularizer_l1 * self._l1_direction(self.biases)
        if self.bias_regularizer_l2 > 0:
            self.dbiases += 2 * self.bias_regularizer_l2 * self.biases

        # Gradient passed to the previous layer.
        self.dinputs = np.dot(dvalues, self.weights.T)


ACTIVATION FUNCTIONS (ReLU AND SOFTMAX)

In [4]:
class Activation_ReLu:
    """Rectified linear activation: passes positive values, zeroes the rest."""

    def forward(self, inputs):
        """Store ``inputs`` and set ``output`` to the element-wise max of 0 and ``inputs``."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Set ``dinputs`` to a copy of ``dvalues`` with entries zeroed where the input was <= 0."""
        blocked = self.inputs <= 0
        grad = dvalues.copy()  # copy so the caller's array is left untouched
        grad[blocked] = 0
        self.dinputs = grad

class Activation_Softmax:
    """Row-wise softmax activation (forward pass only)."""

    def forward(self, inputs):
        """Set ``output`` to the softmax of each row of ``inputs``.

        The row maximum is subtracted before exponentiating so that the
        largest exponent in every row is 0.
        """
        row_max = np.max(inputs, axis=1, keepdims=True)
        exponentials = np.exp(inputs - row_max)
        row_totals = np.sum(exponentials, axis=1, keepdims=True)
        self.output = exponentials / row_totals

LOSS CLASS

In [5]:
class Loss:
    """Base class for losses; subclasses supply ``forward(outputs, y)``."""

    def calculate(self, outputs, y):
        """Return the mean of the per-sample losses produced by ``self.forward``."""
        return np.mean(self.forward(outputs, y))

    def regularization_loss(self, layer):
        """Return the sum of the L1/L2 penalties enabled on ``layer``.

        A penalty is included only when its strength on ``layer`` is > 0.
        Terms are accumulated in the order: weight L1, weight L2, bias L1,
        bias L2.
        """
        # (strength, lazily evaluated magnitude) pairs; the magnitude is only
        # computed when the corresponding strength is positive.
        penalties = (
            (layer.weight_regularizer_l1, lambda: np.sum(np.abs(layer.weights))),
            (layer.weight_regularizer_l2, lambda: np.sum(layer.weights * layer.weights)),
            (layer.bias_regularizer_l1, lambda: np.sum(np.abs(layer.biases))),
            (layer.bias_regularizer_l2, lambda: np.sum(layer.biases * layer.biases)),
        )
        total = 0
        for strength, magnitude in penalties:
            if strength > 0:
                total += strength * magnitude()
        return total

class Loss_CategoricalCrossentropy(Loss):
    """Categorical cross-entropy computed per sample."""

    def forward(self, y_pred, y_true):
        """Return the negative log of the confidence assigned to the true class.

        Args:
            y_pred: predicted values, one row per sample; clipped to
                [1e-7, 1 - 1e-7] before the logarithm is taken.
            y_true: array with a ``shape`` attribute. A 1-D array is used as
                class indices; any other shape is multiplied element-wise
                with the clipped predictions and summed along axis 1.
        """
        n_samples = len(y_pred)
        # Clipping keeps log() away from 0.
        clipped = np.clip(y_pred, 1e-7, 1-1e-7)

        if len(y_true.shape) != 1:
            confidences = np.sum(y_true*clipped, axis=1)
        else:
            confidences = clipped[range(n_samples), y_true]

        return -np.log(confidences)

class Activation_Softmax_Loss_CategoricalCrossentropy:
    """Softmax activation combined with categorical cross-entropy loss."""

    def __init__(self):
        """Create the wrapped softmax activation and cross-entropy loss objects."""
        self.loss = Loss_CategoricalCrossentropy()
        self.activation = Activation_Softmax()

    def forward(self, inputs, y_true):
        """Run softmax on ``inputs``, store it in ``output`` and return the mean loss."""
        softmax = self.activation
        softmax.forward(inputs)
        self.output = softmax.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        """Store the combined softmax/cross-entropy gradient in ``dinputs``.

        Args:
            dvalues: array whose rows are decremented by 1 at the true-class
                position (the training loop passes the softmax output here).
            y_true: class indices, or a 2-D array that is reduced to indices
                with ``argmax`` along axis 1.
        """
        n_samples = len(dvalues)
        labels = np.argmax(y_true, axis=1) if len(y_true.shape) == 2 else y_true

        grad = dvalues.copy()  # leave the caller's array unchanged
        grad[range(n_samples), labels] -= 1
        # Divide by the sample count so the gradient matches the mean loss.
        self.dinputs = grad / n_samples

SGD OPTIMIZER WITH MOMENTUM

In [6]:
class Optimizer_SGD:
    """Stochastic gradient descent with optional learning-rate decay and momentum."""

    def __init__(self, learning_rate=1., decay=0, momentum=0.):
        """Store the hyperparameters and reset the iteration counter.

        Args:
            learning_rate: initial learning rate.
            decay: decay factor; the learning rate is left alone when falsy.
            momentum: momentum factor; plain SGD is used when falsy.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.momentum = momentum
        self.iterations = 0

    def pre_update_params(self):
        """Recompute ``current_learning_rate`` from ``iterations`` when decay is enabled."""
        if not self.decay:
            return
        self.current_learning_rate = self.learning_rate * \
            (1. / (1. + self.decay*self.iterations))

    def update_params(self, layer):
        """Apply one update to ``layer.weights`` / ``layer.biases`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. With momentum enabled,
        the previous steps are kept on the layer as ``weight_momentums`` and
        ``bias_momentums`` (created as zeros on first use).
        """
        rate = self.current_learning_rate

        if not self.momentum:
            weight_step = -rate*layer.dweights
            bias_step = -rate*layer.dbiases
        else:
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            # New step = fraction of the previous step minus the gradient step.
            weight_step = self.momentum*layer.weight_momentums - rate*layer.dweights
            layer.weight_momentums = weight_step
            bias_step = self.momentum*layer.bias_momentums - rate*layer.dbiases
            layer.bias_momentums = bias_step

        # Both steps are computed before either parameter is modified.
        layer.weights += weight_step
        layer.biases += bias_step

    def post_update_params(self):
        """Advance the iteration counter by one."""
        self.iterations += 1


ADAM OPTIMIZER

In [7]:
class Optimizer_Adam:
    """Adam optimizer with optional learning-rate decay."""

    def __init__(self, learning_rate = 0.001, decay =0., epsilon = 1e-7, beta_1 = 0.9, beta_2 = 0.999):
        """Store the hyperparameters and reset the iteration counter.

        Args:
            learning_rate: initial learning rate.
            decay: decay factor; the learning rate is left alone when falsy.
            epsilon: constant added to the square-rooted cache in the denominator.
            beta_1: averaging factor for the gradient (momentum) estimate.
            beta_2: averaging factor for the squared-gradient (cache) estimate.
        """
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.iterations = 0

    def pre_update_params(self):
        """Recompute ``current_learning_rate`` from ``iterations`` when decay is enabled."""
        if not self.decay:
            return
        self.current_learning_rate = self.learning_rate*(1/(1+ self.decay*self.iterations))

    def _bias_correction(self, beta):
        """Return ``1 - beta ** (iterations + 1)``, the divisor used to correct an estimate."""
        return 1 - (beta ** (self.iterations + 1))

    def update_params(self, layer):
        """Apply one Adam step to ``layer.weights`` / ``layer.biases`` in place.

        Reads ``layer.dweights`` and ``layer.dbiases``. The running estimates
        are kept on the layer as ``weight_momentums``, ``bias_momentums``,
        ``weight_cache`` and ``bias_cache`` (created as zeros on first use).
        """
        if not hasattr(layer, 'weight_cache'):
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.bias_cache = np.zeros_like(layer.biases)
            layer.bias_momentums = np.zeros_like(layer.biases)

        keep_m, keep_c = self.beta_1, self.beta_2

        # Running averages of the gradients.
        layer.weight_momentums = keep_m * layer.weight_momentums + (1 - keep_m)*layer.dweights
        layer.bias_momentums = keep_m*layer.bias_momentums + (1 - keep_m)*layer.dbiases

        # Running averages of the squared gradients.
        layer.weight_cache = keep_c*layer.weight_cache + (1 - keep_c)*(layer.dweights**2)
        layer.bias_cache = keep_c*layer.bias_cache + (1 - keep_c)*(layer.dbiases**2)

        # Corrected estimates; the divisors depend only on the iteration count.
        w_momentum_hat = layer.weight_momentums/self._bias_correction(keep_m)
        b_momentum_hat = layer.bias_momentums/self._bias_correction(keep_m)
        w_cache_hat = layer.weight_cache/self._bias_correction(keep_c)
        b_cache_hat = layer.bias_cache/self._bias_correction(keep_c)

        rate = self.current_learning_rate
        layer.weights += -rate*w_momentum_hat / (np.sqrt(w_cache_hat) + self.epsilon)
        layer.biases += -rate*b_momentum_hat / (np.sqrt(b_cache_hat) + self.epsilon)

    def post_update_params(self):
        """Advance the iteration counter by one."""
        self.iterations += 1

In [8]:
# CODING THE DROPOUT LAYER
class Layer_Dropout:
    """Inverted dropout layer.

    During training each input element is kept with probability ``1 - rate``
    and the kept elements are scaled by ``1 / (1 - rate)``.
    """

    def __init__(self, rate):
        """Store the keep probability derived from the dropout rate.

        Args:
            rate: dropout rate, i.e. the proportion of neurons that are
                dropped. Must satisfy ``0 <= rate < 1``.

        Raises:
            ValueError: if ``rate`` is outside ``[0, 1)``. A rate of 1 would
                make the keep probability 0 and turn the mask into 0/0 = NaN.
        """
        if not 0 <= rate < 1:
            raise ValueError(f"dropout rate must satisfy 0 <= rate < 1, got {rate!r}")
        # self.rate is the proportion of neurons that remain active.
        self.rate = 1 - rate

    def forward(self, inputs, training=True):
        """Apply dropout to ``inputs`` and store the result in ``output``.

        Args:
            inputs: output of the previous layer.
            training: when False, no mask is sampled and ``output`` is a copy
                of ``inputs``; ``binary_mask`` is left as it was.
        """
        self.inputs = inputs
        if not training:
            self.output = inputs.copy()
            return
        # Bernoulli mask, pre-divided by the keep probability so the scaling
        # is applied in the same multiplication as the masking.
        self.binary_mask = np.random.binomial(1, self.rate, size=inputs.shape) / self.rate
        self.output = inputs * self.binary_mask

    def backward(self, dvalues):
        """Pass gradients through the mask sampled by the last training forward pass."""
        self.dinputs = dvalues * self.binary_mask

TRAINING THE NEURAL NETWORK WITH AND WITHOUT REGULARIZATION (SWAP WHICH dense1 LINE IS COMMENTED OUT TO CHANGE)

In [11]:
# Train a 2 -> 64 -> 3 network (dense, ReLU, dropout, dense, softmax +
# cross-entropy) on spiral data with Adam, then evaluate on a second draw.
import numpy as np
# Training set from nnfs' spiral generator.
# NOTE(review): `samples` is presumably per class - confirm against nnfs docs.
X, y = spiral_data(samples = 1000, classes =3)

# Alternative first layer without regularization (swap with the line below):
# dense1 = Layer_Dense(2,64)

# First layer with L2 regularization on both weights and biases
dense1 = Layer_Dense(2,64, weight_regularizer_l2 = 5e-4, bias_regularizer_l2 = 5e-4)
activation1 = Activation_ReLu()
# The argument is the dropout rate (proportion of neurons dropped)
dropout1 = Layer_Dropout(0.1)
# Output layer uses the default regularizer strengths (all 0)
dense2 = Layer_Dense(64,3)
loss_activation = Activation_Softmax_Loss_CategoricalCrossentropy()

optimizer = Optimizer_Adam(learning_rate = 0.05,decay=5e-5)

for epoch in range(10001):
    # Forward pass; dropout is active, so the metrics below include its effect
    dense1.forward(X)
    activation1.forward(dense1.output)
    dropout1.forward(activation1.output)
    dense2.forward(dropout1.output)
    data_loss = loss_activation.forward(dense2.output, y)

    # Penalty terms from both dense layers; dense2 contributes 0 because its
    # regularizer strengths are all 0
    regularization_loss = (
        loss_activation.loss.regularization_loss(dense1) +
        loss_activation.loss.regularization_loss(dense2)
    )
    loss = data_loss + regularization_loss

    # Accuracy: predicted class is the arg-max of the softmax output
    predictions = np.argmax(loss_activation.output, axis=1)
    # Reduce 2-D labels to class indices (rebinds y, so this runs at most once)
    if len(y.shape) == 2:
        y = np.argmax(y, axis=1)
    accuracy = np.mean(predictions == y)
    
    # Report every 100th epoch (epoch 0 included)
    if not epoch % 100:
        print(f'epoch: {epoch}, ' +
              f'acc: {accuracy:.3f}, ' +
              f'loss: {loss:.3f},' +
              f'lr: {optimizer.current_learning_rate}')

    # Backward pass, in reverse order of the forward pass
    loss_activation.backward(loss_activation.output, y)
    dense2.backward(loss_activation.dinputs)
    dropout1.backward(dense2.dinputs)
    activation1.backward(dropout1.dinputs)
    dense1.backward(activation1.dinputs)
   

    # Parameter update: refresh the learning rate, step both dense layers,
    # then advance the optimizer's iteration counter
    optimizer.pre_update_params()
    optimizer.update_params(dense1)
    optimizer.update_params(dense2)
    optimizer.post_update_params()

# Evaluation: forward pass only; dropout1 is skipped and there is no backward pass

# Second, smaller draw from the same generator used as validation data
X_test, y_test = spiral_data(samples =100, classes = 3)
dense1.forward(X_test)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
# Data loss only; no regularization penalty is added here
loss = loss_activation.forward(dense2.output, y_test)

predictions  = np.argmax(loss_activation.output, axis = 1)
# Reduce 2-D labels to class indices before comparing
if len(y_test.shape)==2:
    y_test = np.argmax(y_test,axis = 1)
accuracy = np.mean(predictions == y_test)
print(f'validation acc:{accuracy:.3f}, loss:{loss:.3f}')

epoch: 0, acc: 0.331, loss: 1.099,lr: 0.05
epoch: 100, acc: 0.529, loss: 0.917,lr: 0.04975371909050202
epoch: 200, acc: 0.631, loss: 0.847,lr: 0.049507401356502806
epoch: 300, acc: 0.623, loss: 0.825,lr: 0.0492635105177595
epoch: 400, acc: 0.618, loss: 0.825,lr: 0.04902201088288642
epoch: 500, acc: 0.621, loss: 0.825,lr: 0.048782867456949125
epoch: 600, acc: 0.633, loss: 0.813,lr: 0.04854604592455945
epoch: 700, acc: 0.622, loss: 0.811,lr: 0.048311512633460556
epoch: 800, acc: 0.632, loss: 0.813,lr: 0.04807923457858551
epoch: 900, acc: 0.630, loss: 0.818,lr: 0.04784917938657352
epoch: 1000, acc: 0.628, loss: 0.776,lr: 0.04762131530072861
epoch: 1100, acc: 0.642, loss: 0.791,lr: 0.04739561116640599
epoch: 1200, acc: 0.636, loss: 0.813,lr: 0.04717203641681212
epoch: 1300, acc: 0.647, loss: 0.797,lr: 0.04695056105920466
epoch: 1400, acc: 0.618, loss: 0.812,lr: 0.04673115566147951
epoch: 1500, acc: 0.631, loss: 0.788,lr: 0.046513791339132055
epoch: 1600, acc: 0.637, loss: 0.795,lr: 0.04629