In [3]:
import numpy as np #type: ignore

In [4]:
# an array of incremental gradient values
# dvalues is a dl_dz matrix
dvalues = np.array([
    [1.,1.,1.],
    [2.,2.,2.],
    [3.,3.,3.]
    ])
# we have three sets of input samples 
inputs = np.array([
    [1,2,3,2.5],
    [2.,5.,-1,2],
    [-1.5,2.7,3.3,-0.8]
])
# sum weights of given inputs
# and multiply by the passed in gradient for this neuron
# dl_dweights = dl_dz * dz_dw
dweights = np.dot(inputs.T,dvalues)
print(dweights)

[[ 0.5  0.5  0.5]
 [20.1 20.1 20.1]
 [10.9 10.9 10.9]
 [ 4.1  4.1  4.1]]


In [3]:
# gradient of loss with respect to the biases
# biases are row vecotr with shape (1, n_neurons)
biases = np.array([[2,3,0.5]])
# dl_db = dl_dz
dbiases = np.sum(dvalues,axis=0,keepdims=True)
print(dbiases)

[[6. 6. 6.]]


In [4]:
# gradient of loss with respect to inputs
weights = np.array([
    [ 0.2,0.5,-0.26],
    [ 0.8,-0.91,-0.27],
    [-0.5,0.26,0.17],
    [ 1.,-0.5,0.87]
])
dinputs = np.dot(dvalues,weights.T)
print(weights)

[[ 0.2   0.5  -0.26]
 [ 0.8  -0.91 -0.27]
 [-0.5   0.26  0.17]
 [ 1.   -0.5   0.87]]


In [None]:
# creating Dense Layer class  with backpropogation
class Dense:
    # layer initialization
    def __init__(self,n_inputs,n_neurons):
        # super().__init__()
        self.weights =  0.01*np.random.randn(n_inputs,n_neurons)
        self.biases = np.zeros((1,n_neurons))
    
    # forward pass of Dense Layer
    def forward(self,inputs):
        # calculate output values from weights and inputs,weights,biases
        self.inputs = inputs
        self.outputs = np.dot(inputs,self.weights) + self.biases
    
    # backward method or backpropogation 
    def backward(self,dvalues):
        # gradients on parameters
        # loss with respect to weights
        self.dweights = np.dot(self.inputs.T,dvalues)
        # loss with respect to biases
        self.dbiases = np.sum(dvalues,axis=0,keepdims=True)
        # loss with respect to the inputs
        self.dinputs = np.dot(dvalues,self.weights.T)

In [None]:
# creating relu class with backpropogation
class ReLU:
    # forward pass
    def forward(self,inputs):
        # remember input values
        self.inputs = inputs
        self.output = np.maximum(0,inputs)
    
    # backward method in relu activation function
    def backward(self,dvalues):
        # since we need to modify original variable,
        # lets make a copy of value event first
        self.dinputs = dvalues.copy()
        # Zero gradient where input values are negative
        self.dinputs[self.inputs <= 0] = 0

In [7]:
# creating softmax class
class Softmax:
    def forward(self,inputs):
        exp_values = np.exp(inputs - np.max(inputs,axis=1,keepdims=True))
        probabelities = exp_values/ np.sum(exp_values,axis=1,keepdims=True)
        self.output = probabelities

In [15]:
class Loss:
    def calculate(self,output,y):
        sample_losses = self.forward(output,y)
        data_loss = np.mean(sample_losses)
        return data_loss

In [None]:
class CrossEntropyLoss(Loss):
    def forward(self,y_pred,y_true):
        # storing y_pred and y_true
        self.y_true = y_true
        self.y_pred = y_pred
        # number of samples in a batch
        samples = len(y_pred)
        # clip data to prevent division by 0
        # clip both sides to not drag mean towards any value
        y_pred_clipped = np.clip(y_pred,1e-7,1-1e-7)
        # probabelities for target values
        # only if categorical labels
        if len(y_true.shape) == 1:
            correct_confidences = y_pred_clipped[
            range(samples),
            y_true]
        elif len(y_true.shape) == 2:
            correct_confidences = np.sum(
                y_pred_clipped*y_true,axis=1
            )
        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods
    
    # backward pass or backpropogation
    def backward(self,dvalues,y_true):
        # number of samples
        samples = len(dvalues)
        # number of labels in every sample
        # we will use the first sample to count them
        labels = len(dvalues[0])
        # if labels are sparse turn them into one hot vector
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]
        # calculate gradient
        self.dinputs = -y_true / dvalues
        # normalize gradient
        self.dinputs = self.dinputs/samples

In [4]:
np.eye(2,dtype=int)

array([[1, 0],
       [0, 1]])

In [12]:
y_true = np.array([1,2,3])

In [13]:
np.eye(4)[y_true]

array([[0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])