In [25]:
import numpy as np
inputs = [[1, 2, 3, 2.5],
        [2.0, 5.0, -1.0, 2.0],
         [-1.5, 2.7, 3.3, -0.8]]
weights = [[0.2, 0.8, -0.5, 1],
            [0.5, -0.91, 0.26, -0.5],
            [-0.26, -0.27, 0.17, 0.87]]
biases = [2,3,0.5]


In [26]:
layer1_out = np.dot(inputs,np.array(weights).T)+biases
layer1_out

array([[ 4.8  ,  1.21 ,  2.385],
       [ 8.9  , -1.81 ,  0.2  ],
       [ 1.41 ,  1.051,  0.026]])

In [42]:
weights2 = [[0.1, -0.14, 0.5],
            [-0.5, 0.12, -0.33],
            [-0.44, 0.73, -0.13]]
biases2 = [-1, 2, -0.5]


In [43]:
weights2 = np.array(weights2)

layer2_out = np.dot(layer1_out, weights2.T) + biases2
layer2_out

array([[ 0.5031 , -1.04185, -2.03875],
       [ 0.2434 , -2.7332 , -5.7633 ],
       [-0.99314,  1.41254, -0.35655]])

In [188]:
class Layer_Dense:
    
    def __init__(self, n_inputs, n_neurons):
        """initializes weights and biases"""
        self.weights = 0.01 * np.random.randn(n_inputs, n_neurons)
        self.biases = np.zeros((1, n_neurons))
        
    def forward(self, inputs):
        self.output = np.dot(inputs, self.weights) + self.biases
        
    def backward(self, dvalues):
        self.dweights = np.dot(self.inputs.T, dvalues)
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)
        self.inputs = np.dot(dvalues, self.weights.T)
        

In [45]:
import nnfs
from nnfs.datasets import spiral_data
import matplotlib.pyplot as plt

X, y = spiral_data(samples=100, classes= 3)

In [46]:
dense1 = Layer_Dense(2, 3)

dense1.forward(X)
dense1.output[:5]

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00],
       [-7.96105637e-05, -1.97224600e-05,  6.68923973e-05],
       [-1.69826106e-04, -2.99537171e-05,  1.44244557e-04],
       [ 5.78901014e-05, -2.57421094e-04, -8.33785640e-05],
       [-3.09068303e-04, -8.70711657e-05,  2.58350601e-04]])

In [189]:
class Acti_Relu:
    
    def forward(self, inputs):
        self.output = np.maximum(0,inputs)
        
    def backward(self, dvalues):
        self.dinputs = dvalues.copy()
        self.dinputs[self.inputs <= 0] = 0

In [48]:
activation1 = Acti_Relu()

activation1.forward(dense1.output)

print(activation1.output[:5])

[[0.00000000e+00 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 6.68923973e-05]
 [0.00000000e+00 0.00000000e+00 1.44244557e-04]
 [5.78901014e-05 0.00000000e+00 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 2.58350601e-04]]


In [59]:
exp_values = np.exp(inputs)

probabilities = exp_values / np.sum(exp_values, axis=1, keepdims=True)

In [204]:
class Acti_SoftMax:
    
    def forward(self, inputs):
        exp_values = np.exp(inputs - np.max(inputs,
                                           axis=1,
                                           keepdims=True))
        probabilities = exp_values / np.sum(exp_values,
                                           axis=1,
                                           keepdims=True)
        self.output = probabilities
        
    def backward(self, dvalues):
        
        self.dinputs = np.empty_like(dvalues)
        
        for i, (single_output, single_dvalues) in \
                            enumerate(zip(self.output, dvalues)):
            single_output = single_output.reshape(-1,1)
            
            jacobian_matrix = np.diagflat(single_output) - \
                                np.dot(single_output, single_output.T)
            self.dinputs[i] = np.dot(jacobian_matrix, single_dvalues)

In [68]:
softmax = Acti_SoftMax()

softmax.forward([[1, 2, 3]])
print(softmax.output)

[[0.09003057 0.24472847 0.66524096]]


In [71]:
dense1 = Layer_Dense(2, 3)
dense2 = Layer_Dense(3, 3)

activation1 = Acti_Relu()
activation2 = Acti_SoftMax()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

print(activation2.output[:5])


[[0.33333333 0.33333333 0.33333333]
 [0.33333349 0.33333319 0.33333332]
 [0.33333367 0.33333303 0.3333333 ]
 [0.33333333 0.33333333 0.33333333]
 [0.33333395 0.33333278 0.33333327]]


In [119]:
softmax_out = np.array([[0.7, 0.1, 0.2],
              [0.5, 0.1, 0.4],
              [0.02, 0.9, 0.08]])
class_targets = np.array([[1, 0, 0], [0,1,0], [0, 1, 0]]) # index of write

neg_log = -np.log(softmax_out[range(len(softmax_out)), class_targets])
average_loss = np.mean(neg_log)

In [120]:
if len(class_targets.shape)==1:
    correct_confidence = softmax_out[
                            range(len(softmax_out)),
                            class_targets]
elif len(class_targets.shape) == 2:
    correct_confidence = np.sum(softmax_out * class_targets, axis=1)
    
neg_log = -np.log(correct_confidence)

average_loss = np.mean(neg_log)

print(average_loss)

0.9215401841968681


In [121]:
class Loss:
    def calculate(self, output, y):
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss

In [191]:
class Loss_CategoCrossentropy(Loss): # inheriting LOSS class
    
    def forward(self, y_pred, y_true):
        
        samples = len(y_pred)
        y_pred_clipped = np.clip(y_pred, 1e-7, 1-1e-7) # to prevent overflow and diveding by 0
        
        if len(class_targets.shape)==1:
            correct_confidence = softmax_out[
                                    range(len(softmax_out)),
                                    class_targets]
        elif len(class_targets.shape) == 2:
            correct_confidence = np.sum(softmax_out * class_targets, axis=1)

        neg_log = -np.log(correct_confidence)

        return neg_log
    
    def backward(self, dvalues, y_true):
        
        samples=len(dvalues)
        
        labels = len(dvalues[0])
        
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]
            
        self.dinputs = -y_true / dvalues
        
        self.dinputs = self.dinputs / samples
        

In [196]:
class Activation_Softmax_Loss_CategoricalCrossentropy():
    
    def __init__(self):
        self.activation = Acti_SoftMax()
        self.loss = Loss_CategoCrossentropy()
        
    def forward(self, inputs, y_true):
        self.activation.forward(inputs)
        
        self.output = self.activation.output
        
        return self.loss.calculate(self.output, y_true)
    
    def backward(self, dvalues, y_true):
        
        samples = len(dvalues)
        
        if len(y_true.shape) == 2:
            y_true = np.argmax(y_true, axis=1)
            
        self.dinputs = dvalues.copy()
        
        self.dinputs[range(samples), y_true] -= 1
        
        self.dinputs = self.dinputs / samples

In [123]:
loss_func = Loss_CategoCrossentropy()
loss = loss_func.calculate(softmax_out, class_targets)
loss

0.9215401841968681

In [127]:
dense1 = Layer_Dense(2, 3)
dense2 = Layer_Dense(3, 3)

activation1 = Acti_Relu()
activation2 = Acti_SoftMax()

loss_func = Loss_CategoCrossentropy()

dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

print(activation2.output[:5])

loss = loss_func.calculate(activation2.output, y)

print(loss)

[[0.33333333 0.33333333 0.33333333]
 [0.33333456 0.33333318 0.33333226]
 [0.33333576 0.33333303 0.33333121]
 [0.33333652 0.33333293 0.33333055]
 [0.33333825 0.33333271 0.33332904]]
0.9215401841968681


In [124]:
predictions = np.argmax(softmax_out, axis=1)

if len(class_targets.shape) == 2:
    class_targets = np.argmax(class_targets, axis=1)
    
accuracy = np.mean(predictions==class_targets)

print("acc", accuracy)

acc 0.6666666666666666


In [130]:
from nnfs.datasets import vertical_data

X, y =vertical_data(samples=100, classes=3)

dense1 = Layer_Dense(2, 3)
activation1 = Acti_Relu()

dense2 = Layer_Dense(3, 3)
activation2 = Acti_SoftMax()

loss_func = Loss_CategoCrossentropy()

In [140]:
x = np.array([1.0, -2.0, 3.0])
w = np.array([-3.0, -1.0, 2.0])
b = 1.0

z = np.dot(x,w.T)+b
y =(z,0)

In [151]:
dvalue = 1.0
# Derivative of ReLU
drelu_dz = dvalue *(1.0 if z > 0 else 0.0)
print(drelu_dz)
# Partial derivative of sum
dsum_dxw0 = 1 
drelu_dxw0 = drelu_dz * dsum_dxw0
dsum_dxw1 = 1 
drelu_dxw1 = drelu_dz * dsum_dxw1
dsum_dxw2 = 1 
drelu_dxw2 = drelu_dz * dsum_dxw2
dsum_dxb = 1 
drelu_db = drelu_dz * dsum_dxb
print(drelu_dxw0, drelu_dxw1, drelu_dxw2, drelu_dxb)
# Partial derivative of multiplication
dmul_dx0 = w[0]
drelu_dx0 = drelu_dxw0 * dmul_dx0
dmul_dx1 = w[1]
drelu_dx1 = drelu_dxw1 * dmul_dx1
dmul_dx2 = w[2]
drelu_dx2 = drelu_dxw2 * dmul_dx2

dmul_dw0 = x[0]
drelu_dw0 = drelu_dxw0 * dmul_dw0
dmul_dw1 = x[1]
drelu_dw1 = drelu_dxw1 * dmul_dw1
dmul_dw2 = x[2]
drelu_dw2 = drelu_dxw2 * dmul_dw2

print(drelu_dx0, drelu_dw0, drelu_dx1, drelu_dw1, drelu_dx2, drelu_dw2)

1.0
1.0 1.0 1.0 1.0
-3.0 1.0 -1.0 -2.0 2.0 3.0


In [152]:
dx = [drelu_dx0, drelu_dx1, drelu_dx2]
dw = [drelu_dw0, drelu_dw1, drelu_dw2]
db = drelu_db

In [162]:
w += (np.multiply(-0.001, dw))
b += -0.001 * db

array([-3.001, -0.998,  1.997])

In [177]:
z = np.dot(x,w.T)+b
y =(z,0)
y

(5.985, 0)

In [182]:
dvalues = np.array([[1.,1.,1.],
                   [2.,2.,2.],
                   [3.,3.,3.]])

weights = np.array([[0.2, 0.8, -0.5, 1],
                    [0.5, -0.91, 0.26, -0.5],
                    [-0.26, -0.27, 0.17, 0.87]]).T

dinputs = np.dot(dvalues, weights.T)
dinputs

array([[ 0.44, -0.38, -0.07,  1.37],
       [ 0.88, -0.76, -0.14,  2.74],
       [ 1.32, -1.14, -0.21,  4.11]])

In [171]:
dvalues = np.array([[1.,1.,1.],
                   [2.,2.,2.],
                   [3.,3.,3.]])

inputs = np.array([[1, 2, 3, 2.5],
                    [2.0, 5.0, -1.0, 2.0],
                     [-1.5, 2.7, 3.3, -0.8]])
dweights = np.dot(inputs.T, dvalues)
dweights


array([[ 0.5,  0.5,  0.5],
       [20.1, 20.1, 20.1],
       [10.9, 10.9, 10.9],
       [ 4.1,  4.1,  4.1]])

In [175]:
dvalues = np.array([[1.,1.,1.],
                   [2.,2.,2.],
                   [3.,3.,3.]])

biases = [2,3,0.5]

dbiases = np.sum(dvalues, axis=0, keepdims=True)
dbiases

array([[6., 6., 6.]])

In [187]:
z = np.array([[1,2,-3,-4],
             [2, -7, -1, 3],
             [-1, 2, 5, -1]])

dvalues = np.array([[1,2,3,4,],
                   [5,6,7,8],
                   [9,10,11,12]])

drelu = dvalues.copy()
drelu[z<=0]=0

drelu

array([[ 1,  2,  0,  0],
       [ 5,  0,  0,  8],
       [ 0, 10, 11,  0]])

In [206]:
nnfs.init()
softmax_out = np.array([[0.7, 0.1, 0.2],
                      [0.1, 0.5, 0.4],
                      [0.02, 0.9, 0.08]])
class_targets = np.array([0, 1, 1])

softmax_loss = Activation_Softmax_Loss_CategoricalCrossentropy()
softmax_loss.backward(softmax_out, class_targets)
dvalues1 = softmax_loss.dinputs

activation = Acti_SoftMax()
activation.output = softmax_out

loss = Loss_CategoCrossentropy()
loss.backward(softmax_out, class_targets)
activation.backward(loss.dinputs)

dvalues2 = activation.dinputs

dvalues1, dvalues2

(array([[-0.1       ,  0.03333333,  0.06666667],
        [ 0.03333333, -0.16666667,  0.13333333],
        [ 0.00666667, -0.03333333,  0.02666667]]),
 array([[-0.09999999,  0.03333334,  0.06666667],
        [ 0.03333334, -0.16666667,  0.13333334],
        [ 0.00666667, -0.03333333,  0.02666667]]))