In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
inputs = [1, 2, 3, 2.5]
weights = [0.2, 0.8, -0.5, 1.0]
bias = 2

# A single neuron: multiply every input by its weight, add the products up
# in order, then add the bias.
weighted_sum = 0
for x, w in zip(inputs, weights):
    weighted_sum += x * w

output = weighted_sum + bias
output

In [None]:
# One row of weights per neuron (3 neurons, 4 inputs each).
weights = [
    [0.2, 0.8, -0.5, 1.0],
    [0.5, -0.91, 0.26, -0.5],
    [-0.26, -0.27, 0.17, 0.87],
]

# One bias per neuron.
biases = [2, 3, 0.5]

# Each row of `weights` dotted with `inputs` gives one weighted sum per neuron.
weighted_sums = np.dot(weights, inputs)
weighted_sums + biases


In [None]:
np.array([1, 2]) * [3, 4]

In [None]:
# A batch of three samples, four features each.
inputs = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8],
]

# `weights` holds one row per neuron, so it is transposed to line its columns
# up with the features of every sample.
batch_outputs = np.dot(inputs, np.array(weights).T)
batch_outputs + biases

In [None]:
# Second layer: 3 neurons, each fed by the 3 outputs of the first layer.
weights2 = [
    [0.1, -0.14, 0.5],
    [-0.5, 0.12, -0.33],
    [-0.44, 0.73, -0.13],
]

biases2 = [-1, 2, -0.5]

# Layer 1 consumes the input batch.
layer1_weighted = np.dot(inputs, np.array(weights).T)
layer1_outputs = layer1_weighted + biases

# Layer 2 consumes the outputs of layer 1.
layer2_weighted = np.dot(layer1_outputs, np.array(weights2).T)
layer2_outputs = layer2_weighted + biases2
layer2_outputs

In [None]:
# Seed NumPy's global random generator so the random weights drawn by
# Layer_Dense below are repeatable when this cell is re-run.
np.random.seed(0)
# Batch of three samples with four features each.
X = [
    [1, 2, 3, 2.5],
    [2.0, 5.0, -1.0, 2.0],
    [-1.5, 2.7, 3.3, -0.8]
]

class Layer_Dense:
    """Fully connected layer computing ``inputs . weights + biases``."""

    def __init__(self, n_inputs, n_neurons) -> None:
        """Draw small random weights and start every bias at zero.

        Args:
            n_inputs: number of values each sample feeds into the layer.
            n_neurons: number of neurons (output values per sample).
        """
        # Weights are stored as (n_inputs, n_neurons), so forward() needs no transpose.
        weight_shape = (n_inputs, n_neurons)
        self.weights = 0.01 * np.random.randn(*weight_shape)
        # A single row of biases that broadcasts over every sample in the batch.
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """Compute the layer output for ``inputs`` and store it in ``self.output``."""
        weighted = np.dot(inputs, self.weights)
        self.output = weighted + self.biases

# First layer: one input per feature of a sample (4 here), 7 neurons.
n_features = len(X[0])
layer1 = Layer_Dense(n_features, 7)
layer1.forward(X)
layer1.output

# Second layer: takes the 7 outputs of layer1 and reduces them to 2 neurons.
layer2 = Layer_Dense(7, 2)
layer2.forward(layer1.output)
layer2.output

In [None]:
np.zeros((3,1))

## Hidden Layer Activation

 - [How to turn integers into Fibonacci coding efficiently?](https://stackoverflow.com/q/37479718/3563013)
 - [The Magic of ReLU - `max(0, x)`](https://youtu.be/gmjzbpSVY1A?t=484)

In [None]:
# %pip install nnfs

In [None]:
# https://gist.github.com/Sentdex/454cb20ec5acf0e76ee8ab8448e6266c
# https://cs231n.github.io/neural-networks-case-study/
from nnfs.datasets import spiral_data

# 100 samples for each of 3 classes; colour every point by its class label.
X, y = spiral_data(samples=100, classes=3)
plt.scatter(X[:, 0], X[:, 1], c=y, cmap="brg")

In [None]:
X

In [None]:
import nnfs

nnfs.init()

class Activation_ReLU:
    """Rectified linear activation: element-wise ``max(0, x)``."""

    def forward(self, inputs):
        """Store the rectified ``inputs`` in ``self.output``."""
        rectified = np.maximum(0, inputs)
        self.output = rectified


# Dense layer: 2 input features per sample, 5 neurons.
layer1 = Layer_Dense(n_inputs=2, n_neurons=5)
layer1.forward(X)
print(layer1.output)

# ReLU replaces every negative value of the layer output with zero.
activation1 = Activation_ReLU()
activation1.forward(layer1.output)
print(activation1.output)

# Softmax Activation

For the output layer we don't use a hidden-layer activation such as ReLU.

Instead we exponentiate the outputs and then normalize them, so the values become probabilities: non-negative numbers that sum to 1, like percentages.

In [None]:
import math

layer_outputs = [4.8, 1.21, 2.385]
E = math.e

exp_values = []
for output in layer_outputs:
    exp_values.append(E**output)

norm_values = []
for value in exp_values:
    norm_values.append(value / sum(exp_values))

print(norm_values)
print(sum(norm_values))

The same calculation using NumPy:

In [None]:
# Vectorised softmax for a single sample.
exp_values = np.exp(layer_outputs)
exp_total = exp_values.sum()
norm_values = exp_values / exp_total

print(norm_values)
print(sum(norm_values))

In [None]:
layer_outputs = [
    [4.8, 1.21, 2.385],
    [8.9, -1.81, 0.2],
    [1.41, 1.051, 0.026]
]

exp_values = np.exp(layer_outputs)
norm_values = exp_values / np.sum(exp_values, axis=1, keepdims=True)
print(norm_values)
print(sum(norm_values))

In [None]:
class Activation_Softmax:
    """Softmax activation: turns each row of scores into values that sum to 1."""

    def forward(self, inputs):
        """Compute the row-wise softmax of ``inputs`` and store it in ``self.output``."""
        # Subtract each row's maximum before exponentiating, so the largest
        # exponent is exp(0) = 1 and large inputs cannot overflow.
        row_max = np.max(inputs, axis=1, keepdims=True)
        shifted = inputs - row_max
        exp_values = np.exp(shifted)
        # Divide every row by its own total.
        row_totals = np.sum(exp_values, axis=1, keepdims=True)
        self.output = exp_values / row_totals

X, y = spiral_data(samples=100, classes=3)

# Hidden layer: 2 input features -> 3 neurons, followed by ReLU.
dense1 = Layer_Dense(2, 3)
activation1 = Activation_ReLU()

# Output layer: 3 hidden values -> 3 class scores, followed by softmax so that
# each row of the final output sums to 1. (This was ReLU before, which left
# the Activation_Softmax class defined above unused.)
dense2 = Layer_Dense(3, 3)
activation2 = Activation_Softmax()

dense1.forward(X)
activation1.forward(dense1.output)

dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Raw scores of the output layer next to the softmax result for the same samples.
print(dense2.output[:5])
print(activation2.output[:5])

# Loss Function

[One hot encoding](https://youtu.be/dEXPMQXoiLc?t=310)

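Categorical cross-entropy loss for sample $i$:

$$L_i = -\sum_j y_{i,j}\,\log(\hat{y}_{i,j}) = -\log(\hat{y}_{i,k})$$

where $y_i$ is the one-hot target, $\hat{y}_i$ is the softmax output and $k$ is the index of the correct class (the second form holds because every other term of a one-hot target is zero).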

In [None]:
import math

softmax_output = [0.7, 0.1, 0.2]
target_output = [1, 0, 0]

# Cross-entropy: add up log(prediction) * target over all classes, then negate.
log_likelihood = 0
for confidence, target in zip(softmax_output, target_output):
    log_likelihood += math.log(confidence) * target
loss = -log_likelihood

print(loss)
# With a one-hot target only the correct class contributes to the sum.
print(-math.log(softmax_output[0]))

In [None]:
softmax_outputs = np.array([
    [0.7, 0.1, 0.2],
    [0.1, 0.5, 0.4],
    [0.02, 0.9, 0.08]
])
class_targets = [0, 1, 1]

# For each sample (row), pick the predicted confidence of its target class.
# The row indices come from the batch size instead of a hard-coded [0, 1, 2],
# and class_targets is passed un-nested so the result is one loss per sample
# (shape (3,)) instead of a (1, 3) array.
sample_rows = range(len(softmax_outputs))
sample_losses = -np.log(softmax_outputs[sample_rows, class_targets])
print(sample_losses)

In [None]:
class Loss:
    """Base class for losses; subclasses supply the per-sample ``forward``."""

    def forward(self, output, y):
        """Return one loss value per sample. Must be overridden by subclasses."""
        raise NotImplementedError("Loss subclasses must implement forward()")

    def calculate(self, output, y):
        """Return the mean of the per-sample losses.

        Args:
            output: model predictions, passed through to ``forward``.
            y: target values, passed through to ``forward``.
        """
        sample_losses = self.forward(output, y)
        data_loss = np.mean(sample_losses)
        return data_loss


class Loss_CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy: ``-log`` of the confidence given to the true class."""

    def forward(self, y_pred, y_true):
        """Compute the negative log-likelihood of every sample.

        Args:
            y_pred: predicted probabilities, one row per sample.
            y_true: either class indices (1-D, one per sample) or one-hot
                rows (2-D, same shape as ``y_pred``). Lists are accepted.

        Returns:
            1-D array with one loss value per sample.

        Raises:
            ValueError: if ``y_true`` is neither 1-D nor 2-D.
        """
        samples = len(y_pred)
        # Accept plain Python lists as targets, which have no `.shape`.
        y_true = np.array(y_true)
        # Clip away exact 0 and 1 so log() never returns -inf, and clip both
        # ends by the same amount so the mean is not biased in one direction.
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if len(y_true.shape) == 1:
            # Class indices: pick one confidence per row.
            correct_confidences = y_pred_clipped[range(samples), y_true]
        elif len(y_true.shape) == 2:
            # One-hot rows: the product zeroes every class except the true one.
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError(
                "y_true must be 1-D class indices or 2-D one-hot rows, "
                f"got an array with {len(y_true.shape)} dimensions"
            )

        negative_log_likelihoods = -np.log(correct_confidences)
        return negative_log_likelihoods

In [None]:
nnfs.init()

# Data
X, y = spiral_data(samples=100, classes=3)

# Model. dense1 is created before dense2 because both draw their weights from
# NumPy's global random generator.
dense1 = Layer_Dense(n_inputs=2, n_neurons=3)
dense2 = Layer_Dense(n_inputs=3, n_neurons=3)
activation1 = Activation_ReLU()
activation2 = Activation_Softmax()

# Forward pass: dense -> ReLU -> dense -> softmax
dense1.forward(X)
activation1.forward(dense1.output)
dense2.forward(activation1.output)
activation2.forward(dense2.output)

# Mean categorical cross-entropy of the softmax output against the labels.
loss_function = Loss_CategoricalCrossEntropy()
loss = loss_function.calculate(activation2.output, y)
loss