# Library load

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from nnfs.datasets import spiral_data
import numpy as np
import nnfs
import time
import torch
import torch.nn.functional as F

# Seed every random number generator used in this notebook so that a
# "Restart & Run All" reproduces the same weights and outputs.
np.random.seed(42)
# NOTE(review): nnfs.init() appears to reseed NumPy itself (random_seed=0 by
# default), which would override the seed set on the line above -- confirm
# which seed is actually intended.
nnfs.init()
# torch.randn draws from PyTorch's own generator, which the NumPy seed does
# not touch, so it has to be seeded separately.
torch.manual_seed(42)

Neural network layer and activation function definitions

In [2]:
# Updated Layer_Dense class with activation function support
class Layer_DenseNumpy:
    """Fully connected layer stored as NumPy arrays, with an optional activation."""

    def __init__(self, n_inputs, n_neurons, activation=None):
        """
        Create the layer parameters.

        :param n_inputs: Size of each input sample (rows of the weight matrix)
        :param n_neurons: Number of neurons (columns of the weight matrix)
        :param activation: Optional callable applied to the affine output;
            when it is falsy the layer output is the raw affine result
        """
        weight_shape = (n_inputs, n_neurons)
        # Small Gaussian weights and zero biases.
        self.weights = np.random.randn(*weight_shape) * 0.01
        self.biases = np.zeros((1, n_neurons))
        self.activation = activation

    def forward_prop(self, inputs):
        """
        Run the layer on ``inputs`` and keep the results on the instance.

        Sets ``self.output_raw`` to ``inputs @ weights + biases`` and
        ``self.output`` to the activation applied to ``output_raw`` (or to
        ``output_raw`` itself when no activation is configured).

        :param inputs: Input data
        """
        pre_activation = inputs @ self.weights + self.biases
        self.output_raw = pre_activation
        if self.activation:
            self.output = self.activation(pre_activation)
        else:
            self.output = pre_activation

class Layer_DenseTorch:
    """Fully connected layer stored as PyTorch tensors, with an optional activation."""

    def __init__(self, n_inputs, n_neurons, activation=None, device=None):
        """
        Initialize weights, biases, and activation function for the layer.
        :param n_inputs: Number of inputs to the layer
        :param n_neurons: Number of neurons in the layer
        :param activation: Activation function (e.g., relu, sigmoid, tanh, softmax);
            when falsy the layer output is the raw affine result
        :param device: Device on which the parameters are allocated (anything
            accepted by ``torch.device``). When None, CUDA is used if it is
            available and the CPU otherwise.
        """
        # Keep the historical default (GPU when present) but let the caller
        # override it so the layer can live on the same device as its inputs.
        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        self.weights = 0.01 * torch.randn(n_inputs, n_neurons, device=self.device)
        self.biases = torch.zeros(1, n_neurons, device=self.device)
        self.activation = activation

    def forward_prop(self, inputs):
        """
        Perform the forward pass for the layer.

        Stores ``inputs @ weights + biases`` in ``self.output_raw`` and the
        activated result (or the raw result when no activation is set) in
        ``self.output``. Nothing is returned.

        :param inputs: Input data; it must be on the same device as the
            layer parameters for the matrix product to succeed
        """
        self.output_raw = inputs @ self.weights + self.biases
        self.output = self.activation(self.output_raw) if self.activation else self.output_raw

# Activation functions for the NumPy (CPU) layers, looked up by name.
def _softmax_numpy(x):
    """Softmax along axis 1 of ``x``.

    The per-row maximum is subtracted before exponentiating so that large
    inputs do not overflow. The shifted exponentials are computed once and
    reused for both the numerator and the normalising sum.
    """
    shifted_exp = np.exp(x - np.max(x, axis=1, keepdims=True))
    return shifted_exp / np.sum(shifted_exp, axis=1, keepdims=True)


activation_functions = {
    "linear": lambda x: x,
    "relu": lambda x: np.maximum(0, x),
    "sigmoid": lambda x: 1 / (1 + np.exp(-x)),
    "tanh": lambda x: np.tanh(x),
    "softmax": _softmax_numpy,
}

# PyTorch counterparts of the activations above, looked up by the same names.
activation_functions_torch = dict(
    linear=lambda x: x,
    relu=F.relu,
    sigmoid=torch.sigmoid,
    tanh=torch.tanh,
    # Normalise across dim 1 of the input tensor.
    softmax=lambda x: F.softmax(x, dim=1),
)

In [3]:
# Generate the spiral toy dataset: 3 classes with 100 samples requested per class.
X, y = spiral_data(classes=3, samples=100)
# Report the container types, dtypes and shapes of features and labels.
print(*map(type, (X, y)), X.dtype, y.dtype, X.shape, y.shape)

<class 'numpy.ndarray'> <class 'numpy.ndarray'> float32 uint8 (300, 2) (300,)


In [4]:
# Put the data on the GPU when CUDA is available and on the CPU otherwise.
# Calling .cuda() unconditionally raises on machines without a CUDA device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
X_torch = torch.tensor(X, dtype=torch.float32, device=device)
# NOTE(review): labels are converted to float32 here; classification losses
# usually expect integer class indices -- confirm before adding a loss.
y_torch = torch.tensor(y, dtype=torch.float32, device=device)

# Quick sanity check of the feature tensor: shape, rank and device.
print(X_torch.shape)
print(X_torch.ndim)
print(X_torch.device)

torch.Size([300, 2])
2
cuda:0


Forward pass with Torch

In [8]:
# Three dense layers built in order: 2 -> 3 (relu), 3 -> 3 (softmax), 3 -> 1 (linear).
dense1, dense2, dense3 = (
    Layer_DenseTorch(n_in, n_out, activation=activation_functions_torch[act_name])
    for n_in, n_out, act_name in (
        (2, 3, "relu"),
        (3, 3, "softmax"),
        (3, 1, "linear"),
    )
)

In [9]:
# Feed the data through the three layers in sequence: each layer stores its
# result in .output, which becomes the input of the next layer.
_signal = X_torch
for _layer in (dense1, dense2, dense3):
    _layer.forward_prop(_signal)
    _signal = _layer.output

Forward pass outputs

In [10]:
# Each layer output has one row per input sample (300 here) and one column per neuron.
print('Dense1 output shape:', dense1.output.shape)  # torch.Size([300, 3])
print('Dense2 output shape:', dense2.output.shape)  # torch.Size([300, 3])
print('Dense3 output shape:', dense3.output.shape)  # torch.Size([300, 1])
print('\nDense1 output:', dense1.output[:3])  # First 3 samples
print('Dense2 output:', dense2.output[:3])  # First 3 samples
print('Dense3 output:', dense3.output[:3])  # First 3 samples

Dense1 output shape: torch.Size([300, 3])
Dense2 output shape: torch.Size([300, 3])
Dense3 output shape: torch.Size([300, 1])

Dense1 output: tensor([[0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 8.0048e-05, 0.0000e+00],
        [7.6293e-05, 1.0926e-04, 0.0000e+00]], device='cuda:0')
Dense2 output: tensor([[0.3333, 0.3333, 0.3333],
        [0.3333, 0.3333, 0.3333],
        [0.3333, 0.3333, 0.3333]], device='cuda:0')
Dense3 output: tensor([[0.0070],
        [0.0070],
        [0.0070]], device='cuda:0')


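La tercera capa de la red neuronal es coherente en cuanto a dimensiones, pero no tiene sentido meterla aquí. El output de la 2ª capa es aproximadamente 1/3 para cada una de las muestras, porque los pesos iniciales son muy pequeños (del orden de 0.01) y softmax recibe valores casi idénticos.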

Our example model is currently random. To remedy this, we need a way to
calculate how wrong the neural network is at its current predictions and begin adjusting weights
and biases to decrease error over time. Thus, our next step is to quantify how wrong the model is
through what’s defined as a loss function.