In [4]:
import numpy as np

# Find a small float to avoid division by zero
epsilon = np.finfo(float).eps

# Sigmoid function and its differentiation
def sigmoid(z):
    return 1/(1+np.exp(-z.clip(-500, 500)))

def dsigmoid(z):
    s = sigmoid(z)
    return 2 * s * (1-s)

# ReLU function and its differentiation
def relu(z):
    return np.maximum(0, z)

def drelu(z):
    return (z > 0).astype(float)

In [6]:
import numpy as np

epsilon = 1e-12  # Pequeño valor para evitar log(0) o división por 0

# Loss function L(y, yhat) and its differentiation
def cross_entropy(y, yhat):
    """
    Binary cross entropy function
    L = -y log(yhat) - (1 - y) log(1 - yhat)

    Args:
        y, yhat (np.array): 1xn matrices where n is the number of data instances

    Returns:
        average cross entropy value of shape 1x1, averaging over the n instances
    """
    yhat = yhat.clip(epsilon, 1 - epsilon)
    return (-(y.T @ np.log(yhat) + (1 - y).T @ np.log(1 - yhat)) / y.shape[1])

def d_cross_entropy(y, yhat):
    """
    Derivative of cross entropy with respect to yhat: dL/dyhat
    """
    yhat = yhat.clip(epsilon, 1 - epsilon)
    return -np.divide(y, yhat) + np.divide(1 - y, 1 - yhat)

In [105]:
import numpy as np

class mlp:
    """Multilayer Perceptron using NumPy"""
    
    def __init__(self, layersizes, activations, derivatives, lossderiv):
        """
        Inicializa la configuración de la red sin inicializar pesos ni sesgos.
        
        Args:
            layersizes (list): Lista con el número de neuronas por capa.
            activations (list): Lista de funciones de activación por capa.
            derivatives (list): Lista de derivadas de activaciones.
            lossderiv (function): Derivada de la función de pérdida.
        """
        self.layersizes = layersizes
        self.activations = activations
        self.derivatives = derivatives
        self.lossderiv = lossderiv
        
        L = len(layersizes)
        self.z = [None] * L
        self.W = [None] * L
        self.b = [None] * L
        self.a = [None] * L
        self.dz = [None] * L
        self.dW = [None] * L
        self.db = [None] * L
        self.da = [None] * L

    def initialize(self, seed=42):
        """Inicializa los pesos y sesgos con valores aleatorios."""
        np.random.seed(seed)
        sigma = 0.1

        for l, (n_in, n_out) in enumerate(zip(self.layersizes, self.layersizes[1:]), 1):
            self.W[l] = np.random.randn(n_in, n_out) * sigma
            self.b[l] = np.random.randn(1, n_out) * sigma

    def forward(self, x):
        """
        Propagación hacia adelante (forward pass).
        
        Args:
            x (np.array): Entrada a la red de tamaño (n_instancias, n_features)
        
        Returns:
            Salida de la red neuronal
        """
        self.a[0] = x

        for l, func in enumerate(self.activations, 1):
            self.z[l] = self.a[l-1] @ self.W[l] + self.b[l]
            self.a[l] = func(self.z[l])

        return self.a[-1]

    def backward(self, y, yhat):
        """
        Propagación hacia atrás (backpropagation).
        
        Args:
            y (np.array): Salida verdadera
            yhat (np.array): Salida predicha por la red
        """

        self.da[-1] = self.lossderiv(y, yhat)

        for l, func in reversed(list(enumerate(self.derivatives, 1))):
            self.dz[l] = self.da[l] * func(self.z[l])
            self.dW[l] = self.a[l-1].T @ self.dz[l]
            self.db[l] = np.mean(self.dz[l], axis=0, keepdims=True)
            self.da[l-1] = self.dz[l] @ self.W[l].T

    def update(self, eta):
        """
        Actualiza los pesos y sesgos con descenso del gradiente.
        
        Args:
            eta (float): Tasa de aprendizaje
        """
        for l in range(1, len(self.W)):
            self.W[l] -= eta * self.dW[l]
            self.b[l] -= eta * self.db[l]

In [107]:
from sklearn.datasets import make_circles
from sklearn.metrics import accuracy_score

# Make data: Two circles on x-y plane as a classification problem
X, y = make_circles(n_samples=1000, factor=0.5, noise=0.1)
y = y.reshape(-1,1) # our model expects a 2D array of (n_sample, n_dim)

In [111]:
# Build a model
model = mlp(layersizes=[2, 4, 3, 1],
    activations=[relu, relu, sigmoid],
    derivatives=[drelu, drelu, dsigmoid],
    lossderiv=d_cross_entropy)
model.initialize()

yhat = model.forward(X)
loss = cross_entropy(y, yhat)
score = accuracy_score(y, (yhat > 0.5))

print(f'Before training- loss value {loss} accuracy {score}')

[None, array([[ 0.00021357,  0.07463643, -0.19581836, -0.0682563 ],
       [-0.046233  ,  0.02507806,  0.13704659,  0.09943072],
       [-0.02309038,  0.05123046, -0.03799564,  0.00863654],
       ...,
       [-0.02990578,  0.04890731, -0.02031112,  0.00868278],
       [-0.00454518,  0.05837392, -0.09129207,  0.00443129],
       [-0.00150395,  0.05357837, -0.06246294,  0.03335386]]), array([[-0.05862982,  0.00349199, -0.11279077],
       [-0.07053745, -0.01013143, -0.10839144],
       [-0.05751389,  0.0059618 , -0.11471994],
       ...,
       [-0.05738431,  0.00619741, -0.11479954],
       [-0.05782061,  0.00520989, -0.11389632],
       [-0.05820397,  0.00589091, -0.11816776]]), array([[-0.0603804 ],
       [-0.06017066],
       [-0.06052875],
       [-0.06045002],
       [-0.06017066],
       [-0.06035159],
       [-0.06040606],
       [-0.06034423],
       [-0.06046445],
       [-0.06017066],
       [-0.06040993],
       [-0.06047198],
       [-0.06044552],
       [-0.06033529],
   

In [82]:
# Entrenamiento por épocas
n_epochs = 150
learning_rate = 0.005

for n in range(n_epochs):
    model.forward(X)
    yhat = model.a[-1]
    model.backward(y, yhat)
    model.update(learning_rate)
    
    loss = cross_entropy(y, yhat)
    score = accuracy_score(y, (yhat > 0.5))
    
    print(f'Iteration {n} - Loss value: {loss.item():.4f} - Accuracy: {score:.4f}')

Iteration 0 - Loss value: 693.6316 - Accuracy: 0.5000
Iteration 1 - Loss value: 693.6228 - Accuracy: 0.5000
Iteration 2 - Loss value: 693.6159 - Accuracy: 0.5000
Iteration 3 - Loss value: 693.6102 - Accuracy: 0.5000
Iteration 4 - Loss value: 693.6052 - Accuracy: 0.5000
Iteration 5 - Loss value: 693.6006 - Accuracy: 0.5000
Iteration 6 - Loss value: 693.5965 - Accuracy: 0.5000
Iteration 7 - Loss value: 693.5927 - Accuracy: 0.5000
Iteration 8 - Loss value: 693.5891 - Accuracy: 0.5000
Iteration 9 - Loss value: 693.5858 - Accuracy: 0.5000
Iteration 10 - Loss value: 693.5827 - Accuracy: 0.5000
Iteration 11 - Loss value: 693.5798 - Accuracy: 0.5000
Iteration 12 - Loss value: 693.5770 - Accuracy: 0.5000
Iteration 13 - Loss value: 693.5743 - Accuracy: 0.5000
Iteration 14 - Loss value: 693.5717 - Accuracy: 0.5000
Iteration 15 - Loss value: 693.5692 - Accuracy: 0.5000
Iteration 16 - Loss value: 693.5668 - Accuracy: 0.5000
Iteration 17 - Loss value: 693.5644 - Accuracy: 0.5000
Iteration 18 - Loss 