In [None]:
!pip install micrograd

Collecting micrograd
  Downloading micrograd-0.1.0-py3-none-any.whl.metadata (2.6 kB)
Downloading micrograd-0.1.0-py3-none-any.whl (4.9 kB)
Installing collected packages: micrograd
Successfully installed micrograd-0.1.0


In [None]:
import numpy as np
from micrograd.engine import Value
import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Sigmoid activation function and its derivative
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The input is clipped to [-500, 500] first so the argument handed to
    ``np.exp`` stays bounded for large-magnitude values.
    """
    clipped = np.clip(x, -500, 500)
    return 1 / (1 + np.exp(-clipped))

def sigmoid_derivative(x):
    """Return the sigmoid's derivative at ``x``: sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

# ReLU activation function and its derivative
def relu(x):
    """Return max(0, x) element-wise (rectified linear unit)."""
    rectified = np.maximum(0, x)
    return rectified

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere (including x == 0)."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

# Training data: four samples (rows) with three features each
X_s = np.array(
    [
        [2.5, 3.5, -0.5],
        [4.0, -1.0, 0.5],
        [0.5, 1.5, 1.0],
        [3.0, 2.0, -1.5],
    ]
)

# Binary targets, one row per sample
y_s = np.array(
    [
        [1.0],
        [0.0],
        [0.0],
        [1.0],
    ]
)

# Network architecture and step size
input_size, hidden_size, output_size = 3, 4, 1
learning_rate = 0.01

# Weights and biases: small Gaussian values, seeded for reproducibility.
# The draw order (W1, b1, W2, b2, W3, b3) determines the values, so keep it.
np.random.seed(42)
W1 = np.random.randn(hidden_size, input_size) * 0.1   # first hidden layer
b1 = np.random.randn(hidden_size, 1) * 0.1
W2 = np.random.randn(hidden_size, hidden_size) * 0.1  # second hidden layer
b2 = np.random.randn(hidden_size, 1) * 0.1
W3 = np.random.randn(output_size, hidden_size) * 0.1  # output layer
b3 = np.random.randn(output_size, 1) * 0.1

# Forward propagation
def forward_propagation(X):
    """Run the network forward on ``X`` using the module-level weights.

    ``X`` is transposed before the first matrix product, so samples are
    expected in rows. Layers: linear -> ReLU -> linear -> ReLU -> linear ->
    sigmoid.

    Returns:
        Tuple ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and activation
        of each layer, in order. ``A3`` is the network output.
    """
    # First hidden layer
    pre_hidden1 = W1.dot(X.T) + b1
    act_hidden1 = relu(pre_hidden1)

    # Second hidden layer
    pre_hidden2 = W2.dot(act_hidden1) + b2
    act_hidden2 = relu(pre_hidden2)

    # Output layer
    pre_output = W3.dot(act_hidden2) + b3
    act_output = sigmoid(pre_output)

    return pre_hidden1, act_hidden1, pre_hidden2, act_hidden2, pre_output, act_output

# Backward propagation
def backward_propagation(X, y, Z1, A1, Z2, A2, Z3, A3):
    """Compute gradients of the mean-squared-error loss w.r.t. all parameters.

    The training loop reports ``np.mean((A3 - y.T) ** 2)``, so the error at the
    output pre-activation must include both the factor 2 from the square and
    the sigmoid derivative ``A3 * (1 - A3)``. The previous ``A3 - y.T`` was the
    gradient of a cross-entropy loss and did not match the loss being reported.

    Args:
        X: inputs with samples in rows (``X.shape[0]`` is the batch size).
        y: targets with samples in rows; transposed to line up with ``A3``.
        Z1, A1, Z2, A2, Z3, A3: values returned by ``forward_propagation``.
            ``Z3`` is accepted for interface compatibility; ``A3`` already
            holds ``sigmoid(Z3)``, which is all the derivative needs.

    Returns:
        Tuple ``(dW1, db1, dW2, db2, dW3, db3)`` of batch-averaged gradients.
        Reads the module-level ``W2`` and ``W3`` to propagate the error.
    """
    m = X.shape[0]
    # np.mean averages over every element of A3, i.e. outputs * samples.
    # The 1/m part is applied below, the 1/outputs part here.
    n_outputs = A3.shape[0]

    # Output layer: dL/dZ3 = dL/dA3 * sigmoid'(Z3), with sigmoid'(Z3) = A3 * (1 - A3)
    dZ3 = (2.0 / n_outputs) * (A3 - y.T) * A3 * (1 - A3)
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Second hidden layer
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * relu_derivative(Z2)
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # First hidden layer
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = (1 / m) * np.dot(dZ1, X)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3

# Update parameters
def update_parameters(dW1, db1, dW2, db2, dW3, db3):
    """Take one gradient-descent step on the module-level weights and biases.

    Each parameter array is modified in place by subtracting
    ``learning_rate`` times its gradient. Nothing is returned.
    """
    parameter_gradient_pairs = (
        (W1, dW1),
        (b1, db1),
        (W2, dW2),
        (b2, db2),
        (W3, dW3),
        (b3, db3),
    )
    for parameter, gradient in parameter_gradient_pairs:
        # In-place subtraction mutates the shared NumPy array itself.
        parameter -= learning_rate * gradient

# Training loop: full-batch gradient descent on the NumPy network
epochs = 1000
for epoch in range(epochs):
    # Forward propagation
    Z1, A1, Z2, A2, Z3, A3 = forward_propagation(X_s)

    # Compute the loss (mean squared error)
    loss = np.mean((A3 - y_s.T) ** 2)

    # Backward propagation
    dW1, db1, dW2, db2, dW3, db3 = backward_propagation(X_s, y_s, Z1, A1, Z2, A2, Z3, A3)

    # Update weights and biases
    update_parameters(dW1, db1, dW2, db2, dW3, db3)

    # Print the loss every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss}")

# Final output after training
_, _, _, _, _, A3 = forward_propagation(X_s)
# "\S" is not a valid escape sequence and printed a literal backslash;
# a leading newline ("\n") was intended.
print("\nSalida:")
print(A3.T)

Epoch 0, Loss: 0.24971877880042945
Epoch 100, Loss: 0.24933660645515116
Epoch 200, Loss: 0.24892306028616218
Epoch 300, Loss: 0.248469448364932
Epoch 400, Loss: 0.24787026357066994
Epoch 500, Loss: 0.24703075238069847
Epoch 600, Loss: 0.2458039996735075
Epoch 700, Loss: 0.243949489101394
Epoch 800, Loss: 0.2410741738384928
Epoch 900, Loss: 0.2365438186789336
\Salida:
[[0.50992698]
 [0.46374528]
 [0.47275238]
 [0.50986337]]


In [None]:
# Verification using Micrograd
# Builds a small scalar expression graph from two micrograd Values and
# back-propagates through it.
# NOTE(review): this expression only involves `a` and `b`; it does not touch
# the NumPy network's weights or data, so it exercises micrograd itself rather
# than checking the network above. Confirm whether a real comparison was intended.
a = Value(-4.0)
b = Value(2.0)
c = a + b
d = a * b + b * a + b ** 3
c += c + 1 + c + (-a)
d += d * 2 + (b - a).relu()
d += 3 * d + (b - a).relu()
e = c - d
f = e ** 2
g = f / 2.0
g += 10.0 / f
print(f"{g.data:.4f}")  # prints the result of this forward pass
g.backward()  # populates .grad on the Values g was built from
print(f"{a.grad:.4f}")  # prints the numerical value of dg/da
print(f"{b.grad:.4f}")  # prints the numerical value of dg/db

# Verification using PyTorch: same architecture, loss and optimiser settings
# as the NumPy network, trained on the same data.
# Seed PyTorch's global RNG so the randomly initialised nn.Linear weights, and
# therefore the printed losses, are identical on every Restart & Run All.
# Note: PyTorch's default initialisation differs from the NumPy one
# (randn * 0.1), so the two loss curves are not expected to match exactly.
torch.manual_seed(42)

X_torch = torch.tensor(X_s, dtype=torch.float32)
y_torch = torch.tensor(y_s, dtype=torch.float32)

# linear -> ReLU -> linear -> ReLU -> linear -> sigmoid
model = nn.Sequential(
    nn.Linear(input_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, output_size),
    nn.Sigmoid()
)

criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training loop for PyTorch
for epoch in range(epochs):
    # Forward pass
    outputs = model(X_torch)
    loss = criterion(outputs, y_torch)

    # Backward pass and optimisation step (gradients are cleared first because
    # PyTorch accumulates them across backward() calls)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if epoch % 100 == 0:
        print(f"[PyTorch] Época {epoch}, Pérdida: {loss.item()}")

# Final output using PyTorch (no gradient tracking needed for inference)
with torch.no_grad():
    final_output = model(X_torch)
    print("\nSalida final usando PyTorch:")
    print(final_output.numpy())

# Sigmoid activation function and its derivative
# NOTE(review): from here on this cell repeats definitions that already appear
# in the earlier training cell; the later definition is the one in effect.
def sigmoid(x):
    """Return the logistic sigmoid 1 / (1 + exp(-x)), element-wise.

    The input is clipped to [-500, 500] first so the argument handed to
    ``np.exp`` stays bounded for large-magnitude values.
    """
    bounded = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-bounded)
    return 1 / denominator

def sigmoid_derivative(x):
    """Return the sigmoid's derivative at ``x``: sigmoid(x) * (1 - sigmoid(x))."""
    sig = sigmoid(x)
    one_minus_sig = 1 - sig
    return sig * one_minus_sig

# ReLU activation function and its derivative
def relu(x):
    """Return max(0, x) element-wise (rectified linear unit)."""
    clamped = np.maximum(0, x)
    return clamped

def relu_derivative(x):
    """Return 1 where ``x`` is strictly positive and 0 elsewhere (including x == 0)."""
    active = x > 0
    return np.where(active, 1, 0)

# Training data: four samples (rows) with three features each
X_s = np.array(
    [
        [2.5, 3.5, -0.5],
        [4.0, -1.0, 0.5],
        [0.5, 1.5, 1.0],
        [3.0, 2.0, -1.5],
    ]
)

# Binary targets, one row per sample
y_s = np.array(
    [
        [1.0],
        [0.0],
        [0.0],
        [1.0],
    ]
)

# Network architecture and step size
input_size, hidden_size, output_size = 3, 4, 1
learning_rate = 0.01

# Re-seed and re-draw the weights and biases, so training in this cell starts
# from the same initial values as any other run seeded with 42.
# The draw order (W1, b1, W2, b2, W3, b3) determines the values, so keep it.
np.random.seed(42)
W1 = np.random.randn(hidden_size, input_size) * 0.1   # first hidden layer
b1 = np.random.randn(hidden_size, 1) * 0.1
W2 = np.random.randn(hidden_size, hidden_size) * 0.1  # second hidden layer
b2 = np.random.randn(hidden_size, 1) * 0.1
W3 = np.random.randn(output_size, hidden_size) * 0.1  # output layer
b3 = np.random.randn(output_size, 1) * 0.1

# Forward propagation
def forward_propagation(X):
    """Run the network forward on ``X`` using the module-level weights.

    ``X`` is transposed before the first matrix product, so samples are
    expected in rows. Layers: linear -> ReLU -> linear -> ReLU -> linear ->
    sigmoid.

    Returns:
        Tuple ``(Z1, A1, Z2, A2, Z3, A3)``: the pre-activation and activation
        of each layer, in order. ``A3`` is the network output.
    """
    # First hidden layer
    z_first = W1.dot(X.T) + b1
    a_first = relu(z_first)

    # Second hidden layer
    z_second = W2.dot(a_first) + b2
    a_second = relu(z_second)

    # Output layer
    z_out = W3.dot(a_second) + b3
    a_out = sigmoid(z_out)

    return z_first, a_first, z_second, a_second, z_out, a_out

# Backward propagation
def backward_propagation(X, y, Z1, A1, Z2, A2, Z3, A3):
    """Compute gradients of the mean-squared-error loss w.r.t. all parameters.

    The training loop reports ``np.mean((A3 - y.T) ** 2)``, so the error at the
    output pre-activation must include both the factor 2 from the square and
    the sigmoid derivative ``A3 * (1 - A3)``. The previous ``A3 - y.T`` was the
    gradient of a cross-entropy loss and did not match the loss being reported.

    Args:
        X: inputs with samples in rows (``X.shape[0]`` is the batch size).
        y: targets with samples in rows; transposed to line up with ``A3``.
        Z1, A1, Z2, A2, Z3, A3: values returned by ``forward_propagation``.
            ``Z3`` is accepted for interface compatibility; ``A3`` already
            holds ``sigmoid(Z3)``, which is all the derivative needs.

    Returns:
        Tuple ``(dW1, db1, dW2, db2, dW3, db3)`` of batch-averaged gradients.
        Reads the module-level ``W2`` and ``W3`` to propagate the error.
    """
    m = X.shape[0]
    # np.mean averages over every element of A3, i.e. outputs * samples.
    # The 1/m part is applied below, the 1/outputs part here.
    n_outputs = A3.shape[0]

    # Output layer: dL/dZ3 = dL/dA3 * sigmoid'(Z3), with sigmoid'(Z3) = A3 * (1 - A3)
    dZ3 = (2.0 / n_outputs) * (A3 - y.T) * A3 * (1 - A3)
    dW3 = (1 / m) * np.dot(dZ3, A2.T)
    db3 = (1 / m) * np.sum(dZ3, axis=1, keepdims=True)

    # Second hidden layer
    dA2 = np.dot(W3.T, dZ3)
    dZ2 = dA2 * relu_derivative(Z2)
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # First hidden layer
    dA1 = np.dot(W2.T, dZ2)
    dZ1 = dA1 * relu_derivative(Z1)
    dW1 = (1 / m) * np.dot(dZ1, X)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3

# Update parameters
def update_parameters(dW1, db1, dW2, db2, dW3, db3):
    """Take one gradient-descent step on the module-level weights and biases.

    Each parameter array is modified in place by subtracting
    ``learning_rate`` times its gradient. Nothing is returned.
    """
    parameter_gradient_pairs = (
        (W1, dW1),
        (b1, db1),
        (W2, dW2),
        (b2, db2),
        (W3, dW3),
        (b3, db3),
    )
    for parameter, gradient in parameter_gradient_pairs:
        # In-place subtraction mutates the shared NumPy array itself.
        parameter -= learning_rate * gradient

# Training loop: full-batch gradient descent on the NumPy network
epochs = 1000
for epoch in range(epochs):
    # Forward pass over the whole batch
    Z1, A1, Z2, A2, Z3, A3 = forward_propagation(X_s)

    # Mean squared error between predictions and targets
    loss = np.mean(np.square(A3 - y_s.T))

    # Backward pass, then one gradient-descent step
    dW1, db1, dW2, db2, dW3, db3 = backward_propagation(
        X_s, y_s, Z1, A1, Z2, A2, Z3, A3
    )
    update_parameters(dW1, db1, dW2, db2, dW3, db3)

    # Report the loss once every 100 epochs
    if not epoch % 100:
        print(f"Epoch {epoch}, Loss: {loss}")

# Final predictions after training
_, _, _, _, _, A3 = forward_propagation(X_s)
print("\nSalida final:")
print(A3.T)


2112.5024
-2664.9970
-3249.9964
[PyTorch] Época 0, Pérdida: 0.22794649004936218
[PyTorch] Época 100, Pérdida: 0.21062523126602173
[PyTorch] Época 200, Pérdida: 0.19227643311023712
[PyTorch] Época 300, Pérdida: 0.1730826199054718
[PyTorch] Época 400, Pérdida: 0.15566685795783997
[PyTorch] Época 500, Pérdida: 0.14137528836727142
[PyTorch] Época 600, Pérdida: 0.12852926552295685
[PyTorch] Época 700, Pérdida: 0.11658534407615662
[PyTorch] Época 800, Pérdida: 0.10531941056251526
[PyTorch] Época 900, Pérdida: 0.09473837912082672

Salida final usando PyTorch:
[[0.64221144]
 [0.06450532]
 [0.2831343 ]
 [0.6428142 ]]
Epoch 0, Loss: 0.24971877880042945
Epoch 100, Loss: 0.24933660645515116
Epoch 200, Loss: 0.24892306028616218
Epoch 300, Loss: 0.248469448364932
Epoch 400, Loss: 0.24787026357066994
Epoch 500, Loss: 0.24703075238069847
Epoch 600, Loss: 0.2458039996735075
Epoch 700, Loss: 0.243949489101394
Epoch 800, Loss: 0.2410741738384928
Epoch 900, Loss: 0.2365438186789336

Salida final:
[[0.5099