In [1]:
import numpy as np
from model import *
from layers import *

In [2]:
def numerical_gradient_conv(model, layer, X, y, loss_fn, eps=1e-5):
    """Estimate d(loss)/d(layer.w) with central finite differences.

    Every entry of ``layer.w`` is perturbed in place by ``+eps`` and ``-eps``.
    For each perturbation the loss is re-evaluated as
    ``loss_fn.forward_pass(model.forward(X), y)`` and the symmetric quotient
    ``(loss_plus - loss_minus) / (2 * eps)`` is stored for that entry.

    Args:
        model: object exposing ``forward(X)``.
        layer: object whose ``w`` attribute is a NumPy array. It is modified
            temporarily; each entry is restored before moving on, even if
            the forward pass or the loss raises.
        X: input handed unchanged to ``model.forward``.
        y: target handed unchanged to ``loss_fn.forward_pass``.
        loss_fn: object exposing ``forward_pass(prediction, y)``.
        eps: size of the perturbation applied to each weight.

    Returns:
        Array created with ``np.zeros_like(layer.w)`` (same shape and dtype
        as the weights) holding the numerical gradient. An empty weight
        array yields an empty gradient.
    """
    num_grad = np.zeros_like(layer.w)

    # np.ndindex yields every multi-index of the weight array and, unlike
    # np.nditer, simply yields nothing for a zero-size array.
    for idx in np.ndindex(*layer.w.shape):
        original = layer.w[idx]
        try:
            # loss at w + eps
            layer.w[idx] = original + eps
            loss_plus = loss_fn.forward_pass(model.forward(X), y)

            # loss at w - eps
            layer.w[idx] = original - eps
            loss_minus = loss_fn.forward_pass(model.forward(X), y)
        finally:
            # Always put the weight back so a failure mid-check does not
            # leave the model silently perturbed.
            layer.w[idx] = original

        num_grad[idx] = (loss_plus - loss_minus) / (2 * eps)

    return num_grad


In [3]:
def relative_error(a, b, eps=1e-8):
    """Return the norm of ``a - b`` scaled by the combined norms of ``a`` and ``b``.

    Computes ``||a - b|| / (||a|| + ||b|| + eps)`` using ``np.linalg.norm``
    with its default arguments. ``eps`` is added to the denominator so the
    ratio stays defined when both inputs have zero norm.
    """
    difference_norm = np.linalg.norm(a - b)
    scale = np.linalg.norm(a) + np.linalg.norm(b) + eps
    return difference_norm / scale


In [4]:
np.random.seed(42)  # seed NumPy's global RNG so every run draws the same numbers

# Small batch: one sample, one channel, 5x5 spatial size.
N, C = 1, 1
H, W = 5, 5

# Standard-normal input with axes in (N, H, W, C) order.
X = np.random.randn(N, H, W, C)


In [5]:
num_classes = 3
# Integer class label; must be a valid class in [0, num_classes - 1].
y = np.array([1])
# Row `y` of the identity matrix is the one-hot encoding of that label.
y_onehot = np.identity(num_classes)[y]
y_onehot.shape

(1, 3)

In [6]:
# Convolution hyper-parameters, named so the Dense input size below can be
# derived from them instead of being hard-coded.
CONV_OUT_CHANNELS = 2
CONV_KERNEL_SIZE = 3

# NOTE(review): this assumes Conv is a stride-1, unpadded convolution and
# that its second argument is the number of output channels. That is what
# the previously hard-coded Dense input of 18 (= 3 * 3 * 2 for a 5x5 input)
# implies; confirm against layers.Conv before changing H, W or the kernel.
conv_out_h = H - CONV_KERNEL_SIZE + 1
conv_out_w = W - CONV_KERNEL_SIZE + 1
flat_features = conv_out_h * conv_out_w * CONV_OUT_CHANNELS

# Conv -> ReLU -> Flatten -> Dense, added in that order.
model = Model()
model.add(Conv(1, CONV_OUT_CHANNELS, kernel_size=CONV_KERNEL_SIZE))
model.add(ReLU())
model.add(Flatten())
model.add(Dense(flat_features, num_classes))


In [7]:
loss_fn = CrossEntropyLoss()

# Forward pass: model output, then the loss against the one-hot target.
y_pred = model.forward(X)
loss = loss_fn.forward_pass(y_pred, y_onehot)

# Backward pass: gradient of the loss, propagated back through the model.
grad_loss = loss_fn.backward_pass()
model.backward(grad_loss)

# Grab the conv layer.
conv = model.layers[0]
# Snapshot the analytic gradient instead of aliasing conv.dw, so a later
# backward pass (or re-running this cell) cannot silently change the value
# that is compared against the numerical gradient.
grad_analitico = np.array(conv.dw)


In [8]:
# Finite-difference gradient of the loss with respect to the conv weights.
grad_numerico = numerical_gradient_conv(
    model, conv, X, y_onehot, loss_fn
)

erro = relative_error(grad_analitico, grad_numerico)
print("Erro relativo CNN:", erro)

# Turn the check into a real test: a Restart & Run All must fail loudly if
# the analytic and numerical gradients disagree (a NaN error also fails).
GRAD_CHECK_TOL = 1e-6
assert erro < GRAD_CHECK_TOL, (
    f"Conv gradient check failed: relative error {erro} >= {GRAD_CHECK_TOL}"
)


Erro relativo CNN: 1.6152721214048323e-09
