In [10]:
import torch
import torch.nn as nn
import torch.optim as optim

# Fix the global RNG seed so the random draws below are repeatable
torch.manual_seed(0)

# =====================================
# 1. A simple PyTorch model & its initial parameters
# =====================================
class SimpleMLP(nn.Module):
    """Two-layer perceptron: Linear(4 -> 3), ReLU, then Linear(3 -> 2)."""

    def __init__(self):
        super().__init__()
        # Layers are created in this order on purpose: each nn.Linear draws
        # its initial weights from the global RNG when it is constructed.
        self.fc1 = nn.Linear(4, 3)  # 4 input features -> 3 hidden units
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(3, 2)  # 3 hidden units -> 2 outputs

    def forward(self, x):
        """Apply fc1, ReLU and fc2 in sequence and return the fc2 output."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Build the network; its parameters are randomly initialised at construction.
model = SimpleMLP()

# Use a forward hook to inspect the parameters at forward time.
def weight_hook(module, input, output):
    """Forward hook that prints the weight and bias of an ``nn.Linear`` layer.

    Modules that are not ``nn.Linear`` are ignored. The hook returns ``None``,
    so the layer's output is left untouched.

    Args:
        module: The module whose forward pass just ran.
        input: The forward inputs (unused; required by the hook signature).
        output: The forward output (unused; required by the hook signature).
    """
    if not isinstance(module, nn.Linear):
        return

    print(f"[Forward Hook] {module}의 가중치:")
    # detach() gives a grad-free view for printing, without resorting to `.data`.
    print(module.weight.detach())
    print(f"[Forward Hook] {module}의 편향:")
    # A layer built with bias=False has `bias is None`; print None instead of
    # failing on the attribute access.
    print(None if module.bias is None else module.bias.detach())
    print("------------------------------------------------")

# Attach the hook to both linear layers so that every forward pass through
# the model prints their current weights and biases.
model.fc1.register_forward_hook(weight_hook)
model.fc2.register_forward_hook(weight_hook)


[Forward Hook] Linear(in_features=4, out_features=3, bias=True)의 가중치:
tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.1926,  0.1341, -0.0099,  0.3964],
        [-0.0444,  0.1323, -0.1511, -0.0983]])
[Forward Hook] Linear(in_features=4, out_features=3, bias=True)의 편향:
tensor([-0.4777, -0.3311, -0.2061])
------------------------------------------------
[Forward Hook] Linear(in_features=3, out_features=2, bias=True)의 가중치:
tensor([[ 0.0214,  0.2282,  0.3464],
        [-0.3914, -0.2514,  0.2097]])
[Forward Hook] Linear(in_features=3, out_features=2, bias=True)의 편향:
tensor([ 0.4794, -0.1188])
------------------------------------------------
=== PyTorch로 업데이트된 최종 파라미터 ===
fc1.weight:
 tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.1416,  0.1264, -0.0283,  0.4303],
        [-0.0444,  0.1323, -0.1511, -0.0983]])
fc1.bias:
 tensor([-0.4777, -0.3446, -0.2061])
fc2.weight:
 tensor([[ 0.0214,  0.2695,  0.3464],
        [-0.3914, -0.2426,  0.2097]])
fc2.bias:
 tensor([ 0.5239, -

In [12]:

# ============================================
# 2. Example input x and target y (batch=2, output=2)
# ============================================
# The tuple is evaluated left to right, so x is drawn from the RNG before y.
x, y = torch.randn(2, 4), torch.randn(2, 2)

# ==============================================
# 3. Run a single PyTorch forward / backward / step
# ==============================================
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Snapshot the parameters before the update (the "initial" values)
# --> the manual computation starts from these and is compared afterwards
init_W1, init_b1, init_W2, init_b2 = (
    param.detach().clone()
    for param in (
        model.fc1.weight,
        model.fc1.bias,
        model.fc2.weight,
        model.fc2.bias,
    )
)

# --- clear any stale gradients before computing new ones ---
optimizer.zero_grad()

# --- forward ---
pred = model(x)
loss = criterion(pred, y)

# --- backward ---
loss.backward()

# --- update ---
optimizer.step()

print("=== PyTorch로 업데이트된 최종 파라미터 ===")
print("fc1.weight:\n", model.fc1.weight.data)
print("fc1.bias:\n", model.fc1.bias.data)
print("fc2.weight:\n", model.fc2.weight.data)
print("fc2.bias:\n", model.fc2.bias.data)
print("==================================================\n")


[Forward Hook] Linear(in_features=4, out_features=3, bias=True)의 가중치:
tensor([[-0.0037,  0.2682, -0.4115, -0.3680],
        [-0.1416,  0.1264, -0.0283,  0.4303],
        [-0.0444,  0.1323, -0.1511, -0.0983]])
[Forward Hook] Linear(in_features=4, out_features=3, bias=True)의 편향:
tensor([-0.4777, -0.3446, -0.2061])
------------------------------------------------
[Forward Hook] Linear(in_features=3, out_features=2, bias=True)의 가중치:
tensor([[ 0.0214,  0.2695,  0.3464],
        [-0.3914, -0.2426,  0.2097]])
[Forward Hook] Linear(in_features=3, out_features=2, bias=True)의 편향:
tensor([ 0.5725, -0.0086])
------------------------------------------------
=== PyTorch로 업데이트된 최종 파라미터 ===
fc1.weight:
 tensor([[ 0.0171,  0.2728, -0.3823, -0.3625],
        [-0.1416,  0.1264, -0.0283,  0.4303],
        [-0.0572,  0.1295, -0.1692, -0.1017]])
fc1.bias:
 tensor([-0.5022, -0.3446, -0.1910])
fc2.weight:
 tensor([[ 0.0217,  0.2695,  0.3465],
        [-0.3883, -0.2426,  0.2103]])
fc2.bias:
 tensor([0.6073, 0.0631])

In [13]:

# ===============================================
# 4. Manual computation starting from the 'initial' parameters saved above
# ===============================================
# Work on fresh copies so the saved snapshots themselves are never modified.
W1, b1, W2, b2 = (
    snapshot.clone() for snapshot in (init_W1, init_b1, init_W2, init_b2)
)

# -- hand-written forward pass --
def forward_manual(x, W1, b1, W2, b2):
    """
    Forward pass of the two-layer MLP written out by hand:
    linear -> ReLU -> linear.

    Shapes used in this notebook:
      x:  (2,4)
      W1: (3,4)
      b1: (3,)
      W2: (2,3)
      b2: (2,)

    Returns (z2, a1, z1): the final output, the hidden activation after
    ReLU, and the hidden pre-activation, in that order.
    """
    # First linear layer: x @ W1^T + b1  -> (2,3)
    hidden_pre = torch.mm(x, W1.t()) + b1
    # Element-wise ReLU
    hidden_act = hidden_pre.relu()
    # Second linear layer: a1 @ W2^T + b2  -> (2,2)
    output = torch.mm(hidden_act, W2.t()) + b2

    return output, hidden_act, hidden_pre

# -- hand-written backward pass --
def backward_manual(x, y, out, a1, z1, W1, b1, W2, b2):
    """
    Compute the MSE loss and its parameter gradients by hand for the
    network  z1 = x @ W1^T + b1,  a1 = relu(z1),  out = a1 @ W2^T + b2.

    Args:
        x:   (N, D_in)   input batch
        y:   (N, D_out)  target
        out: (N, D_out)  network output (result of the manual forward pass)
        a1:  (N, H)      hidden activation after ReLU
        z1:  (N, H)      hidden pre-activation
        W1:  (H, D_in)   unused here; kept so the signature mirrors forward
        b1:  (H,)        unused here
        W2:  (D_out, H)  needed to back-propagate into the hidden layer
        b2:  (D_out,)    unused here

    Returns:
        (loss, dW1, db1, dW2, db2), where each gradient has the shape of
        the parameter it belongs to.
    """
    # 1) loss = mean((out - y)^2) over ALL elements of `out`.
    #    d(loss)/d(out) = 2 * (out - y) / out.numel()
    #    The element count is taken from the tensor rather than hard-coded,
    #    so the gradient stays correct for any batch size / output width.
    #    For the 2x2 case this is exactly (out - y) / 2.
    diff = out - y
    loss = (diff ** 2).mean()
    dLoss_dOut = diff * (2.0 / out.numel())  # (N, D_out)

    # 2) Output layer: out = a1 @ W2^T + b2
    #    dW2 = dOut^T @ a1          -> (D_out, H)
    #    db2 = sum of dOut over the batch dimension -> (D_out,)
    dW2 = dLoss_dOut.t().mm(a1)
    db2 = dLoss_dOut.sum(dim=0)

    # 3) Hidden layer (ReLU)
    #    d(loss)/d(a1) = dOut @ W2  -> (N, H)
    #    ReLU passes the gradient only where its input was strictly positive.
    dLoss_da1 = dLoss_dOut.mm(W2)
    relu_mask = (z1 > 0).float()
    dLoss_dz1 = dLoss_da1 * relu_mask

    # 4) First layer: z1 = x @ W1^T + b1
    #    dW1 = dz1^T @ x            -> (H, D_in)
    #    db1 = sum of dz1 over the batch dimension -> (H,)
    dW1 = dLoss_dz1.t().mm(x)
    db1 = dLoss_dz1.sum(dim=0)

    return loss, dW1, db1, dW2, db2

# -- manual forward, backward and one SGD update step --
out, a1, z1 = forward_manual(x, W1, b1, W2, b2)
loss_manual, gW1, gb1, gW2, gb2 = backward_manual(
    x, y, out, a1, z1, W1, b1, W2, b2
)

# Plain SGD: param <- param - lr * grad, applied to all four tensors at once.
lr = 0.1
W1, b1, W2, b2 = (
    param - lr * grad
    for param, grad in zip((W1, b1, W2, b2), (gW1, gb1, gW2, gb2))
)

print("=== 수동 연산으로 업데이트된 최종 파라미터 ===")
print("W1:\n", W1)
print("b1:\n", b1)
print("W2:\n", W2)
print("b2:\n", b2)
print("==================================================\n")

# ==========================================
# 5. Compare the PyTorch parameters with the manual ones
# ==========================================
# Largest absolute element-wise difference per parameter tensor.
diff_w1 = torch.max(torch.abs(model.fc1.weight.data - W1)).item()
diff_b1 = torch.max(torch.abs(model.fc1.bias.data - b1)).item()
diff_w2 = torch.max(torch.abs(model.fc2.weight.data - W2)).item()
diff_b2 = torch.max(torch.abs(model.fc2.bias.data - b2)).item()

print("=== PyTorch vs Manual 최종 파라미터 차이(절댓값 최댓값) ===")
print("fc1.weight 차이:", diff_w1)
print("fc1.bias   차이:", diff_b1)
print("fc2.weight 차이:", diff_w2)
print("fc2.bias   차이:", diff_b2)


=== 수동 연산으로 업데이트된 최종 파라미터 ===
W1:
 tensor([[ 0.0171,  0.2728, -0.3823, -0.3625],
        [-0.1416,  0.1264, -0.0283,  0.4303],
        [-0.0572,  0.1295, -0.1692, -0.1017]])
b1:
 tensor([-0.5022, -0.3446, -0.1910])
W2:
 tensor([[ 0.0217,  0.2695,  0.3465],
        [-0.3883, -0.2426,  0.2103]])
b2:
 tensor([0.6073, 0.0631])

=== PyTorch vs Manual 최종 파라미터 차이(절댓값 최댓값) ===
fc1.weight 차이: 0.0
fc1.bias   차이: 0.0
fc2.weight 차이: 0.0
fc2.bias   차이: 0.0
