In [1]:
import torch
import math

# Fit the cubic a + b*x + c*x^2 + d*x^3 to sin(x) using hand-written
# gradient descent (no autograd): the gradients of the squared-error loss
# are derived manually and applied to the four scalar coefficients.
dtype = torch.float
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Training data: three evenly spaced sample points on [1, pi] and their sines.
x = torch.linspace(1, math.pi, 3, device=device, dtype=dtype)
y = torch.sin(x)
print(f"y : {y}")

# The powers of x never change during training, so compute them once
# instead of re-evaluating x**2 and x**3 several times per iteration.
x_sq = x ** 2
x_cu = x ** 3

# Polynomial coefficients, initialised as random 0-dim tensors.
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)
print(f"{a} {b} {c} {d}")

lr = 0.0006        # learning rate (step size)
num_steps = 10000  # number of gradient-descent iterations
log_every = 1000   # print diagnostics every this many iterations

# Predict y from x and refine the coefficients.
for t in range(num_steps):
    # - Forward pass: predict y with the cubic polynomial.
    y_pred = a + b * x + c * x_sq + d * x_cu

    # - Backward pass: differentiate loss = sum((y_pred - y)^2) with respect
    #   to each coefficient. d(loss)/d(y_pred) is 2 * (y_pred - y); the chain
    #   rule then multiplies by the matching power of x and sums over samples.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    if t % log_every == 0:
        # The loss is only needed for logging. `.item()` copies the value to
        # the host, so evaluate it only on logging steps rather than on
        # every iteration.
        loss = (y_pred - y).pow(2).sum().item()  # sum of squared errors
        print(f"y_pred : {y_pred}    loss : {loss}")
        print("grad_y_pred : ", grad_y_pred)
        print("grad_a : ", grad_a)
        print("grad_b : ", grad_b)
        print("grad_c : ", grad_c)
        print("grad_d : ", grad_d)
        print()

    # Gradient-descent update: step each coefficient against its gradient,
    # scaled by the learning rate.
    a -= grad_a * lr
    b -= grad_b * lr
    c -= grad_c * lr
    d -= grad_d * lr
print(f"{a} {b} {c} {d}")

y : tensor([ 8.4147e-01,  8.7758e-01, -8.7423e-08], device='cuda:0')
0.0026981367263942957 0.7142823934555054 0.7385696172714233 0.6624144911766052
y_pred : tensor([ 2.1180, 10.5312, 30.0751], device='cuda:0')    loss : 999.3323364257812
grad_y_pred :  tensor([ 2.5530, 19.3072, 60.1502], device='cuda:0')
grad_a :  tensor(82.0104, device='cuda:0')
grad_b :  tensor(231.5017, device='cuda:0')
grad_c :  tensor(679.0046, device='cuda:0')
grad_d :  tensor(2039.0337, device='cuda:0')

y_pred : tensor([ 0.6633,  1.0102, -0.0328], device='cuda:0')    loss : 0.05040588229894638
grad_y_pred :  tensor([-0.3563,  0.2653, -0.0657], device='cuda:0')
grad_a :  tensor(-0.1567, device='cuda:0')
grad_b :  tensor(-0.0133, device='cuda:0')
grad_c :  tensor(0.1332, device='cuda:0')
grad_d :  tensor(-0.0366, device='cuda:0')

y_pred : tensor([ 0.7039,  0.9794, -0.0252], device='cuda:0')    loss : 0.02992589771747589
grad_y_pred :  tensor([-0.2752,  0.2036, -0.0503], device='cuda:0')
grad_a :  tensor(-0.1219,

tensor(27)