In [1]:
import numpy as np
import math

# Build the training set: 2000 evenly spaced inputs on [-pi, pi] and the
# sine of each one. The grid itself is deterministic; only the initial
# weights below are random.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating them in every forward and backward pass.
x_sq = x ** 2
x_cu = x ** 3

# Draw the initial weights from a seeded, local generator so that a fresh
# kernel reproduces the same losses and coefficients, without touching the
# global np.random state.
seed = 0
rng = np.random.default_rng(seed)
a = rng.standard_normal()
b = rng.standard_normal()
c = rng.standard_normal()
d = rng.standard_normal()

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the cubic y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # The residual feeds both the loss and every gradient; compute it once.
    residual = y_pred - y

    # Loss is the sum of squared errors; report it every 100th step.
    loss = np.square(residual).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backward pass: gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * residual, then chain through each term.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Plain gradient-descent update of the four coefficients.
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')


99 1818.8646528883805
199 1258.1428109936319
299 872.0386728122604
399 605.8798526405612
499 422.2060806494055
599 295.3196649813514
699 207.57194878481425
799 146.82849535622387
899 104.7368711435385
999 75.54153861264342
1099 55.27212539529501
1199 41.18679720944307
1299 31.39012707415213
1399 24.57046090960454
1499 19.819209083300972
1599 16.50637002727583
1699 14.194694046330909
1799 12.580428816762282
1899 11.452371523467786
1999 10.66354249241296
Result: y = -0.04212234272139479 + 0.8409836406814618 x + 0.00726680503824895 x^2 + -0.09108906421521662 x^3


In [1]:
import torch
import math


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Build the training set: 2000 evenly spaced inputs on [-pi, pi] and the
# sine of each one. The grid itself is deterministic; only the initial
# weights below are random.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once here
# instead of re-evaluating them in every forward and backward pass.
x_sq = x ** 2
x_cu = x ** 3

# Draw the initial weights from a seeded generator living on the same device
# as the tensors, so a fresh kernel reproduces the same losses and
# coefficients without touching torch's global RNG state.
seed = 0
gen = torch.Generator(device=device)
gen.manual_seed(seed)
a = torch.randn((), generator=gen, device=device, dtype=dtype)
b = torch.randn((), generator=gen, device=device, dtype=dtype)
c = torch.randn((), generator=gen, device=device, dtype=dtype)
d = torch.randn((), generator=gen, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: evaluate the cubic y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # The residual feeds both the loss and every gradient; compute it once.
    residual = y_pred - y

    # Loss is the sum of squared errors. Only materialise it as a Python
    # float on the steps that are printed: .item() forces a device-to-host
    # copy, which would otherwise stall a GPU on every iteration.
    if t % 100 == 99:
        loss = residual.pow(2).sum().item()
        print(t, loss)

    # Backward pass: gradients of the loss with respect to a, b, c, d.
    # d(loss)/d(y_pred) = 2 * residual, then chain through each term.
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place on the 0-d tensors).
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 4712.939453125
199 3190.07763671875
299 2162.428955078125
399 1468.27734375
499 998.9253540039062
599 681.2455444335938
699 465.9994812011719
799 320.0024719238281
899 220.8678436279297
999 153.4794158935547
1099 107.61955261230469
1199 76.37545776367188
1299 55.06476974487305
1399 40.51273727416992
1499 30.564380645751953
1599 23.755582809448242
1699 19.090112686157227
1799 15.889596939086914
1899 13.691570281982422
1999 12.180298805236816
Result: y = -0.04889773949980736 + 0.8226612210273743 x + 0.008435674011707306 x^2 + -0.08848285675048828 x^3
