In [1]:
# -*- coding: utf-8 -*-

import torch
import math


# Fit y = sin(x) on [-pi, pi] with the cubic a + b*x + c*x^2 + d*x^3, using
# plain tensor arithmetic and hand-derived gradients (no autograd).
dtype = torch.float
device = torch.device("cpu")
#device = torch.device("cuda:0") # Uncomment this to run on GPU

# Draw the initial weights from a dedicated, seeded generator so that a fresh
# "Restart & Run All" reproduces the same loss trace and final coefficients,
# without touching torch's global RNG state.
torch_rng = torch.Generator(device=device).manual_seed(0)

# Create input and output data: 2000 evenly spaced points on [-pi, pi] and
# their sine. (The data is deterministic; only the weights are random.)
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# The powers of x never change during training, so compute them once instead
# of on every iteration.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (0-dim tensors drawn from a standard normal)
a = torch.randn((), generator=torch_rng, device=device, dtype=dtype)
b = torch.randn((), generator=torch_rng, device=device, dtype=dtype)
c = torch.randn((), generator=torch_rng, device=device, dtype=dtype)
d = torch.randn((), generator=torch_rng, device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss (sum of squared errors, as a Python float);
    # report it on every 100th iteration
    loss = (y_pred - y).pow(2).sum().item()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule then multiplies by
    # d(y_pred)/d(weight), which is 1, x, x^2 and x^3 respectively.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent (in place on the 0-dim tensors)
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')


99 2028.7823486328125
199 1394.3521728515625
299 960.2670288085938
399 662.9187622070312
499 459.0013122558594
599 318.9984436035156
699 222.76805114746094
799 156.5503692626953
899 110.93441009521484
999 79.4761734008789
1099 57.75819396972656
1199 42.74884796142578
1299 32.365116119384766
1399 25.174196243286133
1499 20.189363479614258
1599 16.730594635009766
1699 14.328436851501465
1799 12.658583641052246
1899 11.496784210205078
1999 10.687759399414062
Result: y = -0.04106452316045761 + 0.8381562232971191 x + 0.007084311917424202 x^2 + -0.09068688750267029 x^3


In [2]:
import numpy as np
import math

# Fit y = sin(x) on [-pi, pi] with the cubic a + b*x + c*x^2 + d*x^3, using
# plain NumPy and hand-derived gradients.

# Draw the initial weights from a dedicated, seeded generator so that a fresh
# "Restart & Run All" reproduces the same loss trace and final coefficients,
# without touching NumPy's legacy global RNG state.
np_rng = np.random.default_rng(0)

# Create input and output data: 2000 evenly spaced points on [-pi, pi] and
# their sine. (The data is deterministic; only the weights are random.)
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

# The powers of x never change during training, so compute them once instead
# of on every iteration.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (scalars drawn from a standard normal)
a = np_rng.standard_normal()
b = np_rng.standard_normal()
c = np_rng.standard_normal()
d = np_rng.standard_normal()

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + b x + c x^2 + d x^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss (sum of squared errors); report it on every
    # 100th iteration
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop to compute gradients of a, b, c, d with respect to loss.
    # d(loss)/d(y_pred) = 2 * (y_pred - y); the chain rule then multiplies by
    # d(y_pred)/d(weight), which is 1, x, x^2 and x^3 respectively.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')

99 1256.4037648716535
199 837.0155920890758
299 558.7506632890426
399 374.079281187389
499 251.4921293101732
599 170.09651707575566
699 116.03670756662885
799 80.12197397513496
899 56.25469491683986
999 40.3884861453285
1099 29.83753404412832
1199 22.81866739986385
1299 18.147685503485935
1399 15.03794168182089
1499 12.966722571911895
1599 11.586584238189047
1699 10.666505807260641
1799 10.05282348627072
1899 9.643289330642109
1999 9.369840575632065
Result: y = -0.01045724831398066 + 0.8360038885828688 x + 0.001804049343524704 x^2 + -0.09038073784325455 x^3
