In [5]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import math

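Goal: approximate the target function below on $[-\pi, \pi]$ with a third-order polynomial $a + bx + cx^2 + dx^3$, fitting the four coefficients by gradient descent.

$$y = \sin (x)$$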

In [9]:
# Fit y = a + b x + c x^2 + d x^3 to sin(x) with hand-written gradient descent.
#
# The training data is NOT random: x is an evenly spaced grid over [-pi, pi]
# and y is sin evaluated on that grid. Only the initial weights are random.
x = np.linspace(-np.pi, np.pi, 2000)
y = np.sin(x)

# The powers of x are loop-invariant, so compute them once up front instead of
# re-evaluating them in the forward and backward pass of every iteration.
x_sq = x**2
x_cu = x**3

# Randomly initialize weights from a seeded generator so that a fresh
# "Restart Kernel -> Run All" reproduces the same losses and the same fit.
SEED = 0
rng = np.random.default_rng(SEED)
a, b, c, d = rng.standard_normal(4)

learning_rate = 1e-6

for t in range(2000):
    # Forward pass: compute predicted y
    # y = a + bx + cx^2 + dx^3
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Sum-of-squares loss, reported every 100 steps
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)

    # Backprop by hand: d(loss)/d(y_pred) first, then the chain rule through
    # each polynomial term to get the gradient for a, b, c and d.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Gradient-descent step on every weight
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {round(a,2)} + {round(b, 2)}x + {round(c, 2)}x^2 + {round(d, 2)}x^3')
    

99 454.8639129701638
199 306.53879269993894
299 207.648437208392
399 141.68335446133244
499 97.65786852557036
599 68.2586565370522
699 48.61507732541509
799 35.481859417947696
899 26.695687165046518
999 20.813769206643162
1099 16.873356438972827
1199 14.23167210581233
1299 12.459321715509928
1399 11.26928395923457
1499 10.469583456165424
1599 9.931732006824262
1699 9.569673835525085
1799 9.32573112934094
1899 9.16121711394337
1999 9.050162847004042
Result: y = 0.01 + 0.84x + -0.0x^2 + -0.09x^3


Using PyTorch

In [12]:
import torch
import numpy as np

# Same cubic fit of sin(x) as the NumPy version, but on PyTorch tensors with
# the gradients still derived and applied by hand (no autograd).
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0") # Uncomment this to run on GPU

# Seed PyTorch's RNG so the random initial weights, and therefore the printed
# losses and the final coefficients, are reproducible across kernel restarts.
SEED = 0
torch.manual_seed(SEED)

# Training data is deterministic: an evenly spaced grid over [-pi, pi] and the
# sine of that grid. Only the initial weights below are random.
x = torch.linspace(-np.pi, np.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Powers of x are loop-invariant; compute them once instead of every step.
x_sq = x ** 2
x_cu = x ** 3

# Randomly initialize weights (0-dim tensors)
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Keep the loss as a tensor and convert it to a Python float only when it
    # is actually printed. Calling .item() on every step would force a
    # device-to-host copy per iteration when running on a GPU.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x_sq).sum()
    grad_d = (grad_y_pred * x_cu).sum()

    # Update weights using gradient descent
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d


print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1364.7852783203125
199 913.1926879882812
299 612.3138427734375
399 411.7562255859375
499 278.0038757324219
599 188.7583465576172
699 129.177001953125
799 89.37721252441406
899 62.77549362182617
999 44.98389434814453
1099 33.076942443847656
1199 25.102739334106445
1299 19.758554458618164
1399 16.174243927001953
1499 13.768417358398438
1599 12.152308464050293
1699 11.065805435180664
1799 10.334689140319824
1899 9.842283248901367
1999 9.510354042053223
Result: y = -0.016184814274311066 + 0.8359057903289795 x + 0.00279215001501143 x^2 + -0.09036678075790405 x^3


In [13]:
# Same using autograd
import torch
import math

# Same cubic fit of sin(x), this time letting autograd compute the gradients.
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Seed PyTorch's RNG so the random initial weights, and therefore the printed
# losses and the final coefficients, are reproducible across kernel restarts.
SEED = 0
torch.manual_seed(SEED)

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Powers of x are loop-invariant and do not require gradients, so compute them
# once instead of on every iteration.
x_sq = x ** 2
x_cu = x ** 3

# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x_sq + d * x_cu

    # Compute and print loss using operations on Tensors.
    # .sum() reduces over every element, so loss is a 0-dim (scalar) Tensor;
    # loss.item() gets the Python number held in it.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call will compute the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad will be Tensors holding
    # the gradient of the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # in autograd.
    with torch.no_grad():
        for weight in (a, b, c, d):
            # In-place step so the same leaf Tensor is reused next iteration
            weight -= learning_rate * weight.grad

            # Reset the gradient after the update; otherwise the next
            # backward() call would accumulate into the stale value.
            weight.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

99 1828.9029541015625
199 1218.900146484375
299 813.6212158203125
399 544.27490234375
499 365.2104797363281
599 246.12547302246094
699 166.90003967285156
799 114.17224884033203
899 79.0655517578125
999 55.68102264404297
1099 40.09748458862305
1199 29.70762825012207
1299 22.77703857421875
1399 18.151464462280273
1499 15.062579154968262
1599 12.998686790466309
1699 11.618830680847168
1799 10.695684432983398
1899 10.077668190002441
1999 9.663641929626465
Result: y = -0.01488934550434351 + 0.8319839835166931 x + 0.0025686626322567463 x^2 + -0.08980894088745117 x^3
