In [27]:
from typing import Any

import numpy as np
import math

In [28]:
pi = math.pi
x = np.linspace(-pi, pi, 2000)
y = np.sin(x)

In [38]:
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()

print(a, b, c, d)

-1.7300274064054102 -0.158000279909744 -0.21265498214611483 -0.17514174072678115


In [47]:
learning_rate = 0.000001
for t in range(2000):
    y_pred = (a*(x**3)) + (b*(x**2)) + (c*x) + d
    
    loss = np.square(y_pred - y).sum()
    if t % 100 == 99:
        print(t, loss)
    
    ### Backprop to compute gradients of a, b, c, d with respect to loss
    grad_y_pred = 2.0*(y_pred-y)
    grad_d = grad_y_pred.sum()
    grad_c = (grad_y_pred * x).sum()
    grad_b = (grad_y_pred * x ** 2).sum()
    grad_a = (grad_y_pred * x ** 3).sum()
    
    a -= learning_rate*grad_a
    b -= learning_rate*grad_b
    c -= learning_rate*grad_c
    d -= learning_rate*grad_d

print(a, b, c, d)

99 8.817165410007027
199 8.817165410007027
299 8.817165410007025
399 8.817165410007025
499 8.817165410007025
599 8.817165410007025
699 8.817165410007025
799 8.817165410007027
899 8.817165410007025
999 8.817165410007025
1099 8.817165410007027
1199 8.817165410007025
1299 8.817165410007027
1399 8.817165410007027
1499 8.817165410007025
1599 8.817165410007027
1699 8.817165410007025
1799 8.817165410007025
1899 8.817165410007025
1999 8.817165410007025
-0.09333038904059505 7.547293931799305e-16 0.8567408430737578 -4.374517070295505e-15


In [48]:
import torch
import math

In [49]:
dtype = torch.float
device = 'cuda' if torch.cuda.is_available() else 'cpu'
torch.set_default_device(device)

In [53]:
class LegendrePolynomial3(torch.autograd.Function):
    @staticmethod
    def forward(ctx, input, *args, **kwargs):
        ctx.save_for_backward(input)
        return 0.5*(5*input**3-3*input)
    
    @staticmethod
    def backward(ctx, grad_outputs):
        input, = ctx.saved_tensors
        return grad_outputs * ((15/2) * input ** 2 - 3*1)

In [54]:
dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use Function.apply method. We alias this as 'P3'.
    P3 = LegendrePolynomial3.apply

    # Forward pass: compute predicted y using operations; we compute
    # P3 using our custom autograd operation.
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 291.9667053222656
199 218.27239990234375
299 162.18350219726562
399 119.98851776123047
499 88.59320068359375
599 65.48358154296875
699 48.65519332885742
799 36.534183502197266
899 27.901119232177734
999 21.821998596191406
1099 17.5905818939209
1199 14.67898941040039
1299 12.69812297821045
1399 11.365156173706055
1499 10.477325439453125
1599 9.891486167907715
1699 9.508221626281738
1799 9.259451866149902
1899 9.099014282226562
1999 8.996124267578125
Result: y = -3.1605404404722037e-10 + -2.2151637077331543 * P3(4.5359185851800987e-10 + 0.25649651885032654 x)
