In [1]:
import torch
import math

In [2]:
import torch
import math


class LegendrePolynomial3(torch.autograd.Function):
    """
    We can implement our own custom autograd Functions by subclassing
    torch.autograd.Function and implementing the forward and backward passes
    which operate on Tensors.
    """

    @staticmethod
    def forward(ctx, input):
        """
        In the forward pass we receive a Tensor containing the input and return
        a Tensor containing the output. ctx is a context object that can be used
        to stash information for backward computation. You can cache arbitrary
        objects for use in the backward pass using the ctx.save_for_backward method.
        """
        ctx.save_for_backward(input)
        return 0.5 * (5 * input ** 3 - 3 * input)

    @staticmethod
    def backward(ctx, grad_output):
        """
        In the backward pass we receive a Tensor containing the gradient of the loss
        with respect to the output, and we need to compute the gradient of the loss
        with respect to the input.
        """
        input, = ctx.saved_tensors
        return grad_output * 1.5 * (5 * input ** 2 - 1)


dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Create random Tensors for weights. For this example, we need
# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
# not too far from the correct result to ensure convergence.
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass.
a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)

learning_rate = 5e-6
for t in range(2000):
    # To apply our Function, we use Function.apply method. We alias this as 'P3'.
    P3 = LegendrePolynomial3.apply

    # Forward pass: compute predicted y using operations; we compute
    # P3 using our custom autograd operation.
    y_pred = a + b * P3(c + d * x)

    # Compute and print loss
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass.
    loss.backward()

    # Update weights using gradient descent
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Manually zero the gradients after updating weights
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')

99 209.95831298828125
199 144.6602020263672
299 100.70250701904297
399 71.03519439697266
499 50.97850799560547
599 37.403133392333984
699 28.206867218017578
799 21.97317886352539
899 17.745729446411133
999 14.877889633178711
1099 12.93176555633545
1199 11.610918045043945
1299 10.714245796203613
1399 10.105476379394531
1499 9.69210433959961
1599 9.411376953125
1699 9.220744132995605
1799 9.091286659240723
1899 9.003360748291016
1999 8.943641662597656
Result: y = 4.691534383205465e-10 + -2.208526849746704 * P3(2.9543581470115043e-10 + 0.2554861009120941 x)


## nn.module

In [8]:
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction="sum")
lr = 1e-6
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= lr * param.grad
linear_layer = model[0]

print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 643.7544555664062
199 432.0415954589844
299 291.06109619140625
399 197.13990783691406
499 134.54074096679688
599 92.79776000976562
699 64.94790649414062
799 46.35717010498047
899 33.94016647338867
999 25.64183235168457
1099 20.092504501342773
1199 16.379207611083984
1299 13.892725944519043
1399 12.226579666137695
1499 11.109261512756348
1599 10.359461784362793
1699 9.855839729309082
1799 9.517330169677734
1899 9.289584159851074
1999 9.136229515075684
Result: y = 0.010682877153158188 + 0.8424144387245178 x + -0.0018429744523018599 x^2 + -0.09129257500171661 x^3


# pytorch optim

In [10]:
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)

model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction="sum")

lr = 1e-6
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)
for t in range(2000):
    y_pred = model(xx)
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
        
    optimizer.zero_grad()
    
    loss.backward()
    
    optimizer.step()
    
linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

99 25758.103515625
199 25649.9921875
299 25551.47265625
399 25455.9921875
499 25361.68359375
599 25267.810546875
699 25174.3828125
799 25081.306640625
899 24988.439453125
999 24895.76953125
1099 24803.30859375
1199 24711.0546875
1299 24619.0078125
1399 24527.1640625
1499 24435.5234375
1599 24344.091796875
1699 24252.86328125
1799 24161.84765625
1899 24071.029296875
1999 23980.41796875
Result: y = 0.41367146372795105 + -0.12519758939743042 x + 0.42684897780418396 x^2 + 0.2644813060760498 x^3


In [11]:
# 3rd order polynomial custom model

In [15]:
class Polynomial3(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.a = torch.nn.Parameter(torch.randn(()))
        self.b = torch.nn.Parameter(torch.randn(()))
        self.c = torch.nn.Parameter(torch.randn(()))
        self.d = torch.nn.Parameter(torch.randn(()))
        
    def forward(self, x):
        return self.a + self.b*x + self.c * x ** 2 + self.d * x **3
    
    def string(self):
        return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
    
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

model = Polynomial3()


criterion = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.SGD(model.parameters(), lr=1e-6)

for t in range(2000):
    y_pred = model(x)
    loss = criterion(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
print(f"Result: {model.string()}")

99 2878.159912109375
199 1962.201904296875
299 1340.14404296875
399 917.2172241210938
499 629.3530883789062
599 433.1982421875
699 299.3825378417969
799 207.99012756347656
899 145.49957275390625
999 102.7220458984375
1099 73.40538787841797
1199 53.29071807861328
1299 39.47405242919922
1399 29.972721099853516
1499 23.43163299560547
1599 18.923513412475586
1699 15.813139915466309
1799 13.664847373962402
1899 12.179523468017578
1999 11.151473045349121
Result: y = -0.04354355111718178 + 0.8813715577125549 x + 0.007511987816542387 x^2 + -0.09683389961719513 x^3
