In [1]:
import os
import sys

# Put the parent of the current working directory on the import path
# (presumably so the project-local `optimizer` module imported further down
# can be resolved -- confirm against the repository layout).
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_dir)

In [None]:
from optimizer import SGD, AdaGrad, AdaDelta
from torch.nn import Parameter

import torch

In [None]:
def loss_fn(x: torch.Tensor) -> torch.Tensor:
    """Quadratic loss ``(x - 3) ** 2``, evaluated element-wise.

    Args:
        x: Point(s) at which the loss is evaluated.

    Returns:
        A tensor holding the squared difference between each element of
        ``x`` and 3; it is zero exactly where ``x`` equals 3.
    """
    offset = x - 3
    return offset ** 2

In [None]:
# Minimise loss_fn with SGD (no momentum), starting from x = 0.
x = Parameter(torch.zeros(1), requires_grad=True)
optimizer = SGD(params=[x], lr=0.1, momentum=None)

# Stop once the loss changes by less than this amount between two steps.
loss_threshold = 1e-3
prev_loss = 0

for step in range(50):
    # Clear the old gradient, evaluate the loss, back-propagate, update x.
    optimizer.zero_grad()
    loss = loss_fn(x)
    loss.backward()
    optimizer.step()

    # x is reported after the update; the loss is the value computed before it.
    print(f"Step: {step+1}, x: {x.item()}, loss: {loss.item():.4f}")

    current_loss = loss.item()
    loss_change = abs(prev_loss - current_loss)
    if loss_change < loss_threshold:
        print(f"difference in loss is less than the threshold; stopping")
        break
    prev_loss = current_loss

Step: 1, x: 0.6000000238418579, loss: 9.0000
Step: 2, x: 1.0800000429153442, loss: 5.7600
Step: 3, x: 1.4639999866485596, loss: 3.6864
Step: 4, x: 1.7711999416351318, loss: 2.3593
Step: 5, x: 2.0169599056243896, loss: 1.5099
Step: 6, x: 2.2135679721832275, loss: 0.9664
Step: 7, x: 2.370854377746582, loss: 0.6185
Step: 8, x: 2.4966835975646973, loss: 0.3958
Step: 9, x: 2.597346782684326, loss: 0.2533
Step: 10, x: 2.677877426147461, loss: 0.1621
Step: 11, x: 2.7423019409179688, loss: 0.1038
Step: 12, x: 2.793841600418091, loss: 0.0664
Step: 13, x: 2.835073232650757, loss: 0.0425
Step: 14, x: 2.868058681488037, loss: 0.0272
Step: 15, x: 2.894446849822998, loss: 0.0174
Step: 16, x: 2.915557384490967, loss: 0.0111
Step: 17, x: 2.932446002960205, loss: 0.0071
Step: 18, x: 2.9459567070007324, loss: 0.0046
Step: 19, x: 2.9567654132843018, loss: 0.0029
Step: 20, x: 2.9654123783111572, loss: 0.0019
Step: 21, x: 2.97232985496521, loss: 0.0012
difference in loss is less than the threshold; stopping

In [None]:
# Minimise loss_fn with SGD and momentum 0.5, starting from x = 0.
x = Parameter(torch.zeros(1), requires_grad=True)
optimizer = SGD(params=[x], lr=0.1, momentum=0.5)

# Stop once the loss changes by less than this amount between two steps.
loss_threshold = 1e-3
prev_loss = 0

for step in range(50):
    # Clear the old gradient, evaluate the loss, back-propagate, update x.
    optimizer.zero_grad()
    loss = loss_fn(x)
    loss.backward()
    optimizer.step()

    # x is reported after the update; the loss is the value computed before it.
    print(f"Step: {step+1}, x: {x.item()}, loss: {loss.item():.4f}")

    current_loss = loss.item()
    loss_change = abs(prev_loss - current_loss)
    if loss_change < loss_threshold:
        print(f"difference in loss is less than the threshold; stopping")
        break
    prev_loss = current_loss

Step: 1, x: 0.6000000238418579, loss: 9.0000
Step: 2, x: 1.380000114440918, loss: 5.7600
Step: 3, x: 2.0940001010894775, loss: 2.6244
Step: 4, x: 2.632200002670288, loss: 0.8208
Step: 5, x: 2.9748599529266357, loss: 0.1353
Step: 6, x: 3.1512179374694824, loss: 0.0006
Step: 7, x: 3.209153413772583, loss: 0.0229
Step: 8, x: 3.1962904930114746, loss: 0.0437
Step: 9, x: 3.1506009101867676, loss: 0.0385
Step: 10, x: 3.0976359844207764, loss: 0.0227
Step: 11, x: 3.051626205444336, loss: 0.0095
Step: 12, x: 3.018296241760254, loss: 0.0027
Step: 13, x: 2.997972011566162, loss: 0.0003
Step: 14, x: 2.988215446472168, loss: 0.0000
difference in loss is less than the threshold; stopping


In [None]:
# Minimise loss_fn with AdaGrad (lr = 1), starting from x = 0.
x = Parameter(torch.zeros(1), requires_grad=True)
optimizer = AdaGrad(params=[x], lr=1)

# Stop once the loss changes by less than this amount between two steps.
loss_threshold = 1e-3
prev_loss = 0

for step in range(50):
    # Clear the old gradient, evaluate the loss, back-propagate, update x.
    optimizer.zero_grad()
    loss = loss_fn(x)
    loss.backward()
    optimizer.step()

    # x is reported after the update; the loss is the value computed before it.
    print(f"Step: {step+1}, x: {x.item()}, loss: {loss.item():.4f}")

    current_loss = loss.item()
    loss_change = abs(prev_loss - current_loss)
    if loss_change < loss_threshold:
        print(f"difference in loss is less than the threshold; stopping")
        break
    prev_loss = current_loss

Step: 1, x: 1.0, loss: 9.0000
Step: 2, x: 1.5547001361846924, loss: 4.0000
Step: 3, x: 1.926774024963379, loss: 2.0889
Step: 4, x: 2.193084716796875, loss: 1.1518
Step: 5, x: 2.389416217803955, loss: 0.6511
Step: 6, x: 2.536365270614624, loss: 0.3728
Step: 7, x: 2.6472599506378174, loss: 0.2150
Step: 8, x: 2.7313315868377686, loss: 0.1244
Step: 9, x: 2.7952346801757812, loss: 0.0722
Step: 10, x: 2.8438806533813477, loss: 0.0419
Step: 11, x: 2.8809444904327393, loss: 0.0244
Step: 12, x: 2.9091978073120117, loss: 0.0142
Step: 13, x: 2.930741310119629, loss: 0.0082
Step: 14, x: 2.947171211242676, loss: 0.0048
Step: 15, x: 2.959702491760254, loss: 0.0028
Step: 16, x: 2.9692609310150146, loss: 0.0016
Step: 17, x: 2.9765520095825195, loss: 0.0009
difference in loss is less than the threshold; stopping


In [None]:
# Minimise loss_fn with AdaDelta (lr = 100, alpha = 0.001), starting from x = 0.
x = Parameter(torch.zeros(1), requires_grad=True)
optimizer = AdaDelta(params=[x], lr=100, alpha=0.001)

# Within this cell these two values are only needed by the disabled
# early-stopping check at the bottom of the loop.
loss_threshold = 1e-3
prev_loss = 0

for step in range(1000):
    # Clear the old gradient, evaluate the loss, back-propagate, update x.
    optimizer.zero_grad()
    loss = loss_fn(x)
    loss.backward()
    optimizer.step()

    # x is reported after the update; the loss is the value computed before it.
    print(f"Step: {step+1}, x: {x.item()}, loss: {loss.item():.4f}")

    # Early stopping is disabled here, so all 1000 steps are executed.
    # NOTE(review): in the recorded output the loss moves by only ~2e-4 between
    # steps 1 and 2, which is below loss_threshold, so this check would
    # presumably fire at step 2 -- likely the reason it is commented out; confirm.
    # if abs(prev_loss - loss.item()) < loss_threshold:
    #     print(f"difference in loss is less than the threshold; stopping")
    #     break
    # prev_loss = loss.item()

Step: 1, x: 3.163859946653247e-05, loss: 9.0000
Step: 2, x: 7.635998190380633e-05, loss: 8.9998
Step: 3, x: 0.00013112311717122793, loss: 8.9995
Step: 4, x: 0.00019435286230873317, loss: 8.9992
Step: 5, x: 0.00026504232664592564, loss: 8.9988
Step: 6, x: 0.0003424761525820941, loss: 8.9984
Step: 7, x: 0.00042611226672306657, loss: 8.9979
Step: 8, x: 0.0005155213875696063, loss: 8.9974
Step: 9, x: 0.0006103527848608792, loss: 8.9969
Step: 10, x: 0.0007103127427399158, loss: 8.9963
Step: 11, x: 0.0008151506772264838, loss: 8.9957
Step: 12, x: 0.0009246495319530368, loss: 8.9951
Step: 13, x: 0.001038618735037744, loss: 8.9945
Step: 14, x: 0.0011568896006792784, loss: 8.9938
Step: 15, x: 0.0012793110217899084, loss: 8.9931
Step: 16, x: 0.0014057466760277748, loss: 8.9923
Step: 17, x: 0.001536073163151741, loss: 8.9916
Step: 18, x: 0.001670177560299635, loss: 8.9908
Step: 19, x: 0.0018079562578350306, loss: 8.9900
Step: 20, x: 0.0019493139116093516, loss: 8.9892
Step: 21, x: 0.0020941619295