In [1]:
import numpy as np

In [2]:


# Function and gradient
def f(x):
    """Objective function of the demo: the square of ``x``.

    Args:
        x: Point at which the objective is evaluated.

    Returns:
        ``x`` raised to the power two.
    """
    squared = x ** 2
    return squared

def grad(x):
    """Derivative of the objective ``x**2``, evaluated at ``x``.

    Args:
        x: Point at which the slope is evaluated.

    Returns:
        Twice ``x``.
    """
    slope = 2 * x
    return slope

# Hand-rolled RMSprop walkthrough: print one table row per optimisation step.

# Hyperparameters
x = 4.0                                # starting point
eta, gamma, epsilon = 0.1, 0.9, 1e-8   # base step size, averaging decay, divide-by-zero guard
Eg2 = 0  # E[g^2]_t, running average of the squared gradient
n_steps = 30

print("Step | x       | Gradient | E[g^2]   | Effective LR | Update")
for t in range(1, n_steps + 1):
    # Gradient at the current point, i.e. before this step's update is applied.
    g = grad(x)

    # Blend the new squared gradient into the running average.
    Eg2 = (1 - gamma) * g**2 + gamma * Eg2

    # Scale the base step size by the root of the running average.
    lr = eta / (epsilon + np.sqrt(Eg2))
    update = g * lr
    x = x - update

    # Note: the x column shows the post-update value, while g was measured pre-update.
    print(f"{t:>4} | {x:7.4f} | {g:8.4f} | {Eg2:8.4f} | {lr:12.6f} | {update:7.4f}")


Step | x       | Gradient | E[g^2]   | Effective LR | Update
   1 |  3.6838 |   8.0000 |   6.4000 |     0.039528 |  0.3162
   2 |  3.4635 |   7.3675 |  11.1881 |     0.029897 |  0.2203
   3 |  3.2839 |   6.9270 |  14.8676 |     0.025935 |  0.1796
   4 |  3.1277 |   6.5677 |  17.6943 |     0.023773 |  0.1561
   5 |  2.9873 |   6.2554 |  19.8380 |     0.022452 |  0.1404
   6 |  2.8582 |   5.9746 |  21.4237 |     0.021605 |  0.1291
   7 |  2.7378 |   5.7164 |  22.5491 |     0.021059 |  0.1204
   8 |  2.6244 |   5.4756 |  23.2924 |     0.020720 |  0.1135
   9 |  2.5166 |   5.2487 |  23.7181 |     0.020533 |  0.1078
  10 |  2.4136 |   5.0332 |  23.8796 |     0.020464 |  0.1030
  11 |  2.3147 |   4.8272 |  23.8218 |     0.020489 |  0.0989
  12 |  2.2194 |   4.6294 |  23.5827 |     0.020592 |  0.0953
  13 |  2.1272 |   4.4387 |  23.1946 |     0.020764 |  0.0922
  14 |  2.0379 |   4.2544 |  22.6852 |     0.020996 |  0.0893
  15 |  1.9511 |   4.0757 |  22.0778 |     0.021282 |  0.0867
  16 |  1

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

# Fit a single linear unit to y = 2x with torch's built-in RMSprop optimizer.

# Seed the global RNG before the model is built: nn.Linear draws its initial
# weight and bias at random, so without a fixed seed the logged losses differ
# on every kernel restart.
torch.manual_seed(0)

# Toy data
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]])
y = torch.tensor([[2.0], [4.0], [6.0], [8.0]])

# Model
model = nn.Linear(1, 1)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(model.parameters(), lr=0.01, alpha=0.9)

# Training
for epoch in range(100):
    optimizer.zero_grad()  # drop gradients left over from the previous iteration
    outputs = model(X)
    loss = criterion(outputs, y)
    loss.backward()
    optimizer.step()

    # Log every tenth epoch; the value shown is the loss measured before
    # this epoch's parameter update.
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {loss.item():.4f}")


Epoch 0, Loss: 13.0555
Epoch 10, Loss: 9.0221
Epoch 20, Loss: 6.9107
Epoch 30, Loss: 5.2607
Epoch 40, Loss: 3.9068
Epoch 50, Loss: 2.7998
Epoch 60, Loss: 1.9183
Epoch 70, Loss: 1.2474
Epoch 80, Loss: 0.7709
Epoch 90, Loss: 0.4672
