In [1]:
from shampoo import Shampoo
from natural_grad import NaturalGradientDescent

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt

# Run on CUDA when PyTorch can see a GPU; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

def rosenbrock(x, y):
    """Evaluate the Rosenbrock function (1 - x)^2 + 100 * (y - x^2)^2.

    Only arithmetic operators are used, so the same code serves the NumPy
    mesh grids and the torch scalars it is called with in this notebook.

    Args:
        x: First coordinate (scalar, array or tensor).
        y: Second coordinate, broadcast-compatible with ``x``.

    Returns:
        The function value, of the same kind as the inputs.
    """
    distance_to_one = 1 - x
    valley_offset = y - x ** 2
    return distance_to_one ** 2 + 100 * valley_offset ** 2

# 200 x 200 evaluation grid covering x in [-2, 2] and y in [-1, 3]
x, y = np.linspace(-2, 2, 200), np.linspace(-1, 3, 200)
X, Y = np.meshgrid(x, y)
# Loss surface sampled at every grid node; used as the contour background
Z = rosenbrock(X, Y)

# Starting iterate (x, y) = (-1, 1); gradient tracking is enabled so the
# optimizer can update this tensor.
start_point = torch.tensor([-1.0, 1.0]).requires_grad_(True)

def update_freq_sched(step):
    """Constant schedule handed to Shampoo as ``update_freq_sched``.

    Args:
        step: Current optimizer step; ignored, the schedule is constant.

    Returns:
        Always ``1``.
        NOTE(review): presumably the interval, in steps, between
        preconditioner updates -- confirm against the Shampoo implementation.
    """
    every_step = 1
    return every_step
# Shampoo instance over the single 2-element parameter tensor.
optimizer = Shampoo(
    [start_point],
    # Step size and regularisation
    lr=3e-1,
    weight_decay=0,
    epsilon=1e-4,
    # Preconditioner settings
    diag_cutoff=1e3,
    svd_rank=100,
    update_freq_sched=update_freq_sched,
    newton_num_iters=5,
    newton_num_iters_max_sv=5,
    # Coupled Newton's iterations are faster on gpu, so the detected device is
    # used here; SVD is faster on cpu, in which case pass
    # inv_p_root_device='cpu' instead.
    inv_p_root_device=device,
)

# Alternative optimizers kept for quick comparison:
# optimizer = torch.optim.Adam([start_point], lr=1e-1)
# optimizer = torch.optim.Adagrad([start_point], lr=5e-1)
# Active optimizer: this assignment replaces any optimizer bound to the name
# earlier in the cell.
optimizer = NaturalGradientDescent([start_point], lr=2e-2, tol=1e-5, max_iter=10)
n_steps = 500
# detach().numpy() returns an array that shares memory with start_point, so the
# starting point must be copied. Without the copy, any in-place parameter update
# made by optimizer.step() would also rewrite this first entry and the plotted
# path would no longer begin at (-1, 1).
trajectory = [start_point.detach().numpy().copy()]

# Optimization loop: one optimizer step per iteration, recording each iterate
for _ in range(n_steps):
    optimizer.zero_grad()
    # Index the parameter tensor directly rather than unpacking into `x, y`,
    # which would rebind the mesh-grid axis arrays that share those names.
    loss = rosenbrock(start_point[0], start_point[1])
    loss.backward()
    optimizer.step()
    # Snapshot must be copied: detach().numpy() shares memory with start_point.
    trajectory.append(start_point.detach().numpy().copy())

# Stack the recorded iterates into a 2-D array so columns can be sliced
trajectory = np.array(trajectory)

# Static overview: loss contours with the optimizer path drawn on top
plt.figure(figsize=(10, 8))
# Contour log(Z + 1) rather than Z so the levels are spread more evenly
plt.contour(X, Y, np.log(Z + 1), levels=20, cmap='viridis', alpha=0.6)
# Thick red dotted line for the visited points, green star at (1, 1)
plt.plot(trajectory[:, 0], trajectory[:, 1], 'r.-', linewidth=2, markersize=8, label='Optimizer path')
plt.plot(1, 1, 'g*', markersize=15, label='Global minimum')
plt.colorbar(label='Log(Loss + 1)')
plt.title('Optimizer Trajectory on Rosenbrock Function')
plt.xlabel('x')
plt.ylabel('y')
plt.grid(True)
plt.legend()
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# Base figure for the animation: static contours plus an initially empty path
fig, ax = plt.subplots(figsize=(8, 6), dpi=80)
contour = ax.contour(X, Y, np.log(Z + 1), levels=15, cmap='viridis', alpha=0.6)
ax.plot(1, 1, 'g*', markersize=15, label='Global minimum')
# Keep a handle on the path artist so animate() can update its data per frame
(line,) = ax.plot([], [], 'r.-', linewidth=2, markersize=8, label='Optimizer path')
ax.set(xlabel='x', ylabel='y', title='Optimizer Trajectory')
ax.grid(True)
ax.legend()

def animate(frame):
    """Update the path artist to show the trajectory through iterate ``frame``.

    Reads the module-level ``trajectory`` array and mutates the module-level
    ``line`` artist.

    Args:
        frame: Zero-based frame index supplied by ``FuncAnimation``.

    Returns:
        A one-element tuple containing the updated ``line`` artist.
    """
    # Slice through frame + 1: frame 0 then shows the starting point and the
    # last frame (len(trajectory) - 1) includes the final iterate. Slicing
    # with [:frame] left frame 0 empty and never drew the last point.
    visible = trajectory[:frame + 1]
    line.set_data(visible[:, 0], visible[:, 1])
    return line,

# One animation frame per recorded iterate, driven by animate()
anim = FuncAnimation(
    fig,
    animate,
    blit=True,
    interval=100,
    frames=len(trajectory),
)

# Last expression of the cell: render the animation as an embedded JS/HTML player
HTML(anim.to_jshtml(fps=20))