# Optimización $L(\theta)=\theta^2$

In [None]:
import torch
from IPython.display import display, Math

# Initial parameter value; requires_grad=True makes autograd track it.
theta = torch.tensor(3.0, requires_grad=True)
# The optimizer is responsible for updating theta from its gradient.
optimizer = torch.optim.SGD([theta], lr=0.1)

# Optimization loop (30 steps)
for step in range(30):
    # Reset the gradients accumulated in the previous iteration.
    optimizer.zero_grad()
    # Loss function theta^2
    loss = theta**2
    # Compute d(loss)/d(theta) and store it in theta.grad.
    loss.backward()
    # Update theta using the gradient just computed.
    optimizer.step()
    # Re-evaluate the loss at the *updated* theta so that the theta and the
    # loss shown on the same line refer to the same point. `loss` above was
    # computed before the step and would lag one iteration behind.
    loss_after = (theta.detach() ** 2).item()
    # Show the progress rendered as LaTeX.
    display(Math(
        rf"\text{{Paso }} {step + 1}: \ \theta \text{{ se ha movido a }} {theta.item():.3f},\ "
        rf"\text{{loss}} = {loss_after:.4f}"
    ))



# Animación del proceso de optimización $L(\theta)=\theta^2$

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# ----- 1. RUN GRADIENT DESCENT AND STORE TRAJECTORY -----

# Fresh parameter and optimizer for this run.
theta = torch.tensor(3.0, requires_grad=True)
optimizer = torch.optim.SGD([theta], lr=0.1)

# One (theta, gradient) pair per step; both values are taken before the
# parameter update of that step is applied.
history = []

for step in range(30):
    theta_before = theta.item()

    optimizer.zero_grad()
    loss = theta**2
    loss.backward()

    history.append((theta_before, theta.grad.item()))   # gradient is 2 * theta
    optimizer.step()

# Split the recorded pairs into two 1-D arrays of length 30.
trajectory, grads = (np.array(column) for column in zip(*history))


# ----- 2. PLOT SETUP -----

fig, ax = plt.subplots(figsize=(6, 4))

# Static background: the loss curve theta^2 sampled at 400 points on [-3, 3].
x = np.linspace(-3, 3, 400)
ax.plot(x, x**2, label="f(θ)=θ²")

# Artists that the animation callback changes on every frame.
(point,) = ax.plot([], [], 'ro', markersize=8)           # red marker for the current theta
arrow = ax.arrow(0, 0, 0, 0, color='blue')               # placeholder; removed and redrawn per frame
text = ax.text(0.05, 0.9, "", transform=ax.transAxes)    # status line, positioned in axes coordinates

# Fixed view limits, axis labels and title.
ax.set(
    xlim=(-3, 3),
    ylim=(0, 10),
    xlabel="θ",
    ylabel="Loss",
    title="Optimización de θ² con gradiente descendente",
)
ax.legend()


# ----- 3. ANIMATION FUNCTION -----

def update(frame):
    """Draw animation frame ``frame`` of the 1-D gradient descent run.

    Moves the red marker to the stored iterate, replaces the arrow with one
    spanning the parameter update taken from that iterate, and refreshes the
    status text.

    Parameters
    ----------
    frame : int
        Index into the module-level ``trajectory`` and ``grads`` arrays.

    Returns
    -------
    tuple
        ``(point, arrow, text)``, the artists modified for this frame.
    """
    # The arrow artist is recreated on every frame, so the module-level name
    # has to be rebound here.
    global arrow

    theta_k = trajectory[frame]
    grad = grads[frame]
    loss_k = theta_k**2

    # Place the marker on the parabola at the current iterate.
    point.set_data([theta_k], [loss_k])

    # Drop the arrow drawn for the previous frame.
    arrow.remove()

    # Horizontal displacement of the update step: -lr * grad. The arrow points
    # in the descent direction, i.e. left when grad > 0 and right when grad < 0.
    lr = 0.1     # must match the learning rate used to generate `trajectory`
    scale = 1
    dx = -scale * grad * lr
    dy = 0       # the step only changes theta, so there is no vertical component

    # length_includes_head=True makes the arrow tip land exactly at
    # theta_k + dx instead of overshooting it by the head length.
    arrow = ax.arrow(theta_k, loss_k, dx, dy, color='blue', width=0.02,
                     length_includes_head=True)

    text.set_text(f"step={frame}, θ={theta_k:.3f}, grad={grad:.3f}, loss={loss_k:.3f}")

    return point, arrow, text



# One frame per recorded iterate, 300 ms between frames.
anim = FuncAnimation(fig=fig, func=update, frames=trajectory.shape[0], interval=300)

# Close the current figure before rendering the animation
# (presumably so the notebook does not also show a static copy of it).
plt.close()

# ----- 4. DISPLAY IN COLAB -----

# Last expression of the cell: the animation rendered as a JavaScript/HTML player.
HTML(anim.to_jshtml())




# Optimización $L(\theta)=\theta_1^2 + \theta_2^2$

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from IPython.display import HTML

# ----- 1. RUN GRADIENT DESCENT AND STORE TRAJECTORY -----

theta = torch.tensor([2.5, -1.8], requires_grad=True)  # [theta1, theta2]
optimizer = torch.optim.SGD([theta], lr=0.15)

# Per-step records, all taken before the parameter update of that step.
trajectory, grads, losses = [], [], []

for step in range(20):
    # Clone so the snapshot is not affected by later updates of theta.
    trajectory.append(theta.detach().clone())

    optimizer.zero_grad()
    loss = theta[0] ** 2 + theta[1] ** 2   # theta1^2 + theta2^2
    loss.backward()

    grads.append(theta.grad.detach().clone())
    losses.append(loss.item())

    optimizer.step()

# Stack the per-step snapshots into arrays indexed by step.
trajectory = torch.stack(trajectory).cpu().numpy()  # shape: (T, 2)
grads = torch.stack(grads).cpu().numpy()            # shape: (T, 2)
losses = np.array(losses)                           # shape: (T,)

# ----- 2. PLOT SETUP -----

fig, ax = plt.subplots(figsize=(6, 6))

# Static background: 20 contour levels of f(theta1, theta2) = theta1^2 + theta2^2,
# evaluated on a 300 x 300 grid over [-3, 3] x [-3, 3].
grid = np.linspace(-3, 3, 300)
X, Y = np.meshgrid(grid, grid)
Z = X**2 + Y**2
ax.contour(X, Y, Z, levels=20)

# Artists that the animation callback changes on every frame.
(point,) = ax.plot([], [], 'ro', markersize=8)   # red marker for the current theta
arrow = ax.arrow(0, 0, 0, 0)                     # placeholder; removed and redrawn per frame
text = ax.text(0.03, 0.97, "", transform=ax.transAxes, va="top")  # status line, top-left corner

# Fixed view limits, axis labels and title.
ax.set(
    xlim=(-3, 3),
    ylim=(-3, 3),
    xlabel=r"$\theta_1$",
    ylabel=r"$\theta_2$",
    title=r"Optimización de $f(\theta_1,\theta_2)=\theta_1^2+\theta_2^2$ con gradiente descendente",
)

# ----- 3. ANIMATION FUNCTION -----

def update(frame):
    """Draw animation frame ``frame`` of the 2-D gradient descent run.

    Moves the red marker to the stored iterate, replaces the arrow with one
    pointing along the negative gradient scaled by the learning rate, and
    refreshes the status text.

    Parameters
    ----------
    frame : int
        Index into the module-level ``trajectory``, ``grads`` and ``losses``
        arrays.

    Returns
    -------
    tuple
        ``(point, arrow, text)``, the artists modified for this frame.
    """
    # The arrow artist is recreated on every frame, so the module-level name
    # has to be rebound here.
    global arrow

    theta_1, theta_2 = trajectory[frame]
    grad_1, grad_2 = grads[frame]
    current_loss = losses[frame]

    # Marker at the current iterate.
    point.set_data([theta_1], [theta_2])

    # Displacement in the descent direction: -lr * grad, component by component.
    lr, scale = 0.15, 1.0
    dx = -scale * grad_1 * lr
    dy = -scale * grad_2 * lr

    # Replace the arrow of the previous frame with the one for this step.
    arrow.remove()
    arrow = ax.arrow(theta_1, theta_2, dx, dy, width=0.03, length_includes_head=True)

    # Status line; the fields are separated by two spaces and the thetas use mathtext.
    caption = "  ".join((
        rf"step={frame}",
        rf"$\theta_1$={theta_1:.3f}, $\theta_2$={theta_2:.3f}",
        rf"$\nabla f$=({grad_1:.3f},{grad_2:.3f})",
        rf"loss={current_loss:.3f}",
    ))
    text.set_text(caption)

    return point, arrow, text

# One frame per recorded iterate, 250 ms between frames.
anim = FuncAnimation(fig=fig, func=update, frames=trajectory.shape[0], interval=250)

# Close the current figure before rendering the animation
# (presumably so the notebook does not also show a static copy of it).
plt.close()

# Last expression of the cell: the animation rendered as a JavaScript/HTML player.
HTML(anim.to_jshtml())



# Ejercicio

Aplica el gradiente descendente para minimizar la función $L(\theta_1,\dots,\theta_{100}) = \sum_{i=1}^{100} (\theta_i - 1)^2$. Muestra solo el loss en cada paso y el valor final de $\theta$ al terminar la optimización.

Nota. Debes inicializar $\theta$ con valores aleatorios. El resultado esperado es que cada $\theta_i$ tienda a 1 al finalizar la optimización.
- https://docs.pytorch.org/docs/stable/generated/torch.rand.html
- `torch` funciona de manera similar a numpy, por lo que puedes usar operaciones vectorizadas como `theta + 25`, `theta ** 2`, etc.
- `.sum()` se usa para sumar todos los elementos de un tensor.

In [None]:
# TODO: Ejercicio