In [1]:
import torch

In [2]:
# Define a simple function: f(x) = x^2 + 2x + 3
def f(x):
    return x**2 + 2*x + 3

# Create a tensor with a value of 2.0
x = torch.tensor(2.0, requires_grad=True)

# Forward pass: Compute the function value and create a computation graph
y = f(x)

# Perform backward pass: Compute gradients
y.backward()

# Get the gradient of the function w.r.t. the input x
gradient = x.grad

# Optimize the function using gradient descent
learning_rate = 0.1
num_steps = 100

for step in range(num_steps):
    # Forward pass: Compute the function value and create a computation graph
    y = f(x)

    # Perform backward pass: Compute gradients
    y.backward()

    # Update x using gradient descent
    with torch.no_grad():
        x -= learning_rate * x.grad
        
    # check the gradient before clear it
    if (step + 1) % 10 == 0:
        print(f"Step [{step+1}/{num_steps}], x: {x.item():.6f}, f(x): {y.item():.6f}, x.grad: {gradient.item():.6f}")
        
    # Clear the gradients after each step
    x.grad.zero_()

print("Optimized x:", x.item())
print("Optimized f(x):", f(x).item())

Step [10/100], x: -0.758408, f(x): 2.091198, x.grad: 0.603980
Step [20/100], x: -0.974059, f(x): 2.001051, x.grad: 0.064852
Step [30/100], x: -0.997215, f(x): 2.000012, x.grad: 0.006963
Step [40/100], x: -0.999701, f(x): 2.000000, x.grad: 0.000748
Step [50/100], x: -0.999968, f(x): 2.000000, x.grad: 0.000080
Step [60/100], x: -0.999997, f(x): 2.000000, x.grad: 0.000009
Step [70/100], x: -1.000000, f(x): 2.000000, x.grad: 0.000001
Step [80/100], x: -1.000000, f(x): 2.000000, x.grad: 0.000000
Step [90/100], x: -1.000000, f(x): 2.000000, x.grad: 0.000000
Step [100/100], x: -1.000000, f(x): 2.000000, x.grad: 0.000000
Optimized x: -0.9999998807907104
Optimized f(x): 2.0


# second order derivaives

In [3]:
# Define the function: f(x, y) = x^2 + 3xy + y^2 + 2x + 3y
def f(x, y):
    return x**2 + 3 * x * y + y**2 + 2 * x + 3 * y

# Create two tensors with values of 2.0 and 1.0
x = torch.tensor(2.0, requires_grad=True)
y = torch.tensor(1.0, requires_grad=True)

# Forward pass: Compute the function value and create a computation graph
z = f(x, y)

# Perform first-order backward pass: Compute gradients
grads = torch.autograd.grad(z, [x, y], create_graph=True)

# Get the first-order gradients (gradient w.r.t. x and y)
grad_x, grad_y = grads

# Perform second-order backward pass: Compute the Hessian matrix
# Compute the second-order derivatives by taking the gradient of gradients
hessian = []
for grad in grads:
    hess = torch.autograd.grad(grad, [x, y], retain_graph=True)
    hessian.append(hess)

# Get the second-order derivatives (Hessian matrix)
hessian_xx, hessian_xy = hessian[0]
hessian_yx, hessian_yy = hessian[1]

# Print the results
print("Function value (z):", z.item())
print("First-order gradient (dz/dx):", grad_x.item())
print("First-order gradient (dz/dy):", grad_y.item())
print("Second-order gradient (d^2z/dx^2):", hessian_xx.item())
print("Second-order gradient (d^2z/dxdy):", hessian_xy.item())
print("Second-order gradient (d^2z/dydx):", hessian_yx.item())
print("Second-order gradient (d^2z/dy^2):", hessian_yy.item())

Function value (z): 18.0
First-order gradient (dz/dx): 9.0
First-order gradient (dz/dy): 11.0
Second-order gradient (d^2z/dx^2): 2.0
Second-order gradient (d^2z/dxdy): 3.0
Second-order gradient (d^2z/dydx): 3.0
Second-order gradient (d^2z/dy^2): 2.0
