In [5]:
import math
import time
import tracemalloc
import torch


In [6]:

def func(x):               #Function
    
    x1, x2 = x[0], x[1]
    return torch.log(x1) + x1 * x2 - torch.sin(x2)

In [11]:
def closed_form_grad(x1_val, x2_val):
    
    x1 = torch.tensor(float(x1_val), dtype=torch.float64)
    x2 = torch.tensor(float(x2_val), dtype=torch.float64)

    # x1, x2 = x1_val[0], x2_val[1]

    g1 = 1.0 / x1 + x2
    g2 = x1 - torch.cos(x2)

    return torch.stack([g1, g2])  

In [8]:

def reverse_grad(x1_val, x2_val):
    
    x = torch.tensor([x1_val, x2_val], dtype=torch.float64,requires_grad=True)
    y = func(x)
    y.backward()
    return x.grad

In [9]:

def forward_grad(x1_val, x2_val):
   
    x = torch.tensor([x1_val, x2_val], dtype=torch.float64)

  
    e1 = torch.tensor([1.0, 0.0], dtype=torch.float64)
    e2 = torch.tensor([0.0, 1.0], dtype=torch.float64)

    i, dy_dx1 = torch.autograd.functional.jvp(func, (x,), (e1,))
    i, dy_dx2 = torch.autograd.functional.jvp(func, (x,), (e2,))

    return torch.stack([dy_dx1, dy_dx2]) 

In [13]:

def bench(fn, *args, repeats=10000, warmup=1000):
    
  
    for i in range(warmup):
        fn(*args)

    tracemalloc.start()
    start_snapshot = tracemalloc.take_snapshot()
    t0 = time.perf_counter()

    last_result = None
    for _ in range(repeats):
        last_result = fn(*args)

    t1 = time.perf_counter()
    end_snapshot = tracemalloc.take_snapshot()
    stats_diff = end_snapshot.compare_to(start_snapshot, "lineno")
    tracemalloc.stop()

    # Rough "peak" = total positive size difference across lines
    peak_bytes = sum(s.size_diff for s in stats_diff if s.size_diff > 0)

    return last_result, (t1 - t0), peak_bytes


if __name__ == "__main__":
    x1 = 2.0
    x2 = 5.0

    print(f"x1 = {x1}, x2 = {x2}\n")

    
    g_closed = closed_form_grad(x1, x2)
    g_rev = reverse_grad(x1, x2)
    g_fwd = forward_grad(x1, x2)

    print("Closed-form grad :", g_closed.numpy())
    print("Reverse-mode grad:", g_rev.detach().numpy())
    print("Forward-mode grad:", g_fwd.detach().numpy())
    print()

    # --- Benchmarks ---
    repeats = 20000

    print(f"Benchmarking with repeats = {repeats} ...\n")

    for name, fn in [
        ("Closed-form", closed_form_grad),
        ("Reverse-mode AD", reverse_grad),
        ("Forward-mode AD", forward_grad),
    ]:
        grad, total_time, peak_bytes = bench(fn, x1, x2, repeats=repeats)
        time_per_call = total_time / repeats
        print(f"{name}:")
        print(f"  grad           = {grad.detach().numpy()}")
        print(f"  total time     = {total_time:.6f} s")
        print(f"  time / call    = {time_per_call:.6e} s")
        print(f"  approx memory  = {peak_bytes / 1024:.3f} KB (aggregate)\n")

x1 = 2.0, x2 = 5.0

Closed-form grad : [5.5        1.71633781]
Reverse-mode grad: [5.5        1.71633781]
Forward-mode grad: [5.5        1.71633781]

Benchmarking with repeats = 20000 ...

Closed-form:
  grad           = [5.5        1.71633781]
  total time     = 0.374469 s
  time / call    = 1.872346e-05 s
  approx memory  = 3.670 KB (aggregate)

Reverse-mode AD:
  grad           = [5.5        1.71633781]
  total time     = 1.400012 s
  time / call    = 7.000060e-05 s
  approx memory  = 1.451 KB (aggregate)

Forward-mode AD:
  grad           = [5.5        1.71633781]
  total time     = 6.927174 s
  time / call    = 3.463587e-04 s
  approx memory  = 3.377 KB (aggregate)

