In [8]:
import torch
import numpy as np
from scipy.optimize import minimize, Bounds
import cProfile, io, pstats
from pstats import SortKey

# Fixed batch of 1024 random 128x128 double-precision matrices used by the
# objective below.
rand_tensor = torch.rand(1024, 128, 128, dtype=torch.double)


def f_and_grad(x):
    """Evaluate the objective and its gradient at ``x`` in a single pass.

    The objective is the sum of all entries of
    ``rand_tensor @ rand_tensor - x``, where ``x`` is broadcast against the
    trailing dimension of the batched matrix product.

    Args:
        x: NumPy array holding the current point.

    Returns:
        A ``(loss, grad)`` tuple: ``loss`` is a Python float and ``grad`` is a
        flat NumPy array holding the gradient of the loss with respect to
        ``x``.
    """
    # Wrap the NumPy buffer as a tensor and track gradients on it.
    point = torch.from_numpy(x).contiguous()
    point.requires_grad_(True)

    # The batched product is recomputed on every call.
    batched_product = rand_tensor @ rand_tensor
    residual = batched_product - point
    loss = residual.sum()

    # autograd.grad returns one gradient per input; there is exactly one here.
    (gradient,) = torch.autograd.grad(loss, point)
    return loss.item(), gradient.view(-1).numpy(force=True)

# Start at the lower corner of the unit box [0, 1]^128 that bounds the search.
x0 = np.zeros(128)
bounds = Bounds(lb=np.zeros(128), ub=np.ones(128), keep_feasible=True)

# Profile a short bounded L-BFGS-B run. jac=True tells SciPy that f_and_grad
# returns (value, gradient) together from one call.
with cProfile.Profile() as pr:
    minimize(
        f_and_grad,
        x0,
        method="L-BFGS-B",
        jac=True,
        bounds=bounds,
        options=dict(maxiter=10),
    )

# Print the collected statistics ordered by cumulative time.
sortby = SortKey.CUMULATIVE
ps = pstats.Stats(pr)
ps.sort_stats(sortby)
ps.print_stats()

         853 function calls (851 primitive calls) in 0.136 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.136    0.136 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/_minimize.py:45(minimize)
        1    0.000    0.000    0.136    0.136 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/lbfgsb.py:210(_minimize_lbfgsb)
        4    0.000    0.000    0.136    0.034 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/optimize.py:65(_compute_if_needed)
        3    0.000    0.000    0.136    0.045 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:231(_update_fun)
        2    0.000    0.000    0.136    0.068 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/_differentiable_functions.py:136(update_fun)
        2    0.000    0.000    0.136    0.068 /opt/anaconda3/lib/python3.9/site-packages/scipy/optimize/_differentiable_func

<pstats.Stats at 0x13f659160>