In [1]:
from cminpack_numba import lmdif1, lmdif, lmdif_sig
import numpy as np
import numba as nb
from scipy.optimize import leastsq
import jax
import jax.numpy as jnp
from cminpack_numba.scipy_overloads import *


@nb.cfunc(lmdif_sig)
def func_numba(udata, m, n, x, fvec, iflag):
    # Residual k (k = 0..4) is (k + 2) * (x[0] - (k + 3))**2 + (k + 1) * x[1].
    # Exactly five entries of fvec are written; udata, m, n and iflag are not
    # read. Returning 0 is the status handed back to the caller.
    for k in range(5):
        fvec[k] = (k + 2.0) * (x[0] - (k + 3.0)) ** 2 + (k + 1.0) * x[1]
    return 0

def func_scipy(x, args=()):
    """Return the five residuals of the toy least-squares problem as an array.

    Residual k (k = 0..4) is ``(k + 2) * (x[0] - (k + 3))**2 + (k + 1) * x[1]``.

    Parameters
    ----------
    x : indexable with at least two numeric entries (x[0] and x[1] are read).
    args : accepted but not used.

    Returns
    -------
    numpy.ndarray holding the five residual values.
    """
    return np.array(
        [(k + 2.0) * (x[0] - (k + 3.0)) ** 2 + (k + 1.0) * x[1] for k in range(5)]
    )


In [2]:
# Reference run: SciPy's leastsq on the pure-Python residuals, starting from
# (0, 0). With full_output=True the displayed tuple holds the solution, a 2x2
# matrix (cov_x in SciPy's docs), an info dict (fvec, nfev, fjac, ipvt, qtf),
# a status message and an integer status flag.
leastsq(func_scipy, [0.0, 0.0], full_output=True)

(array([ 5.34948832, -2.06584694]),
 array([[0.00224468, 0.00371682],
        [0.00371682, 0.02433627]]),
 {'fvec': array([ 8.9743438 ,  1.33166231, -5.70897246, -6.14756052,  6.01589815]),
  'nfev': 26,
  'fjac': array([[-24.41919434,   0.33158155,   0.1144976 ,  -0.26639192,
           -0.81108702],
         [  3.72948693,  -6.41022054,   0.50320715,   0.54209607,
            0.53061976]]),
  'ipvt': array([1, 2], dtype=int32),
  'qtf': array([-7.33239827e-05, -4.30875351e-05])},
 'Both actual and predicted relative reductions in the sum of squares\n  are at most 0.000000',
 1)

In [3]:
# Same problem through cminpack_numba's lmdif1: the compiled callback's
# address, the number of residuals (5) and the starting point. The recorded
# result is a 3-tuple (presumably x, fvec, info) whose first two arrays agree
# with the SciPy run above to roughly 7 decimal places.
lmdif1(func_numba.address, 5, np.array([0.0, 0.0]))

(array([ 5.34948832, -2.06584691]),
 array([ 8.97434385,  1.33166238, -5.70897239, -6.14756044,  6.01589821]),
 1)

In [4]:
# Same problem through the lower-level lmdif wrapper with every optional
# argument left at its default; the recorded result is a 7-tuple.
# NOTE(review): in the recorded output the first array is still [0., 0.]
# although the second (fvec) matches lmdif1's converged residuals, so the
# solution vector does not appear to be returned here -- confirm against the
# wrapper. The fourth array carries the same numbers as SciPy's 'fjac' but is
# shaped (5, 2) rather than (2, 5); verify the intended layout.
lmdif(func_numba.address, 5, np.array([0.0, 0.0]))

(array([0., 0.]),
 array([ 8.97434385,  1.33166238, -5.70897239, -6.14756044,  6.01589821]),
 1,
 array([[-24.41919426,   0.33158155],
        [  0.1144976 ,  -0.26639192],
        [ -0.81108701,   3.72948691],
        [ -6.41022053,   0.50320716],
        [  0.54209607,   0.53061975]]),
 array([1, 2], dtype=int32),
 array([-7.33541237e-05, -4.29710722e-05]),
 26)

In [27]:
# Call-overhead benchmark for lmdif1 / lmdif under different ways of passing
# the optional arguments. Every timed statement also allocates a fresh
# np.array([0.0, 0.0]), so that cost is included in each figure.
# SciPy baseline, currently disabled:
# %timeit leastsq(func_scipy, [0.0, 0.0], full_output=True)
# 1) All optional arguments left at their defaults.
%timeit -n 1000 lmdif1(func_numba.address, 5, np.array([0.0, 0.0]))
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]))
# Explicit values for lmdif's optional arguments.
# NOTE(review): ftol/xtol/gtol/factor look like scipy.optimize.leastsq's
# documented defaults; whether they equal cminpack_numba's own defaults is not
# visible from this notebook -- confirm.
ftol = 1.49012e-08
xtol = 1.49012e-08
gtol = 0.0
maxfev = 1000
epsfcn = np.finfo(np.float64).eps
factor = 100.0
diag = np.array([1.0, 1.0])
mode = 1
nprint = 0
# 2) Every optional argument supplied positionally.
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn, diag, mode, factor, nprint)
# 3) Individual optional arguments supplied by keyword, then three together.
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), diag=diag)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), epsfcn=epsfcn)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), maxfev=maxfev)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), diag=diag, epsfcn=epsfcn, maxfev=maxfev)
# 4) Positional prefixes of increasing length; the trailing comment on each
#    line lists the arguments still left at their defaults.
# NOTE(review): the prefix (ftol, xtol, gtol) is skipped -- the sequence jumps
# from two to four optional arguments.
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol)#, xtol, gtol, maxfev, epsfcn, diag, mode, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol)#, gtol, maxfev, epsfcn, diag, mode, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev)#, epsfcn, diag, mode, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn)#, diag, mode, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn, diag)#, mode, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn, diag, mode)#, factor, nprint)
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn, diag, mode, factor)#, nprint)
# NOTE(review): the next statement is textually identical to the one in
# group 2. If the recorded outputs map to statements in order, the two runs
# reported very different times (about 5 us vs. about 33 us) -- worth
# re-running on a fresh kernel before drawing conclusions.
%timeit -n 1000 lmdif(func_numba.address, 5, np.array([0.0, 0.0]), ftol, xtol, gtol, maxfev, epsfcn, diag, mode, factor, nprint)

4.3 µs ± 59.6 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
34.4 µs ± 1.07 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
5.37 µs ± 113 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
37 µs ± 304 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
34.1 µs ± 394 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
35.4 µs ± 403 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
37.1 µs ± 748 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
33.1 µs ± 185 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
32.6 µs ± 392 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
31.8 µs ± 267 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
31.5 µs ± 245 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
34 µs ± 100 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
33.9 µs ± 261 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
33.3 µs ± 441 ns per loop (mean ± std. dev. of 7 runs,

In [1]:
import time
import numba as nb
import numpy as np


@nb.njit
def function(a, b, c, d=1.49012e-8, e=1.49012000000001e-8, f=0.0, g=None):
    """Jitted stub with an empty body, used only to time call overhead.

    Takes three required arguments and four optional ones: three float
    defaults (``d`` and ``e`` differ slightly) followed by ``g=None``.
    """
    ...
        
@nb.njit
def function2(a, b, c, d=1.49012e-8, e=1.49012000000001e-8, f=0.0):
    """Jitted stub with an empty body, used only to time call overhead.

    Same signature as ``function`` minus the trailing ``g=None`` parameter,
    so every default is a float.
    """
    ...
        
@nb.njit
def function3(a, b, c, d=1.49012e-8, e=1.49012000000001e-8, f=None, g=0.0):
    """Jitted stub with an empty body, used only to time call overhead.

    Like ``function`` but with the last two defaults swapped: ``f`` defaults
    to ``None`` and ``g`` to ``0.0``.
    """
    ...
    
@nb.njit
def function4(a, b, c, d=1.49012e-8, e=1.49012e-8, f=0.0, g=None):
    """Jitted stub with an empty body, used only to time call overhead.

    Like ``function`` except that ``d`` and ``e`` share exactly the same
    default value (``1.49012e-8``).
    """
    ...

# Explicit values to pass for the optional parameters d, e, f and g.
d, e, f, g = 1.49012e-8, 1.49012000000001e-8, 0.0, 1000

# The same four values, keyed by parameter name and as a positional tuple.
kwargs = dict(d=d, e=e, f=f, g=g)
args = tuple(kwargs.values())

# Call every variant once with only the required arguments before timing.
for func in [function, function2, function3, function4]:
    func(1, 2, 3)
    
def time_func(func, args, kwargs):
    """Print %timeit results for ``func(1, 2, 3, ...)`` with varying optional arguments.

    Parameters
    ----------
    func : callable exposing ``__name__``; always called with 1, 2, 3 first.
    args : sequence of extra positional values; prefixes ``args[:i]`` are timed.
    kwargs : dict of extra keyword values; each key is left out once.

    Returns None. Uses the ``%timeit`` line magic, so it only runs under
    IPython/Jupyter.
    """
    # Un-timed call with only the required arguments.
    func(1, 2, 3)
    
    print(func.__name__)
    print("time *args")
    # NOTE(review): i runs from 0 to len(args) - 1, so args[:i] never contains
    # the last element -- the call with every positional extra supplied is
    # never timed. Confirm whether that is intended.
    for i, _ in enumerate(args):
        # func(1, 2, 3, *args[:i])
        %timeit -n 1000 func(1, 2, 3, *args[:i])
    print("time **kwargs")
    # Leave-one-out: each pass supplies every keyword except `i`, so the call
    # with the full kwargs dict is not timed either.
    for i in kwargs:
        _kwargs = {k: v for k, v in kwargs.items() if k != i}
        # func(1, 2, 3, **_kwargs)
        %timeit -n 1000 func(1, 2, 3, **_kwargs)

# Benchmark every stub. function2 has no `g` parameter, so the last positional
# value and the 'g' keyword are dropped for it.
time_func(function, args, kwargs)
time_func(
    function2,
    args[:-1],
    {name: value for name, value in kwargs.items() if name != 'g'},
)
time_func(function3, args, kwargs)
time_func(function4, args, kwargs)

function
time *args
16.8 µs ± 124 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
18.1 µs ± 4.1 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
17.1 µs ± 4.37 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
The slowest run took 60.52 times longer than the fastest. This could mean that an intermediate result is being cached.
1.89 µs ± 4.14 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
time **kwargs
The slowest run took 60.45 times longer than the fastest. This could mean that an intermediate result is being cached.
1.78 µs ± 3.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
17.1 µs ± 3.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
17.4 µs ± 4.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
210 ns ± 12.4 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
function2
time *args
16.5 µs ± 247 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
17.6 µs ± 4.36 µs per loop (mean ± std. dev. o

In [7]:
# Keep the mapping returned by inspect_llvm() in `llvm` (a later cell reads it)
# and print each key followed by the value stored under it.
llvm = function.inspect_llvm()
for i in llvm:
    print(i, llvm[i], sep="\n")

(int64, int64, int64, omitted(default=1.49012e-08), omitted(default=1.49012000000001e-08), omitted(default=0.0), omitted(default=None))
; ModuleID = 'function'
source_filename = "<string>"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-darwin23.1.0"

@.const.function = internal constant [9 x i8] c"function\00"
@_ZN08NumbaEnv8__main__8functionB2v1B38c8tJTIcFKzyF2ILShI4CrgQElQb6HczSBAA_3dExxxN21omitted_28default_3d115_49012e_2d08_29EN21omitted_28default_3d124_49012000000001e_2d08_29EN21omitted_28default_3d05_0_29E27omitted_28default_3dNone_29 = common local_unnamed_addr global i8* null
@PyExc_RuntimeError = external global i8
@".const.missing Environment: _ZN08NumbaEnv8__main__8functionB2v1B38c8tJTIcFKzyF2ILShI4CrgQElQb6HczSBAA_3dExxxN21omitted_28default_3d115_49012e_2d08_29EN21omitted_28default_3d124_49012000000001e_2d08_29EN21omitted_28default_3d05_0_29E27omitted_28default_3dNone_29" = internal constant [255 x i8] c"missing Environment: _ZN08NumbaE

In [18]:
llvm_list = list(llvm.keys())
hash(llvm_list[0])
%timeit -n 1000 llvm[llvm_list[0]]

1.05 µs ± 64.7 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [None]:
# Load the Cython IPython extension. This cell has no execution count, i.e.
# it had not been run when the notebook was saved.
%load_ext Cython