# Preliminaries

In [1]:
import numpy as np

In [2]:
# Notebook-wide bookkeeping: an identifying string plus three empty dicts.
# NOTE(review): none of these names is referenced in the visible cells;
# presumably they follow a template shared with sibling study notebooks.
THIS = 'dissipation-theory--Study-52--'
figs, obj, df = {}, {}, {}

In [3]:
# Unit registry and constants, then the unit-aware (uncompiled) models,
# integrands and K functions that the jit versions are checked against.
from dissipationtheory.constants import ureg, epsilon0, qe
from dissipationtheory.dissipation9a import CantileverModel, SampleModel1, SampleModel2, SampleModel3, SampleModel4
from dissipationtheory.dissipation9a import integrand1, integrand2, integrand3, K, Kunits, Kmetal, Kmetalunits

In [4]:
from dissipationtheory.dissipation9b import CantileverModelJit, SampleModel1Jit, SampleModel2Jit, SampleModel3Jit, SampleModel4Jit
from dissipationtheory.dissipation9b import integrand1jit, integrand2jit, integrand3jit, K_jit, Kunits_jit, Kmetal_jit, Kmetalunits_jit

# Cantilever

In [5]:
# Cantilever model shared by every sample defined in this notebook.
# Each argument is a unit-carrying quantity built with `ureg.Quantity`.
# NOTE(review): the physical meanings noted below are inferred from the
# parameter names and units — confirm against `CantileverModel`.
cantilever = CantileverModel(
    f_c = ureg.Quantity(62, 'kHz'),  # a frequency; presumably the cantilever resonance frequency
    k_c = ureg.Quantity(2.8, 'N/m'),  # force per length; presumably the cantilever spring constant
    V_ts = ureg.Quantity(1, 'V'),  # a voltage; presumably the tip-sample voltage
    R = ureg.Quantity(55, 'nm'),  # a length; presumably the tip radius
    angle = ureg.Quantity(20, 'degree'),  # an angle; presumably describes the tip cone
    L = ureg.Quantity(1000, 'nm')  # a length; presumably the tip cone length
)

In [6]:
# Jit-class counterpart of `cantilever`, constructed from the keyword
# arguments returned by `cantilever.args()`.
# NOTE(review): `cantilever_jit` is not referenced again in the visible cells.
cantilever_jit = CantileverModelJit(**cantilever.args())

# Common parameters

Now unitless, to be fed to `jit` compiled functions. It is *very important* that the numpy array data be of type `float64` and not `int`.

In [7]:
# Each input is defined twice, side by side: a bare float/float64 version for
# the jit-compiled functions and a unit-carrying version for the Python ones.

# Frequency: 1e5 as a plain float, and the same number tagged with 'Hz'.
omega = 1e5
omega_u = ureg.Quantity(omega, 'Hz')

# First location: meters as float64 (required by the jit signatures), and
# the same coordinates in nanometers with units attached.
loc1 = np.array([10, 20, 50], dtype=np.float64) * 1e-9
loc1_u = ureg.Quantity(np.array([10, 20, 50]), 'nm')

# Second location, same two representations.
loc2 = np.array([0, 0, 50], dtype=np.float64) * 1e-9
loc2_u = ureg.Quantity(np.array([0, 0, 50]), 'nm')

# Debugging `jit` compilation

In [8]:
# Type I sample (`SampleModel1`) used to exercise the jit-compiled classes below.
# NOTE(review): parameter meanings are inferred from names and units —
# confirm against `SampleModel1`.
sample1 = SampleModel1(
    cantilever = cantilever,
    h_s = ureg.Quantity(100, 'nm'),  # a length; presumably the sample-layer thickness
    epsilon_s = ureg.Quantity(complex(20, 0), ''),  # dimensionless, complex; presumably a relative permittivity
    sigma = ureg.Quantity(1e-7, 'S/m'),  # a conductivity
    rho = ureg.Quantity(1e21, '1/m^3'),  # a number density; presumably of charge carriers
    epsilon_d = ureg.Quantity(complex(1e6, 0), ''),  # dimensionless, complex
    z_r = ureg.Quantity(100, 'nm')  # a length; presumably the reference length used to scale the K results
)

In [9]:
sample1.args()

{'cantilever': <numba.experimental.jitclass.boxing.CantileverModelJit at 0x11ba1b070>,
 'h_s': 1.0000000000000001e-07,
 'epsilon_s': (20+0j),
 'sigma': 1e-07,
 'rho': 1e+21,
 'epsilon_d': (1000000+0j),
 'z_r': 1.0000000000000001e-07}

In [10]:
# Jit-class counterpart of `sample1`, built from the plain (unitless)
# keyword arguments that `sample1.args()` returns.
sample1_jit = SampleModel1Jit(**sample1.args())

In [11]:
from numba import jit
from numba import float64

In [12]:
@jit(float64(SampleModel1Jit.class_type.instance_type), nopython=True)
def testme(sample1_jit):
    """Return the `kD` attribute of a `SampleModel1Jit` instance.

    Checks that a jitclass instance can be passed into a function compiled
    in nopython mode with an explicit signature.
    """
    k_d = sample1_jit.kD
    return k_d

In [13]:
testme(sample1_jit)

26456583.416667342

In [14]:
import numba.types as nb_types

In [15]:
@jit(float64(float64, SampleModel1Jit.class_type.instance_type), nopython=True)
def testme(y, sample):
    """Return `y` multiplied by the sample's `type` attribute.

    Checks that a float and a jitclass instance can be mixed in one
    explicitly typed nopython signature.
    """
    factor = sample.type
    return y * factor

In [16]:
[testme(y, sample1_jit) for y in [1, 2, 3]]

[1.0, 2.0, 3.0]

In [17]:
@jit(float64(float64, SampleModel1Jit.class_type.instance_type, float64), nopython=True)
def testme(y, sample, omega):
    """Return `y * sample.type * omega`.

    Same check as before with a trailing float argument added to the
    explicit signature.
    """
    scaled = y * sample.type
    return scaled * omega

In [18]:
[testme(y, sample1_jit, 2) for y in [1, 2, 3]]

[2.0, 4.0, 6.0]

In [19]:
@jit(float64(float64[:]), nopython=True)
def testme(location):
    """Return the product of the first two entries of a float64 array.

    The explicit `float64[:]` signature means only 1-d float64 arrays are
    accepted.
    """
    first, second = location[0], location[1]
    return first * second

This succeeds:

In [20]:
testme(np.array([3., 4., 5.]))

12.0

This will fail with the error 

    TypeError: No matching definition for argument type(s) array(int64, 1d, C)

because the array being passed to the function is an array of *integers* and not *floats*.

In [21]:
# Show the failure instead of hiding the call behind `if 0:`.
# `testme` was compiled for `float64[:]` only, so an integer array has no
# matching definition; catching the exception keeps "Run All" working while
# still displaying the message.
try:
    testme(np.array([3, 4, 5]))
except TypeError as err:
    print(f"TypeError: {err}")

# Type I sample

In [22]:
# Type I sample. This re-creates `sample1` with the same parameters used in
# the jit-debugging section, so this section can be read on its own.
# NOTE(review): parameter meanings are inferred from names and units —
# confirm against `SampleModel1`.
sample1 = SampleModel1(
    cantilever = cantilever,
    h_s = ureg.Quantity(100, 'nm'),  # a length; presumably the sample-layer thickness
    epsilon_s = ureg.Quantity(complex(20, 0), ''),  # dimensionless, complex; presumably a relative permittivity
    sigma = ureg.Quantity(1e-7, 'S/m'),  # a conductivity
    rho = ureg.Quantity(1e21, '1/m^3'),  # a number density; presumably of charge carriers
    epsilon_d = ureg.Quantity(complex(1e6, 0), ''),  # dimensionless, complex
    z_r = ureg.Quantity(100, 'nm')  # a length; presumably the reference length used to scale the K results
)

In [23]:
# Jit-class counterpart of the Type I sample, built from `sample1.args()`.
sample1_jit = SampleModel1Jit(**sample1.args())

In [24]:
integrand1jit(1.4, sample1_jit, omega, loc1, loc2)

array([ 2.20213431e-01, -9.09429597e-05,  3.08298804e-01, -1.27320144e-04,
        4.31618325e-01, -1.78248201e-04])

In [25]:
np.allclose(
    integrand1jit(1.4, sample1_jit, omega, loc1, loc2),
    integrand1(1.4, sample1, omega_u, loc1_u, loc2_u))

True

In [26]:
# Keyword arguments for the compiled Type I calculation (all unitless),
# followed by the call whose result is displayed.
params1_jit = dict(
    integrand=integrand1jit,
    sample=sample1_jit,
    omega=omega,
    location1=loc1,
    location2=loc2,
)
K_jit(**params1_jit)

array([0.92308803-0.00023051j, 0.85642218-0.0003009j ,
       1.57311203-0.00058176j])

In [27]:
# Keyword arguments for the uncompiled Type I calculation (unit-carrying
# inputs), followed by the call whose result is displayed.
params1 = dict(
    integrand=integrand1,
    sample=sample1,
    omega=omega_u,
    location1=loc1_u,
    location2=loc2_u,
)
K(**params1)

array([0.92308803-0.00023051j, 0.85642218-0.0003009j ,
       1.57311203-0.00058176j])

In [28]:
np.allclose(K_jit(**params1_jit), K(**params1))

True

In [29]:
Kunits_jit(**params1_jit)

(<Quantity((0.009230880287373287-2.3050839017765616e-06j), '1 / nanometer')>,
 <Quantity((8.564221806744136e-05-3.008996480189857e-08j), '1 / nanometer ** 2')>,
 <Quantity((1.5731120310679616e-06-5.817587342422956e-10j), '1 / nanometer ** 3')>)

In [30]:
Kunits(**params1)

(<Quantity((0.009230880287373287-2.3050839017765616e-06j), '1 / nanometer')>,
 <Quantity((8.564221806744135e-05-3.008996480189856e-08j), '1 / nanometer ** 2')>,
 <Quantity((1.5731120310679616e-06-5.817587342422955e-10j), '1 / nanometer ** 3')>)

How to compare these two?  Make a wrapper function that strips the units.

In [31]:
def stripKunits(Kn_tuple):
    """Convert a tuple of unit-carrying K values into a unitless numpy array.

    Parameters
    ----------
    Kn_tuple : iterable of three quantities
        Objects supporting ``.to(unit).magnitude``, in the order returned by
        `Kunits` / `Kunits_jit`.

    Returns
    -------
    numpy.ndarray
        The magnitudes after converting the three entries to ``1/nm``,
        ``1/nm**2`` and ``1/nm**3``, respectively.

    Raises
    ------
    ValueError
        If `Kn_tuple` does not hold exactly three entries. Without this
        check `zip` would silently drop entries and return a short array.
    """
    units = ('1/nm', '1/nm**2', '1/nm**3')

    # Materialize once so that generators are supported and len() is valid.
    Kn_tuple = tuple(Kn_tuple)
    if len(Kn_tuple) != len(units):
        raise ValueError(
            f"expected {len(units)} quantities, got {len(Kn_tuple)}")

    return np.array([Kn.to(unit).magnitude for Kn, unit in zip(Kn_tuple, units)])

In [32]:
stripKunits(Kunits(**params1))

array([9.23088029e-03-2.30508390e-06j, 8.56422181e-05-3.00899648e-08j,
       1.57311203e-06-5.81758734e-10j])

The function `K_jit` is 60 to 100 times faster than the uncompiled `K` function for a Type I sample.

In [65]:
%%timeit
K_jit(**params1_jit)

6.92 ms ± 3.32 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


The `Kunits_jit` function is slightly slower than the `K_jit` function. \
Does adding units to the function slow you down?

In [66]:
%%timeit
Kunits_jit(**params1_jit)

7.01 ms ± 1.41 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Type II sample

In [35]:
# Type II sample (`SampleModel2`).
# NOTE(review): parameter meanings are inferred from names and units —
# confirm against `SampleModel2`.
sample2 = SampleModel2(
    cantilever = cantilever,
    epsilon_d = ureg.Quantity(complex(3, 0), ''),  # dimensionless, complex; presumably the dielectric layer's relative permittivity
    h_d = ureg.Quantity(20, 'nm'),  # a length; presumably the dielectric layer's thickness
    epsilon_s = ureg.Quantity(complex(20, 0), ''),  # dimensionless, complex; presumably a relative permittivity
    sigma = ureg.Quantity(1e-7, 'S/m'),  # a conductivity
    rho = ureg.Quantity(1e21, '1/m^3'),  # a number density; presumably of charge carriers
    z_r = ureg.Quantity(100, 'nm')  # a length; presumably the reference length used to scale the K results
)

In [36]:
# Jit-class counterpart of the Type II sample, built from `sample2.args()`.
sample2_jit = SampleModel2Jit(**sample2.args())

In [37]:
# Keyword arguments for the compiled Type II calculation (all unitless),
# followed by the call whose result is displayed.
params2_jit = dict(
    integrand=integrand2jit,
    sample=sample2_jit,
    omega=omega,
    location1=loc1,
    location2=loc2,
)
K_jit(**params2_jit)

array([0.7921492 -0.0003819j , 0.68749971-0.0002725j ,
       1.18185061-0.00037396j])

In [38]:
# Keyword arguments for the uncompiled Type II calculation (unit-carrying
# inputs), followed by the call whose result is displayed.
params2 = dict(
    integrand=integrand2,
    sample=sample2,
    omega=omega_u,
    location1=loc1_u,
    location2=loc2_u,
)
K(**params2)

array([0.7921492 -0.0003819j , 0.68749971-0.0002725j ,
       1.18185061-0.00037396j])

The compiled `K_jit` function is about 100 times faster with a Type II sample.

In [62]:
%%timeit
K_jit(**params2_jit)

4 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [63]:
%%timeit
K(**params2)

327 ms ± 56.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Type III sample

In [39]:
# Type III sample (`SampleModel3`).
# NOTE(review): parameter meanings are inferred from names and units —
# confirm against `SampleModel3`.
sample3 = SampleModel3(
    cantilever = cantilever,
    epsilon_s = ureg.Quantity(complex(20, 0), ''),  # dimensionless, complex; presumably a relative permittivity
    sigma = ureg.Quantity(1e-7, 'S/m'),  # a conductivity
    rho = ureg.Quantity(1e21, '1/m^3'),  # a number density; presumably of charge carriers
    z_r = ureg.Quantity(100, 'nm')  # a length; presumably the reference length used to scale the K results
)

In [40]:
# Jit-class counterpart of the Type III sample, built from `sample3.args()`.
sample3_jit = SampleModel3Jit(**sample3.args())

In [41]:
# Keyword arguments for the compiled Type III calculation (all unitless),
# followed by the call whose result is displayed.
params3_jit = dict(
    integrand=integrand3jit,
    sample=sample3_jit,
    omega=omega,
    location1=loc1,
    location2=loc2,
)
K_jit(**params3_jit)

array([0.88300038-0.00045539j, 0.84098605-0.00039175j,
       1.56188177-0.00065121j])

In [42]:
# Keyword arguments for the uncompiled Type III calculation (unit-carrying
# inputs), followed by the call whose result is displayed.
params3 = dict(
    integrand=integrand3,
    sample=sample3,
    omega=omega_u,
    location1=loc1_u,
    location2=loc2_u,
)
K(**params3)

array([0.88300038-0.00045539j, 0.84098605-0.00039175j,
       1.56188177-0.00065121j])

The compiled `K_jit` function is about 100 times faster with a Type III sample.

In [59]:
%%timeit
K_jit(**params3_jit)

3.7 ms ± 377 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [60]:
%%timeit
K(**params3)

438 ms ± 137 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Type IV sample

In [43]:
# Type IV sample (`SampleModel4`): besides the cantilever it takes only `z_r`.
# NOTE(review): the `Kmetal*` functions used with it suggest a metallic
# sample — confirm against `SampleModel4`.
sample4 = SampleModel4(
    cantilever = cantilever,
    z_r = ureg.Quantity(100, 'nm')  # a length; the units wrappers divide K0, K1, K2 by z_r**1, z_r**2, z_r**3
)

In [44]:
# Jit-class counterpart of the Type IV sample, built from `sample4.args()`.
sample4_jit = SampleModel4Jit(**sample4.args())

In [45]:
# Keyword arguments for the compiled Type IV calculation (unitless inputs;
# no integrand or frequency is passed), followed by the displayed call.
params4_jit = dict(
    sample=sample4_jit,
    location1=loc1,
    location2=loc2,
)
Kmetal_jit(**params4_jit)

((0.9759000729485332-0j), (0.929428640903365+0j), (1.7260817616776778-0j))

In [46]:
# Keyword arguments for the uncompiled Type IV calculation (unit-carrying
# locations), followed by the displayed call.
params4 = dict(
    sample=sample4,
    location1=loc1_u,
    location2=loc2_u,
)
Kmetal(**params4)

((0.9759000729485332-0j), (0.929428640903365+0j), (1.7260817616776778-0j))

In [47]:
Kmetalunits(**params4)

(<Quantity((0.009759000729485332-0j), '1 / nanometer')>,
 <Quantity((9.294286409033649e-05+0j), '1 / nanometer ** 2')>,
 <Quantity((1.7260817616776777e-06-0j), '1 / nanometer ** 3')>)

In [48]:
Kmetalunits_jit(**params4_jit)

(<Quantity((0.009759000729485332+0j), '1 / nanometer')>,
 <Quantity((9.294286409033649e-05+0j), '1 / nanometer ** 2')>,
 <Quantity((1.7260817616776777e-06+0j), '1 / nanometer ** 3')>)

The compiled function `Kmetal_jit` is **pleasingly fast**, taking 2 to 3 microseconds. \
The compiled function `Kmetal_jit` is roughly 100 times faster than the Python function `Kmetal`.

In [49]:
%%timeit
Kmetal_jit(**params4_jit)

2.31 μs ± 122 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [50]:
%%timeit
Kmetal(**params4)

271 μs ± 63.9 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


We see below that the function `Kmetalunits_jit` is **shockingly slow**, taking approximately 1 millisecond. \
The `Kmetal_jit` function takes just 2 to 3 microseconds, hundreds of times faster.  

Why? The function `Kmetalunits_jit` is

    K0, K1, K2 = Kmetal_jit(sample, location1, location2)
    zr_u = ureg.Quantity(sample.z_r, 'm')
    K0u, K1u, K2u = K0/zr_u**1, K1/zr_u**2, K2/zr_u**3

In other words, add some units, and the function run time balloons to nearly 1 millisecond! \
**Lesson: Avoid computations with units; they are terribly slow.**

In [57]:
%%timeit
Kmetalunits_jit(**params4_jit)

The slowest run took 4.26 times longer than the fastest. This could mean that an intermediate result is being cached.
1.05 ms ± 615 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [58]:
%%timeit
Kmetalunits(**params4)

1.87 ms ± 482 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


# Thoughts

-  "The wrong way to speed up your code with Numba", Itamar Turner-Trauring, 2024-03-21 ([link](https://pythonspeed.com/articles/slow-numba/)).  Lesson: Programming with loops can actually be fast with numba, which is counter-intuitive.


::: {.content-hidden when-format="html"}

# Formatting notes

The header at the top of this file is for creating a nicely-formatted `.html` document using the program `quarto` ([link](https://quarto.org/)).  To create nicely-formatted `.html` versions of this notebook, run `quarto` from the command line as follows

    quarto render dissipation-theory--Study-52.ipynb && open dissipation-theory--Study-52.html
    
:::