In [2]:
import orjson, json
import timeit
import time
import math
import numba as nb
import numpy as np
import numexpr as ne
import pandas as pd
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

In [4]:
def py_damped_sine_func(x, A, lbda, omega, phi):
    return A * math.exp(-lbda * x) * math.sin(omega * x + phi)

def f_py_external(x_list, A, lbda, omega, phi, y_list):
    for i in range(len(x_list)):
        y_list[i] = py_damped_sine_func(x_list[i], A, lbda, omega, phi)

In [3]:
def f_numpy(x_array, A, lbda, omega, phi, y_array):
    return A * np.exp(-lbda * x_array) * np.sin(omega * x_array + phi)
    #np.sin(x_array, out=y_array)

In [4]:
@nb.jit(nopython=True, nogil=True, cache=True)
def numba_damped_sine_func(x, A, lbda, omega, phi):
    return A * np.exp(-lbda * x) * np.sin(omega * x + phi)              

@nb.jit(nopython=True, nogil=True, cache=True)
def f_numba(x_array, A, lbda, omega, phi, y_array):
    for i in range(x_array.size):
        y_array[i] = A * np.exp(-lbda * x_array[i]) * np.sin(omega * x_array[i] + phi)   
        
@nb.jit(nopython=True, nogil=True, cache=True)
def f_numba_external(x_array, A, lbda, omega, phi, y_array):
    for i in nb.prange(x_array.size):
        y_array[i] = numba_damped_sine_func(x_array[i], A, lbda, omega, phi)
        
@nb.jit(parallel=True, nopython=True, nogil=True, cache=True)
def f_numba_external_parall(x_array, A, lbda, omega, phi, y_array):
    for i in nb.prange(x_array.size):
        y_array[i] = numba_damped_sine_func(x_array[i], A, lbda, omega, phi)

In [916]:
def f_numexpr(x_array, A, lbda, omega, phi, y_array):
    ne.evaluate("A * exp(-lbda * x_array) * sin(omega * x_array + phi)", out=y_array)

In [917]:
funcs = [f_py_external, f_numpy, f_numba, f_numba_external_parall, f_numexpr]
sizes = [1, 1, 5, 10, 100, 500, 1_000, 2_000, 3_000, 5_000, 10_000, 20_000, 50_000, 100_000, 200_000, 500_000, 1_000_000]

In [918]:
funcs = [f_numpy, f_numba, f_numba_external_parall, f_numexpr]
sizes = [100, 200, 400, 600, 800, 1_000, 2_000, 3_000, 4_000, 5_000, 6_000, 7_000, 8_000, 9_000, 10_000, 12_000, 15_000]

In [919]:
times = np.zeros((len(sizes), len(funcs)))
compl_times = np.zeros(len(funcs))

In [920]:
A = 1.3
lbda = 1.7
omega = 8 * np.pi
phi = 0.5 * np.pi

In [921]:
REPEAT = 7
NUMBER_NOPY = 4_000
NUMBER_PY = 2
DTYPE = np.float64

In [922]:
# Compilation (1st run)
for j, f in enumerate(funcs):
    x_array = np.linspace(0, 1, num=1, dtype=DTYPE)
    y_array = np.empty_like(x_array)     
    if f.__name__.startswith('f_numba'):
        res = timeit.timeit(lambda: f(x_array, A, lbda, omega, phi, y_array), number=1)
    else:
        res = 0.0
    print(f'{f.__name__}: {res}')
    compl_times[j] = res

f_numpy: 0.0
f_numba: 0.005063636002887506
f_numba_external_parall: 0.005088444999273634
f_numexpr: 0.0


In [923]:
# Now, without compilation (multiple runs)    
for i, s in enumerate(sizes):
    for j, f in enumerate(funcs):
        x_array = np.linspace(0, 1, num=s, dtype=DTYPE)
        y_array = np.empty_like(x_array)     
        N = NUMBER_NOPY
        if f.__name__.startswith('f_py'):
            x_array = x_array.tolist()
            y_array = y_array.tolist()
            N = NUMBER_PY
        res = timeit.repeat(lambda: f(x_array, A, lbda, omega, phi, y_array), repeat=REPEAT, number=N)
        times[i, j] = np.sum(res) / REPEAT / NUMBER
        print(".", end="")
    print(s)

....100
....200
....400
....600
....800
....1000
....2000
....3000
....4000
....5000
....6000
....7000
....8000
....9000
....10000
....12000
....15000


In [924]:
l = [pd.DataFrame({'name': f.__name__, 
                   'x': sizes, 
                   'y': times[:, j],
                   'y_full': times[:, j] + compl_times[j]
                  }) 
     for j, f in enumerate(funcs)]
df = pd.concat(l)

In [930]:
fig = px.line(df, x='x', y='y', color='name', 
              log_x=True, log_y=True, markers=True, title='Without compilation time')
fig.update_xaxes(range=[1.95, 4.2])
fig.show()

In [932]:
fig = px.line(df, x='x', y='y_full', color='name', 
              log_x=True, log_y=True, markers=True, title='Including compilation time')
fig.update_xaxes(range=[1.95, 4.2])
fig.show()

In [5]:
x_array = np.linspace(0, 1, num=100_000, dtype=np.float64)
%timeit x_array.tolist()

3 ms ± 73.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
x_array = np.linspace(0, 1, num=100_000, dtype=np.float32)
%timeit x_array.tolist()

2.84 ms ± 64.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [940]:
x_array = np.linspace(0, 1, num=100_000, dtype=np.float16)
%timeit x_array.tolist()

2.19 ms ± 126 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [941]:
x_array = np.linspace(0, 1, num=10_000, dtype=np.float64)
%timeit x_array.tolist()

217 µs ± 25.9 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [943]:
f'{x_array}'
%timeit f'{x_array}'

161 µs ± 1.18 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [944]:
np.array_str(x_array)

'[0.00000000e+00 1.00010001e-04 2.00020002e-04 ... 9.99799980e-01\n 9.99899990e-01 1.00000000e+00]'

In [945]:
%timeit np.array_str(x_array)

164 µs ± 2.36 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [946]:
%timeit np.array_str(x_array).replace('\n', '')

164 µs ± 2.13 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [951]:
np.array_str(x_array, max_line_width=np.Inf, precision=4, suppress_small=True)

'[0.     0.0001 0.0002 ... 0.9998 0.9999 1.    ]'

In [952]:
%timeit np.array_str(x_array, max_line_width=np.Inf, precision=4, suppress_small=True)

156 µs ± 1.43 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [955]:
%timeit np.array_str(x_array, max_line_width=np.Inf, precision=4, suppress_small=True).replace(' ', ',')

153 µs ± 2.4 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [960]:
np.array_str(x_array, max_line_width=np.Inf, precision=4, suppress_small=True)

TypeError: _array_str_dispatcher() got an unexpected keyword argument 'separator'

In [964]:
%timeit np.array2string(x_array, separator=',', max_line_width=np.Inf, precision=4, suppress_small=True)

152 µs ± 2.76 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [7]:
np.array2string(x_array, separator=',', max_line_width=np.Inf, precision=4, suppress_small=True,floatmode='fixed')

'[0.0000,0.0000,0.0000,...,1.0000,1.0000,1.0000]'

In [11]:
%timeit np.array2string(x_array, separator=',', threshold=np.inf, max_line_width=np.inf, precision=4, suppress_small=True, floatmode='fixed')

2.9 s ± 241 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%timeit np.array2string(x_array, separator=',', max_line_width=np.inf, precision=4, suppress_small=True, floatmode='fixed')

166 µs ± 9.91 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [12]:
%timeit np.array2string(x_array, threshold=np.inf)

2.04 s ± 59.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%timeit str(x_array)

166 µs ± 2.81 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [14]:
str(x_array)

'[0.00000e+00 1.00001e-05 2.00002e-05 ... 9.99980e-01 9.99990e-01\n 1.00000e+00]'

In [15]:
%timeit x_array.tolist()

2.7 ms ± 66.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [16]:
%timeit str(x_array.tolist())

104 ms ± 2.49 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [19]:
%timeit f'{x_array.tolist()}'

103 ms ± 411 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
x_array = np.linspace(0, 1, num=5, dtype=np.float32)

In [22]:
np.array2string(x_array, separator=',', max_line_width=np.Inf, precision=4, suppress_small=True,floatmode='fixed')

'[0.0000,0.2500,0.5000,0.7500,1.0000]'

In [63]:
x_array = np.linspace(0, 1, num=100, dtype=np.float32)
def sss():
    s = np.array2string(x_array, separator=',', threshold=np.inf, max_line_width=np.inf, precision=4, suppress_small=True,floatmode='fixed')
    s = s.replace('[', '').replace(']', '') + ','
    s = s*1000
    s = f'[{s[:-1]}]'
    return s
def sss2():
    s = np.array2string(x_array, separator=',', threshold=np.inf, max_line_width=np.inf, precision=4, suppress_small=True,floatmode='fixed')
    s = s[1:]
    s = s[:-1]
    s = s + ','
    s = s*1000
    s = f'[{s[:-1]}]'
    return s

In [64]:
%timeit sss()

617 µs ± 16.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [65]:
%timeit sss2()

641 µs ± 70.3 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [62]:
sss2()

'[0.0000,0.2500,0.5000,0.7500,1.0000,0.0000,0.2500,0.5000,0.7500,1.0000]'

In [66]:
x_array[:10]

array([0.        , 0.01010101, 0.02020202, 0.03030303, 0.04040404,
       0.05050505, 0.06060606, 0.07070707, 0.08080808, 0.09090909],
      dtype=float32)

In [67]:
x_array.size

100

In [120]:
x_array = np.linspace(0, 1, num=20_000)

In [119]:
orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY).decode()

'[0.0,0.25,0.5,0.75,1.0]'

In [110]:
'redrawPlotWithNewCalculatedYJson({y:[' + orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY).decode() + ']})'

'redrawPlotWithNewCalculatedYJson({y:[[0.0,0.25,0.5,0.75,1.0]]})'

In [121]:
%timeit orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY).decode()

666 µs ± 54.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [122]:
%timeit 'redrawPlotWithNewCalculatedYJson({y:[' + orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY).decode() + ']})'

611 µs ± 6.37 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [115]:
%timeit f'redrawPlotWithNewCalculatedYJson(y:[{orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY).decode()}])'

4.39 ms ± 135 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)

b'{"y":[0.0,0.25,0.5,0.75,1.0]}'

In [11]:
data = {
"numpy": np.array([[1, 2], [3, 4]])
}
json_byte = orjson.dumps(data, option=orjson.OPT_NAIVE_UTC | orjson.OPT_SERIALIZE_NUMPY)
orjson.loads(json_byte)

{'numpy': [[1, 2], [3, 4]]}

In [16]:
x_array

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [14]:
np.array([1, 2])

array([1, 2])

In [100]:
x_array = np.linspace(0, 1, num=5, dtype=np.float64)

In [101]:
x_array.astype(str) #.tobytes().decode()

array(['0.0', '0.25', '0.5', '0.75', '1.0'], dtype='<U32')

In [103]:
x_array.astype(str).tobytes().decode(dtype='<U32')

TypeError: 'dtype' is an invalid keyword argument for decode()

In [95]:
%timeit x_array.tobytes()

100 ns ± 2.71 ns per loop (mean ± std. dev. of 7 runs, 10,000,000 loops each)


In [81]:
%timeit ",".join(np.char.mod('%f', x_array))

92.5 ms ± 1.32 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [84]:
%timeit orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)

3.2 ms ± 345 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [86]:
json_bytes = orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)
%timeit f'{json_bytes}'

7.36 ms ± 98.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [90]:
json_bytes = orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)
%timeit json_bytes.decode() 

In [91]:
json_bytes.decode()

'{"y":[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0]}'

In [73]:
json_bytes = orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)
%timeit 'redrawPlotWithNewCalculatedYJson({y:[[' + f'{json_bytes})'[3:-3] + '})'

8.47 ms ± 130 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [20]:
%timeit x_array.tolist()

1.97 ms ± 62.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%timeit f'{x_array.tolist()}'

73.2 ms ± 11.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [43]:
json_bytes = orjson.dumps({'y': x_array}, option=orjson.OPT_SERIALIZE_NUMPY)
#json_bytes

In [40]:
%timeit str(json_bytes)

8.33 ms ± 537 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [39]:
%timeit f'{json_bytes}'

8.03 ms ± 148 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [47]:
str(json_bytes)[3:]

'"y":[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0]}\''

In [48]:
f'{json_bytes}'[3:]

'"y":[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0]}\''

In [51]:
'redrawPlotWithNewCalculatedYJson(' + f'{json_bytes})'[2:-2] + ')'

'redrawPlotWithNewCalculatedYJson({"y":[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0]})'

In [53]:
json_bytes = orjson.dumps(x_array, option=orjson.OPT_SERIALIZE_NUMPY)

In [54]:
json_bytes

b'[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0]'

In [57]:
f'{json_bytes}'[3:-2]

'0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0'

In [60]:
'redrawPlotWithNewCalculatedYJson({y:[[' + f'{json_bytes})'[3:-3] + '})'

'redrawPlotWithNewCalculatedYJson({y:[[0.0,0.1111111111111111,0.2222222222222222,0.3333333333333333,0.4444444444444444,0.5555555555555556,0.6666666666666666,0.7777777777777777,0.8888888888888888,1.0})'

In [159]:
x = [8, 7, 9]

In [160]:
def calc(x):
    x[0] = 0

In [161]:
def timing(f):
    return timeit.timeit(lambda: f, number=1)

In [164]:
timing(calc(x))

9.350005711894482e-07

In [142]:
timeit.timeit(lambda: calc(x), number=1)

In [143]:
res

2.4900000425986946e-06

In [170]:
float(np.array(x).max())

9.0

In [171]:
np.array(x).ptp()

9

In [172]:
x_array = np.linspace(0, 1, num=100_000, dtype=np.float32)

In [176]:
%timeit x_array.min()

19.2 µs ± 863 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [174]:
%timeit x_array.ptp()

39.3 µs ± 1.5 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [175]:
%timeit x_array.max() - x_array.min()

37 µs ± 747 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [178]:
int(x_array.size)

100000

In [182]:
dtype = x_array.dtype

In [183]:
dtype(0.2)

TypeError: 'numpy.dtype[float32]' object is not callable

In [184]:
np.float32(0.2)

0.2

In [186]:
x_array.dtype.type(0.2)

0.2

In [187]:
dtype = x_array.dtype.type
dtype(0.3)

0.3

In [188]:
x_array = np.linspace(0, 1, num=100_000, dtype=np.float32)

In [192]:
s = 0.2
b = 20
%timeit s * x_array + b 

32.8 µs ± 6.98 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [193]:
s = 0.2
b = 20
%timeit x_array * s + b 

29.7 µs ± 224 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [194]:
s = np.float32(0.2)
b = np.float32(20)
%timeit x_array * s + b

28.2 µs ± 366 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [195]:
DATA = {
    "phases": [
      {
        "SrTiO3": {
          "_space_group_name_H-M_alt": "P m -3 m",
          "_cell_length_a": 3.9,
          "_cell_length_b": 3.9,
          "_cell_length_c": 3.9,
          "_cell_angle_alpha": 90,
          "_cell_angle_beta": 90,
          "_cell_angle_gamma": 90,
          "_atom_site": [
            {
              "_label": "Sr",
              "_type_symbol": "Sr",
              "_fract_x": 0.5,
              "_fract_y": 0.5,
              "_fract_z": 0.5,
              "_occupancy": 1,
              "_adp_type": "Biso",
              "_B_iso_or_equiv": 0.40
            },
            {
              "_label": "Ti",
              "_type_symbol": "Ti",
              "_fract_x": 0,
              "_fract_y": 0,
              "_fract_z": 0,
              "_occupancy": 1,
              "_adp_type": "Biso",
              "_B_iso_or_equiv": 0.50
            },
            {
              "_label": "O",
              "_type_symbol": "O",
              "_fract_x": 0.5,
              "_fract_y": 0,
              "_fract_z": 0,
              "_occupancy": 1,
              "_adp_type": "Biso",
              "_B_iso_or_equiv": 0.65
            }
          ]
        }
      }
    ],
    "experiments": [
      {
        "NPD": {
          "_diffrn_source": "nuclear reactor",
          "_diffrn_radiation_probe": "neutron",
          "_diffrn_radiation_wavelength": 1.27,
          "_pd_instr_resolution_u": 0.01,
          "_pd_instr_resolution_v": -0.01,
          "_pd_instr_resolution_w": 0.01,
          "_pd_instr_resolution_x": 0.1,
          "_pd_instr_resolution_y": 0,
          "_pd_instr_reflex_asymmetry_p1": 0,
          "_pd_instr_reflex_asymmetry_p2": 0,
          "_pd_instr_reflex_asymmetry_p3": 0,
          "_pd_instr_reflex_asymmetry_p4": 0,
          "_pd_meas_2theta_offset": 0,
          "_pd_meas_2theta_range_min": 0,
          "_pd_meas_2theta_range_max": 180,
          "_pd_meas_2theta_range_inc": 0.1,
          "_phase": [
            {
              "_label": "SrTiO3",
              "_scale": 0.01
            }
          ],
          "_pd_background": [
            {
              "_2theta": 50,
              "_intensity": 20
            },
            {
              "_2theta": 90,
              "_intensity": 0
            },
            {
              "_2theta": 140,
              "_intensity": 40
            }
          ]
        }
      }
    ]
}

In [196]:
background = DATA['experiments'][0]['NPD']['_pd_background']

In [198]:
[item['_2theta'] for item in background]

[50, 90, 140]