In [1]:
print("""
@Description: 高性能的 Python
@Author(s): Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime: 2023-08-04 14:09:45
""")


@Description: 高性能的 Python
@Author(s): Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime: 2023-08-04 14:09:45



# 高性能的 Python

## 循环

### Python

In [2]:
import random
def average_py(n):
    s = 0
    for i in range(n):
        # 在 s 中添加区间（0,1）的均匀分布随机值
        s += random.random()
    return s / n
n = 1_000_000

In [3]:
%time average_py(n)

CPU times: total: 141 ms
Wall time: 195 ms


0.49951881340871107

In [4]:
%timeit average_py(n)

173 ms ± 34.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%time sum([random.random() for _ in range(n)]) / n

CPU times: total: 250 ms
Wall time: 303 ms


0.49984418515735385

### Numpy

In [6]:
import numpy as np
def average_np(n):
    s = np.random.random(n)
    return s.mean()

In [7]:
%time average_np(n)

CPU times: total: 15.6 ms
Wall time: 37.3 ms


0.49978316925525684

In [8]:
%timeit average_np(n)

16.3 ms ± 4.76 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [11]:
s = np.random.random(n)
# 速度是很客观的，但是代价就是明显更高的内存使用率
s.nbytes

8000000

### Numba

In [16]:
import numba
average_nb = numba.jit(average_py, nopython = True)

In [19]:
%time average_nb(n)
# 编译发生在运行时，会带来一些开销

CPU times: total: 0 ns
Wall time: 14.7 ms


0.5002215669794038

In [20]:
%timeit average_nb(n)

7.84 ms ± 551 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Cython

In [21]:
%load_ext Cython

In [22]:
%%cython -a
import random
def average_cyl(int n):
    cdef int i
    cdef float s = 0
    for i in range(n):
        s += random.random()
    return s / n

Content of stdout:
_cython_magic_f4857492aee99b349bb47a8b6c276ca9bf4c3d0d.c
  ÕýÔÚ´´½¨¿â C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_f4857492aee99b349bb47a8b6c276ca9bf4c3d0d.cp311-win_amd64.lib ºÍ¶ÔÏó C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_f4857492aee99b349bb47a8b6c276ca9bf4c3d0d.cp311-win_amd64.exp
ÕýÔÚÉú³É´úÂë
ÒÑÍê³É´úÂëµÄÉú³É

In [23]:
%time average_cyl(n)

CPU times: total: 109 ms
Wall time: 147 ms


0.4999941885471344

In [24]:
%timeit average_cyl(n)
# 可以观察到一些加速，但是加速的效果很差，甚至没有 Numpy 版本的速度快

133 ms ± 38.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


有必要对 Cython 略作优化，这样它甚至可以击败 Numba 版本

In [25]:
%%cython
from libc.stdlib cimport rand
cdef extern from 'limits.h':
    int INT_MAX
cdef int i
cdef float n
for i in range(5):
    rn = rand() / INT_MAX
    print(rn)

Content of stdout:
_cython_magic_2c09ff1f7ff9fd0cf3fe9dd41bba1f8187538dc4.c
  ÕýÔÚ´´½¨¿â C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_2c09ff1f7ff9fd0cf3fe9dd41bba1f8187538dc4.cp311-win_amd64.lib ºÍ¶ÔÏó C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_2c09ff1f7ff9fd0cf3fe9dd41bba1f8187538dc4.cp311-win_amd64.exp
ÕýÔÚÉú³É´úÂë
ÒÑÍê³É´úÂëµÄÉú³É1.2555159634237718e-05
1.1391937738001318e-05
2.656597645327727e-06
1.3106036937379295e-05
1.084106043485974e-05


In [27]:
%%cython -a
from libc.stdlib cimport rand
cdef extern from 'limits.h':
    int INT_MAX
def average_cy2(int n):
    cdef int i
    cdef float s = 0
    for i in range(n):
        s += rand() / INT_MAX
    return s / n

Content of stdout:
_cython_magic_60791f5e104df5769058ba38c366eda3c6eca618.c
  ÕýÔÚ´´½¨¿â C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_60791f5e104df5769058ba38c366eda3c6eca618.cp311-win_amd64.lib ºÍ¶ÔÏó C:\Users\19243\.ipython\cython\Users\19243\.ipython\cython\_cython_magic_60791f5e104df5769058ba38c366eda3c6eca618.cp311-win_amd64.exp
ÕýÔÚÉú³É´úÂë
ÒÑÍê³É´úÂëµÄÉú³É

In [28]:
%time average_cy2(n)

CPU times: total: 46.9 ms
Wall time: 41.7 ms


7.631375410710461e-06

In [29]:
%timeit average_cy2(n)

25.3 ms ± 2.52 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
