In [1]:
import numpy as np

# 这里缓存的队列数据，推荐使用numpy.array，对后续计算方便很多
# 动态添加数据的方法，参考vnpy.trader.utility.ArrayManager
data = np.random.rand(1000)

In [2]:
import heapq

class MidFinder:

    def __init__(self):
        self.min_heap = []
        self.max_heap = []
        self.count = 0

    def insert(self, num):
        heapq.heappush(self.min_heap, num)
        temp = heapq.heappop(self.min_heap)
        heapq.heappush(self.max_heap, -temp)
        
        if self.count & 1 == 0:
            temp = -heapq.heappop(self.max_heap)
            heapq.heappush(self.min_heap, temp)
        self.count += 1
        
    def get_heap_all(self):
        return self.min_heap + self.max_heap
    
    def get_lower_quartile(self):
        pass
    
    def getMedian(self) -> float:
        if self.count & 1 == 1:
            mid = self.min_heap[0]
        else:
            mid = (self.min_heap[0] + (-self.max_heap[0])) / 2
        #
        
        return mid

    def clear(self):
        self.min_heap = []
        self.max_heap = []
        self.count = 0

In [3]:
def test_finder():
    finder = MidFinder()
    for i in data:
        finder.insert(i)
    mid = finder.getMedian()
    return mid

print("result", test_finder())
%timeit test_finder()

result 0.481138709139433
1.58 ms ± 32.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [4]:
import statistics

def test_statistics():
    return statistics.median(data)

print("result", test_statistics())
%timeit test_statistics()

result 0.481138709139433
275 µs ± 5.31 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [5]:
def test_numpy():
    return np.median(data)

print("result", test_numpy())
%timeit test_numpy()

result 0.481138709139433
40.3 µs ± 894 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
import numba

@numba.jit("float64(float64[:])")
def test_numba(data):
    i = np.median(data)
    return i

print("result", test_numba(data))
%timeit test_numba(data)

result 0.481138709139433
39.4 µs ± 1.06 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [12]:
import numba

@numba.jit("float64(float64[:])")
@numba.jit(nopython=True)

def test_quan_numba(data):
    return np.quantile(data, [0.25, 0.75])


print("result", test_quan_numba(data))
%timeit test_quan_numba(data)



AttributeError: 'CPUDispatcher' object has no attribute '__defaults__'

In [10]:
def xxx():
    return np.quantile(data, [0.25, 0.75])

print("result", xxx())
%timeit xxx()

result [0.25097193 0.74470269]
87.6 µs ± 1.57 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
