# np.array에서 for문이 효율적인가?

In [125]:
import numpy as np
import time

## 체크1

In [108]:
# dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
# ids = np.array(list(range(9)))
# thresh = 0.0

In [121]:
%%timeit -n 100 -r 10
# dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
# ids = np.array(list(range(9)))
# thresh = 0.001
# Benchmark input: 100,000 uniform random values plus matching integer ids.
# The setup is deliberately kept inside the timed cell, as in the other cells.
dists = np.random.rand(100000)
ids = np.array(list(range(100000)))
thresh = 0.0
# Python-level scan: cut both arrays just before the first value <= thresh.
for idx, dist in enumerate(dists):
    if dist <= thresh:
        # Slice the arrays, not the scalar `dist`: indexing a NumPy scalar
        # with a slice raises IndexError.
        dists = dists[:idx]
        ids = ids[:idx]
        break
# The list conversion is part of the timed work; its result is discarded.
dists.flatten().tolist()

22 ms ± 342 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [122]:
%%timeit -n 100 -r 10
# dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
# ids = np.array(list(range(9)))
# thresh = 0.001
# Benchmark input: 100,000 uniform random values plus matching integer ids.
dists = np.random.rand(100000)
ids = np.array(list(range(100000)))
thresh = 0.0
# Vectorized variant: collect the positions where dists >= thresh, then keep
# that many leading entries of both arrays. len(indices) is evaluated once per
# slice here (twice in total).
# NOTE(review): a count-based slice matches "cut at the first value <= thresh"
# only if every passing value precedes every failing one and no value equals
# thresh — confirm against the intended (sorted) input.
indices = np.where(dists >= thresh)[0]
dists = dists[:len(indices)]
ids = ids[:len(indices)]

# The list conversion is part of the timed work; its result is discarded.
dists.flatten().tolist()

10.8 ms ± 110 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [123]:
%%timeit -n 100 -r 10
# dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
# ids = np.array(list(range(9)))
# thresh = 0.001
# Benchmark input: 100,000 uniform random values plus matching integer ids.
dists = np.random.rand(100000)
ids = np.array(list(range(100000)))
thresh = 0.0
# Vectorized variant: collect the positions where dists >= thresh and keep
# that many leading entries of both arrays. The count is stored in k once and
# reused for both slices.
# NOTE(review): a count-based slice matches "cut at the first value <= thresh"
# only if every passing value precedes every failing one and no value equals
# thresh — confirm against the intended (sorted) input.
indices = np.where(dists >= thresh)[0]
k = len(indices)
dists = dists[:k]
ids = ids[:k]

# The list conversion is part of the timed work; its result is discarded.
dists.flatten().tolist()

11.1 ms ± 175 µs per loop (mean ± std. dev. of 10 runs, 100 loops each)


In [124]:
%%timeit -n 100 -r 10
# dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
# ids = np.array(list(range(9)))
# thresh = 0.001
# Benchmark input: 100,000 uniform random values plus matching integer ids.
dists = np.random.rand(100000)
ids = np.array(list(range(100000)))
thresh = 0.0
# Default cut keeps everything; it only moves if a value <= thresh is found.
# Using the last visited index instead would drop the final element whenever
# the scan runs to the end without a hit.
cut = len(dists)
it = np.nditer(dists, flags=['c_index'])
while not it.finished:
    if dists[it.index] <= thresh:
        cut = it.index
        break
    it.iternext()
dists = dists[:cut]
ids = ids[:cut]

# The list conversion is part of the timed work; its result is discarded.
dists.flatten().tolist()

36 ms ± 1.02 ms per loop (mean ± std. dev. of 10 runs, 100 loops each)


## 체크2

In [None]:
def array_time_check(thresh: float, iter_mode: int):
    """Time one strategy for truncating ``dists``/``ids`` at a threshold.

    Both arrays are cut just before the first position whose distance is
    ``<= thresh``; if no distance qualifies they are kept whole. All three
    strategies apply this same rule, so their printed results are comparable.
    Only the truncation and the final list conversion are timed; building the
    sample arrays is excluded.

    Args:
        thresh: Cut-off value compared against each distance.
        iter_mode: ``1`` scans with a Python ``for`` loop over ``enumerate``,
            ``2`` uses ``np.where``, any other value walks ``np.nditer``.

    Returns:
        A report string with the elapsed seconds, ``iter_mode``, and the
        truncated ``dists`` and ``ids`` arrays.
    """
    # In real use the values come from faiss, so dists would arrive sorted
    # nearest-first. The sample below is not sorted.
    dists = np.array([0.1234, 0.43534, 0.4545, 0.12398, 0.7789, 0.6583, 0.5563, 0.33423, 0.832423,])
    ids = np.array(list(range(9)))

    start_time = time.perf_counter()

    # Default: nothing is at or below the threshold, so keep every entry.
    cut = len(dists)
    if iter_mode == 1:
        for idx, dist in enumerate(dists):
            if dist <= thresh:
                cut = idx
                break
    elif iter_mode == 2:
        # Position of the first failing value, not a count of passing values,
        # so the result also matches the loops on unsorted input.
        hits = np.where(dists <= thresh)[0]
        if len(hits):
            cut = hits[0]
    else:
        it = np.nditer(dists, flags=['c_index'])
        while not it.finished:
            if dists[it.index] <= thresh:
                cut = it.index
                break
            it.iternext()

    dists = dists[:cut]
    ids = ids[:cut]

    # The list conversions are part of the timed work; results are discarded.
    dists.flatten().tolist(), ids.flatten().tolist()

    end_time = time.perf_counter()

    return f"실행 시간_time이 함수 안에 있을 때: {end_time - start_time:.5f}초: {iter_mode},\n{dists},\n{ids}"

# Run each strategy once, timing the whole call (including the print) from the
# outside, and separate consecutive reports with a dashed rule.
for mode, label in ((1, "for사용"), (2, "where사용"), (3, "iter사용")):
    if mode > 1:
        print("-" * 80)

    start_time = time.perf_counter()

    print(array_time_check(0.01, mode))

    end_time = time.perf_counter()

    print(f"실행 시간: {end_time - start_time:.5f}초, {label}")

In [134]:
# Show how a (1, 10) array prints as-is, as a nested list, flattened, and as a
# flat list.
arr = np.random.rand(1, 10)
flat = arr.flatten()
for shown in (arr.shape, arr.tolist(), flat, flat.tolist()):
    print(shown)

(1, 10)
[[0.49652797037415364, 0.5688486949011543, 0.3534625928447229, 0.21231535865548912, 0.3835059781620266, 0.7413077356656254, 0.31619987147365525, 0.31173874513363653, 0.9703179855055901, 0.5290041201080805]]
[0.49652797 0.56884869 0.35346259 0.21231536 0.38350598 0.74130774
 0.31619987 0.31173875 0.97031799 0.52900412]
[0.49652797037415364, 0.5688486949011543, 0.3534625928447229, 0.21231535865548912, 0.3835059781620266, 0.7413077356656254, 0.31619987147365525, 0.31173874513363653, 0.9703179855055901, 0.5290041201080805]
