# 3DCT高速化に向けた NumPyファンシーインデックスに関する検証
- 問題 : 逆投影部分で多重forループを用いて行っている要素参照を，NumPyの配列演算（ファンシーインデックス）で一括して行いたい

In [1]:
import numpy as np

## 問題1
- 各点 (x,y,z) について，画像上の座標 (u(x,y,z), v(x,y,z)) の値を参照したい
    - 座標を格納したインデックス配列を渡す（ファンシーインデックス）以下のコードで実現できた

In [2]:
# 2x2 "image" of single-character labels so every element is easy to identify.
img = np.array([
    ["a", "b"],
    ["c", "d"],
])

# Index arrays of shape (2, 2, 2): for each output element, x_img gives the
# index along img's first axis and y_img the index along its second axis.
x_img = np.array([
    [[0, 0], [0, 0]],
    [[1, 1], [1, 1]],
])
y_img = np.array([
    [[0, 1], [0, 1]],
    [[1, 0], [1, 0]],
])

In [3]:
# Fancy indexing: result[i, j, k] == img[x_img[i, j, k], y_img[i, j, k]],
# so the output takes the shape of the index arrays.
result = img[x_img, y_img]

In [4]:
# Display the array produced by fancy indexing.
result

array([[['a', 'b'],
        ['a', 'b']],

       [['d', 'c'],
        ['d', 'c']]], dtype='<U1')

In [6]:
# Reference implementation: fill the output one element at a time with the
# same triple loop that is benchmarked below.
# Allocate with img's own dtype rather than dtype=str: dtype=str yields a
# one-character string array ('<U1'), which would silently truncate longer
# labels if img ever held them.
result2 = np.empty(x_img.shape, dtype=img.dtype)

for i in range(x_img.shape[0]):
    for j in range(x_img.shape[1]):
        for k in range(x_img.shape[2]):
            # Look up one element of img using the (row, column) pair stored
            # at the same position of the two index arrays.
            result2[i, j, k] = img[x_img[i, j, k], y_img[i, j, k]]

result2

array([[['a', 'b'],
        ['a', 'b']],

       [['d', 'c'],
        ['d', 'c']]], dtype='<U1')

- ひとまずこの大きさでベンチマーク

In [7]:
%%timeit
# Baseline: element-by-element lookup with a Python triple loop over the
# index arrays, writing in place into the preallocated result2.
for i in range(x_img.shape[0]):
    for j in range(x_img.shape[1]):
        for k in range(x_img.shape[2]):
            result2[i,j,k] = img[x_img[i,j,k],y_img[i,j,k]]

# Under %%timeit this bare expression is only evaluated, not displayed.
result2

14.3 µs ± 49.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [8]:
%%timeit
# Same lookup as the triple loop, done in a single fancy-indexing expression.
result = img[x_img,y_img]

4.27 µs ± 21.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


- おいおい，この小さなサイズの時点ですでに約3.3倍 (14.3 µs → 4.27 µs) も速さが違うぞ

### 中規模ベンチマークしてみよう

In [9]:
# 10x10x10 volume whose value encodes its own index:
# img2[a, b, c] == 100*a + 10*b + c, which makes lookups easy to verify.
img2 = np.arange(10 ** 3).reshape(10, 10, 10)
img2[3, 2, 1]

321

In [12]:
# Index array with x_img2[i, j, k] == k: the row 0..9 repeated over a
# 10x10 grid, giving shape (10, 10, 10).
x_img2 = np.tile(np.arange(10), (10, 10, 1))
x_img2

array([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],

       [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]],

       [[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
        ...（以降の出力は省略）

In [13]:
# Swap the last two axes of x_img2 so that y_img2[i, j, k] == j.
y_img2 = np.transpose(x_img2, (0, 2, 1))
y_img2

array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        [7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
        [8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
        [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
        [6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
        [7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
        [8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
        [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]],

       [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
        [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
        ...（以降の出力は省略）

In [17]:
# Reverse the axis order of x_img2 so that z_img2[i, j, k] == i.
z_img2 = np.transpose(x_img2, (2, 1, 0))
z_img2

array([[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],

       [[2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
        ...（以降の出力は省略）

In [19]:
# Output buffer for the loop version; it has the same shape as the index
# arrays and is filled in place by the benchmarked triple loop.
result_for = np.empty(x_img2.shape, dtype=int)

In [21]:
%%timeit
# Baseline for the 10x10x10 case: one scalar lookup per output element,
# written in place into result_for (so the array stays filled afterwards).
for i in range(x_img2.shape[0]):
    for j in range(x_img2.shape[1]):
        for k in range(x_img2.shape[2]):
            result_for[i,j,k] = img2[x_img2[i,j,k],y_img2[i,j,k],z_img2[i,j,k]]

1.17 ms ± 12.8 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [27]:
# Compute the fancy-indexed result here, in a normal cell, so that it is
# actually stored in result_fancy.  Names assigned inside a %%timeit cell are
# local to the timing run and do not persist, so an np.empty placeholder
# would be left holding uninitialised data for the later equality check.
result_fancy = img2[x_img2, y_img2, z_img2]

In [28]:
%%timeit
# One vectorised lookup replaces the whole triple loop.
# NOTE: a name bound inside %%timeit is local to the timing run; this
# assignment does not update result_fancy in the notebook namespace.
result_fancy = img2[x_img2,y_img2,z_img2]

22.7 µs ± 531 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [30]:
# Verify that both approaches produce the same array.  np.array_equal also
# requires the shapes to match, whereas np.all(a == b) would broadcast and
# could report True for arrays of different shapes.
np.array_equal(result_for, result_fancy)

True

## 結果
- ファンシーインデックスを用いてfor文アクセスと同じ配列を作れた．
- 実行速度
    - for文 : 1.17 ms
    - ファンシーインデックス : 22.7 µs（約50倍高速）

## 問題2
- index配列の次元数が違うとどうなるか

In [32]:
# Index arrays of different rank: x2_img is 1-D, y2_img is 2-D.
x2_img = np.array([0, 1])
y2_img = np.array([
    [0, 0],
    [1, 1],
])

# Index img with both arrays at once to see how the rank mismatch is handled.
result_dim = img[x2_img, y2_img]
result_dim

array([['a', 'c'],
       ['b', 'd']], dtype='<U1')

In [38]:
# Check the shape of the 1-D index array used above.
x2_img.shape

(2,)

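- ブロードキャストが起こった
    - 次元数の少ない x2_img (shape (2,)) が y2_img の shape (2,2) に合わせて拡張された（先頭に軸を追加し，その軸方向に繰り返したのと同じ扱い）
    - つまり result_dim[i,j] = img[x2_img[j], y2_img[i,j]] となる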