In [1]:
import numpy as np

In [2]:
X = 1500
Y = 1500
Z = 5
K = 3
data = np.random.rand(X,Y,Z).astype('float32')

# Try

In [3]:
def vectors_comparison(vector1, vector2):
    res = np.multiply(vector1, vector2)
    res = np.sum(np.power(res, 2))
    return res

In [4]:
%%time

res = np.zeros(data.shape[0:2])
for x in range(X):
    for y in range(Y):       
        col = data[x, y, :]        
        for x_idx in range(-K, K + 1):
            for y_idx in range(-K, K + 1):
                if (0 <= x + x_idx < X) and (0 <= y + y_idx < Y):
                    res[x, y] += vectors_comparison(col, data[x + x_idx, y + y_idx])
        res[x, y] /= (min(x + x_idx, X - 1) - max(0, x - x_idx) + 1) * (min(y + y_idx, Y - 1) - max(0, y - y_idx) + 1) - 1 

CPU times: user 12min 30s, sys: 0 ns, total: 12min 30s
Wall time: 12min 30s


# Try: numba

In [5]:
import numba
from numba import njit

In [6]:
X = np.int32(1500)
Y = np.int32(1500)
Z = np.int32(20)
K = np.int32(3)
data = np.random.rand(X,Y,Z).astype('float32')

In [7]:
%%time

@njit(numba.types.float32(numba.types.float32[:], 
                          numba.types.float32[:]),
      fastmath=True)
def vectors_comparison(vector1, vector2):
    res = np.multiply(vector1, vector2)
    res = np.sum(np.power(res, 2))
    return res

@njit(numba.types.float32[:, :](numba.types.float32[:, :, :], 
                                numba.types.int32, 
                                numba.types.int32, 
                                numba.types.int32, 
                                numba.types.int32),
      locals={'res': numba.types.float32[:, :],
              'col': numba.types.float32[:],
              'accumulator': numba.types.float32,
              'count': numba.types.int32}, 
      parallel=True)
def get_comparison(data, X, Y, Z, K):
    res = np.zeros(data.shape[0:2], dtype='float32')
    for x in range(X):
        for y in range(Y):        
            col = data[x, y, :]        
            accumulator = np.float32(0.0)
            for x_idx in range(-K, K+1):
                for y_idx in range(-K, K+1):
                    if (0 <= x + x_idx < X) and (0 <= y + y_idx < Y):
                        accumulator += vectors_comparison(col, data[x + x_idx, y + y_idx])
            count = (min(x + x_idx, X - 1) - max(0, x - x_idx) + 1) * (min(y + y_idx, Y - 1) - max(0, y - y_idx) + 1) - 1
            res[x, y] = accumulator / count
    return res

CPU times: user 1.55 s, sys: 3.67 s, total: 5.22 s
Wall time: 1.16 s


In [8]:
%%time
res = get_comparison(data, X, Y, Z, K)

CPU times: user 16.4 s, sys: 80.5 ms, total: 16.5 s
Wall time: 16.4 s


In [9]:
%%time
res = get_comparison(data, X, Y, Z, K)

CPU times: user 16.5 s, sys: 52.6 ms, total: 16.6 s
Wall time: 16.5 s
