In [1]:
import numpy as np
import cupy as cp

In [3]:
xp = np  # array backend used by the helper functions below

# Problem size: a (X, Y, Z) volume, compared within K cells of every position.
X = 1500
Y = 1500
Z = 20
K = 3

# Seeded generator so that Restart & Run All reproduces the same input.
# Sampling float32 directly avoids building a float64 copy of the whole volume
# only to cast it down afterwards.
SEED = 0
rng = np.random.default_rng(SEED)
data = rng.random((X, Y, Z), dtype=np.float32)

In [4]:
def vectors_comparison1(matrix1, matrix2):
    """Return the sum over axis 2 of the squared element-wise product.

    The two inputs are multiplied element-wise, each product is squared, and
    the squares are summed along axis 2, so the product must have at least
    three dimensions. NaN entries propagate into the corresponding sums.
    """
    squared_products = xp.power(matrix1 * matrix2, 2)
    return squared_products.sum(axis=2)

def vectors_comparison2(vector1, vector2):
    """Return the sum over axis 2 of the squared element-wise product.

    Function-call variant of the same computation: multiply, square, then
    reduce along axis 2, all through the `xp` backend.
    """
    return xp.sum(xp.power(xp.multiply(vector1, vector2), 2), axis=2)

def compare_matrices(a_pad, data, xk, yk, K, comp_function):
    """Add the comparison at offset (xk, yk) to the same map shifted by (xk, yk).

    Args:
        a_pad: padded copy of `data`; it is sliced with a column offset of
            `K + yk`, so it needs `K` extra columns in front and enough
            trailing rows/columns for the requested offset.
        data: array whose first two axes are the (row, column) grid.
        xk: row offset (used as a leading pad width, so non-negative).
        yk: column offset; may be negative.
        K: column pad width of `a_pad`.
        comp_function: callable taking `data` and the shifted slice and
            returning a 2-D map over the grid.

    Returns:
        The sum of the comparison map and its shifted copy. Cells whose shifted
        source lies outside the map are filled with NaN before adding, so they
        come out as NaN.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]

    col_start = K + yk
    neighbours = a_pad[xk:xk + n_rows, col_start:col_start + n_cols, :]
    forward = comp_function(data, neighbours)

    # Move the forward map by (xk, yk); vacated cells become NaN.
    shifted = xp.pad(forward, ((xk, 0), (K, K)), constant_values=xp.nan)
    mirror_start = K - yk
    backward = shifted[0:n_rows, mirror_start:mirror_start + n_cols]

    return forward + backward

def get_comparison(data, X, Y, Z, K, comp_function):
    """Average the neighbour comparisons of every grid cell over a window.

    For each non-zero offset (xk, yk) with 0 <= xk, yk <= K, plus the
    column-mirrored offset (xk, -yk) when both components are non-zero,
    `compare_matrices` is called once and the resulting maps are averaged.

    Args:
        data: array whose first two axes form the grid; it is padded with NaN
            on the first two axes only.
        X, Y, Z: unused; kept so existing calls keep working.
        K: largest offset considered along each grid axis (must be >= 1).
        comp_function: pairwise comparison forwarded to `compare_matrices`.

    Returns:
        The element-wise mean of the collected maps. A plain mean is used, so
        any NaN returned by `compare_matrices` propagates into the result.

    Raises:
        ValueError: if `K` is smaller than 1, because no offset would be
            generated and there would be nothing to average.
    """
    if K < 1:
        raise ValueError("K must be at least 1 so there are neighbours to compare")

    # Exclusive upper bound of the offset loops; also used as the pad width.
    reach = K + 1
    a_pad = xp.pad(data, ((0, reach), (reach, reach), (0, 0)), constant_values=xp.nan)

    result = []
    for xk in range(reach):
        for yk in range(reach):
            if xk == 0 and yk == 0:
                continue  # the zero offset would compare each cell with itself
            result.append(compare_matrices(a_pad, data, xk, yk, reach, comp_function))
            if xk != 0 and yk != 0:
                # Offsets off both axes also need the column-mirrored direction.
                result.append(compare_matrices(a_pad, data, xk, -yk, reach, comp_function))

    # Stack explicitly: NumPy would coerce the list on its own, but CuPy's mean
    # operates on an ndarray, so a bare list breaks the `xp` abstraction.
    return xp.mean(xp.stack(result), axis=0)

In [5]:
# Optional GPU path (left disabled): copy the input array to the device with CuPy.
#data_n = cp.asarray(data)

In [6]:
%%time
# Timed run of the neighbourhood comparison with vectors_comparison1 as the pairwise function.
res1 = get_comparison(data, X, Y, Z, K, vectors_comparison1)

CPU times: user 12.5 s, sys: 1.61 s, total: 14.1 s
Wall time: 14.1 s


In [7]:
%%time
# Same timed run, this time with vectors_comparison2 as the pairwise function.
res2 = get_comparison(data, X, Y, Z, K, vectors_comparison2)

CPU times: user 12.5 s, sys: 1.61 s, total: 14.1 s
Wall time: 14.1 s


In [8]:
def compare_matrices(a_pad, data, xk, yk, K, comp_function):
    """Return the comparison at offset (xk, yk) and its shifted copy, separately.

    Args:
        a_pad: padded copy of `data`; it is sliced with a column offset of
            `K + yk`, so it needs `K` extra columns in front and enough
            trailing rows/columns for the requested offset.
        data: array whose first two axes are the (row, column) grid.
        xk: row offset (used as a leading pad width, so non-negative).
        yk: column offset; may be negative.
        K: column pad width of `a_pad`.
        comp_function: callable taking `data` and the shifted slice and
            returning a 2-D map over the grid.

    Returns:
        A 2-tuple `(forward, backward)`: the comparison map itself and the same
        map moved by (xk, yk), with NaN in the cells that have no source.
    """
    n_rows, n_cols = data.shape[0], data.shape[1]

    col_start = K + yk
    neighbours = a_pad[xk:xk + n_rows, col_start:col_start + n_cols, :]
    forward = comp_function(data, neighbours)

    # Move the forward map by (xk, yk); vacated cells become NaN.
    shifted = xp.pad(forward, ((xk, 0), (K, K)), constant_values=xp.nan)
    mirror_start = K - yk
    backward = shifted[0:n_rows, mirror_start:mirror_start + n_cols]

    return forward, backward

def get_comparison(data, X, Y, Z, K, comp_function):
    """Collect the individual neighbour comparison maps without averaging them.

    For each non-zero offset (xk, yk) with 0 <= xk, yk <= K, plus the
    column-mirrored offset (xk, -yk) when both components are non-zero,
    `compare_matrices` is called and every array it returns is appended to the
    output list in call order.

    Args:
        data: array whose first two axes form the grid; it is padded with NaN
            on the first two axes only.
        X, Y, Z: unused by this function.
        K: largest offset considered along each grid axis.
        comp_function: pairwise comparison forwarded to `compare_matrices`.

    Returns:
        A flat list of the arrays returned by all `compare_matrices` calls.
    """
    span = K + 1  # exclusive loop bound, also used as the pad width
    a_pad = xp.pad(data, ((0, span), (span, span), (0, 0)), constant_values=xp.nan)

    maps = []
    for xk in range(span):
        for yk in range(span):
            if xk == 0 and yk == 0:
                continue  # skip the zero offset
            maps += compare_matrices(a_pad, data, xk, yk, span, comp_function)
            if xk != 0 and yk != 0:
                # Offsets off both axes also need the column-mirrored direction.
                maps += compare_matrices(a_pad, data, xk, -yk, span, comp_function)
    return maps

In [9]:
# Collect the per-offset comparison maps; the redefined get_comparison returns them as a list.
tmp1 = get_comparison(data, X, Y, Z, K, vectors_comparison2)

In [10]:
# Number of collected maps; with K = 3 this should equal (2*K + 1)**2 - 1.
print(len(tmp1))

48


In [11]:
# Collapse the list of maps into one per-cell average; nanmean ignores the NaN
# entries produced for neighbours that fall outside the array.
# NOTE(review): this rebinds tmp1 from a list to an array, so re-running the
# cell would average the already averaged result again.
tmp1 = np.nanmean(tmp1, axis=0)
print(tmp1.shape)

(1500, 1500)


In [12]:
# Settings for the brute-force reference: the window reaches K cells to each
# side of the centre, and `k` is the centre's index inside that window.
kernel_size = 2 * K + 1
k = kernel_size // 2

# Backend and input read by run(); the window slides over the first two axes.
xp = np
data_n = data
i_range, x_range = data.shape[:2]

def function(matrix1, matrix2):
    """Return the sum over axis 2 of the squared element-wise product.

    Calls NumPy directly instead of going through the `xp` backend alias.
    """
    product = matrix1 * matrix2
    return np.power(product, 2).sum(axis=2)

In [13]:
def run():
    """Brute-force reference: compare the volume with every shifted copy of itself.

    Reads the module-level `data_n`, `k`, `kernel_size`, `i_range` and
    `x_range`. The volume is padded with NaN by `k` cells on both sides of its
    first two axes; for every window position except the centre, the matching
    shifted view is compared against the unshifted volume with
    `vectors_comparison2`.

    Returns:
        A list with one comparison map per non-centre window position, in
        row-major order of the window.
    """
    halo = ((k, k), (k, k), (0, 0))
    padded_data = xp.pad(data_n, halo, constant_values=xp.nan)

    return [
        vectors_comparison2(data_n, padded_data[row:row + i_range, col:col + x_range])
        for row in range(kernel_size)
        for col in range(kernel_size)
        if not (row == k and col == k)  # the centre is the cell itself
    ]

In [14]:
# Compute the brute-force reference maps, one per non-centre window position.
tmp2 = run()

In [15]:
# Should match the number of maps collected for tmp1 (kernel_size**2 - 1).
print(len(tmp2))

48


In [16]:
# Average the reference maps per cell, ignoring the NaN entries that come from
# the padded border.
# NOTE(review): this rebinds tmp2 from a list to an array, so re-running the
# cell would average the already averaged result again.
tmp2 = np.nanmean(tmp2, axis=0)
print(tmp2.shape)

(1500, 1500)


In [17]:
# Check that the symmetric-offset result matches the brute-force reference
# within np.allclose's default tolerances.
np.allclose(tmp1, tmp2)

True

In [20]:
# Mean signed difference between the two results.
# NOTE(review): positive and negative deviations can cancel in a signed mean;
# a maximum absolute difference would be a stricter check.
np.mean(tmp1 - tmp2)

9.208256e-11