In [1]:
from python.im2col import im2col_SIMD
import numpy as np
from time import perf_counter_ns
import cv2

In [2]:
def im2col_ref(img, window):
    """Reference im2col built from OpenCV padding and NumPy sliding windows.

    Args:
        img: Image array. Presumably 2-D (the two-element ``window`` is
            passed unchanged to ``sliding_window_view``) -- confirm with callers.
        window: Sequence whose first two entries are the kernel height and
            kernel width.

    Returns:
        A 2-D array with one row per window position; each row is the
        flattened ``ker_h * ker_w`` patch at that position.
    """
    ker_h = window[0]
    ker_w = window[1]
    half_h, half_w = ker_h // 2, ker_w // 2

    # Pad half a kernel on every side with a constant border so that, for odd
    # kernel sizes, there is exactly one window per input pixel.
    img_padded = cv2.copyMakeBorder(
        img, half_h, half_h, half_w, half_w, cv2.BORDER_CONSTANT
    )

    # Collapse the window-position axes first, then flatten each patch.
    patches = np.lib.stride_tricks.sliding_window_view(img_padded, window)
    patches = patches.reshape(-1, ker_h, ker_w)
    n_patches = patches.shape[0]
    return patches.reshape(n_patches, -1)

# Check Correctness

In [None]:
# Compare im2col_SIMD against the NumPy/OpenCV reference on a random 128x128
# float32 image for each square and non-square kernel size below.
kernels = [(3,3),(5,5),(7,7),(9,9),(11,11),(7,3),(3,7),(11,3),(3,11)]
# Seeded generator so that a failing kernel can be reproduced on re-run.
rng = np.random.default_rng(0)
for h_size, w_size in kernels:
    a = rng.integers(0, 255, size=(128,128)).astype('float32')
    result_1 = im2col_SIMD(a, h_size, w_size)
    result_2 = im2col_ref(a, (h_size,w_size))
    # array_equal also requires identical shapes; an elementwise `==` can
    # silently broadcast mismatched shapes and still report all-True.
    is_correct = np.array_equal(result_1, result_2)

    print(f'Kernel {h_size}x{w_size} is correct:     \t', is_correct)

# Check Speed

In [None]:
# Benchmark im2col_SIMD: for each kernel size, take the best of `n_repeats`
# timed calls on each of `n_images` random images and report the mean of
# those per-image best times in microseconds.
kernels = [(3,3),(5,5),(7,7),(9,9),(11,11),(7,3),(3,7),(11,3),(3,11)]
n_images = 100
n_repeats = 5
for h_size, w_size in kernels:
    all_times = []
    for _ in range(n_images):
        a = np.random.randint(0,255,(128,128)).astype('float32')
        # Reset for every image: otherwise min(times) is a running minimum
        # over all earlier images and the reported mean is biased low.
        times = []
        for _ in range(n_repeats):
            start = perf_counter_ns()
            result = im2col_SIMD(a, h_size, w_size)
            times.append(perf_counter_ns()-start)
        # perf_counter_ns() is in nanoseconds; /1e3 converts to microseconds.
        all_times.append(min(times)/1e3)
    print(f'Time for kernel size {h_size}x{w_size}:   \t', np.round(np.mean(all_times),2), 'us')
