In [None]:
import time

from numba import cuda
import numpy as np # Arrays in Python

from math import sin, cos, pi
from matplotlib import pyplot as plt

# Host-side image buffer: 1024 x 1024 zeros (NumPy's default float64 dtype)
px = np.zeros((1024, 1024))

@cuda.jit
def kernel_generate_image(image, T):
    """Fill ``image`` with the product of two sinusoids of period ``T``.

    Element ``[i, j]`` is set to ``sin(i*2*pi/T + 1) * sin(j*2*pi/T + 1) * 0.25``.
    Each thread starts at its own absolute position in the grid and then
    advances by the total number of launched threads along each axis, so the
    loops stay within ``image.shape`` regardless of the launch configuration.

    Parameters
    ----------
    image : 2-D array
        Output buffer; its elements are overwritten in place.
    T : number
        Period of the sinusoids, measured in array elements.
    """
    # Absolute (x, y) index of this thread within the whole grid
    start_x, start_y = cuda.grid(2)

    # Total number of threads launched along each axis, used as the loop step
    step_x, step_y = cuda.gridsize(2)

    for row in range(start_x, image.shape[0], step_x):
        for col in range(start_y, image.shape[1], step_y):
            image[row, col] = sin(row*2*pi/T+1)*sin(col*2*pi/T+1)*0.25

# Fill px on the GPU: a 16x16 grid of 32x32-thread blocks, sinusoid period T = 32
kernel_generate_image[(16, 16), (32, 32)](px, 32)

# Render the generated image, write it to disk, then display it
fig, ax2 = plt.subplots()
ax2.imshow(px)
ax2.set_title('Stride loop (4/4)')
fig.savefig("pixelSquareComplete.png")
plt.show()

print(px.shape)

In [None]:
from Helpers import synchronous_kernel_timeit as sync_timeit

th = 10  # NOTE(review): not referenced by any cell visible here - confirm before removing
execution_times_1 = []  # stays empty while the quadratic sweep below is disabled
execution_times_2 = []

# Disabled experiment: grow the block quadratically, from 1x1 up to 32x32 threads.
# for i in range(32):
#     t = sync_timeit( lambda: kernel_generate_image[(4,4),(i+1,i+1)](px, 32), number=10)
#     execution_times_1.append(t)

# Linear sweep: keep a 16x16 grid and grow the block from 1x1 to 1x1024 threads,
# recording one sync_timeit measurement (number=10) per block size.
# NOTE(review): px is a host NumPy array, so per Numba's documented behaviour it is
# copied to the device and back on every launch; the timings presumably include that
# transfer. Timing against cuda.to_device(px) would isolate the kernel itself.
for threads_y in range(1, 1025):
    # The lambda reads threads_y when it is called, which is assumed to happen
    # inside sync_timeit during this same iteration.
    elapsed = sync_timeit(
        lambda: kernel_generate_image[(16, 16), (1, threads_y)](px, 32),
        number=10,
    )
    execution_times_2.append(elapsed)

In [None]:
# Disabled layout: quadratic sweep (execution_times_1) next to the linear one.
# plt.subplot(1,2,1)
# plt.plot( [(i+1)**2 for i in range(len(execution_times_1))], execution_times_1 )
# plt.subplot(1,2,2)

# Two stacked panels: the full sweep on top, a zoomed view underneath
fig, (ax_full, ax_zoom) = plt.subplots(2, 1, figsize=(15, 8))
# fig.tight_layout()
fig.subplots_adjust(hspace=0.33)  # vertical gap so the lower title clears the upper x-label

# Measurement k (0-based) was taken with k + 1 threads per block
thread_counts = range(1, len(execution_times_2) + 1)

# --- Top panel: whole sweep on a logarithmic time axis ---
ax_full.plot(thread_counts, execution_times_2)
ax_full.set_title("Execution times with thread pool size per block ranging from 1 to 1024 in one dimension")
# Dashed markers at 32 and 64 threads (presumably multiples of the 32-thread warp size)
for boundary in (32, 64):
    ax_full.axvline(boundary, linestyle='--', color='r')
ax_full.set_xlabel('thread count per block')
ax_full.set_ylabel('execution time [s]')
ax_full.set_yscale("log")

# --- Bottom panel: zoom on 20..75 threads per block ---
ax_zoom.plot(thread_counts, execution_times_2)
ax_zoom.set_xlim(20, 75)
# NOTE(review): these y-limits are hard-coded, presumably tuned to the timings of the
# original run; they may need adjusting on other hardware.
ax_zoom.set_ylim(0.01, 0.02)
ax_zoom.set_title("Zoom in on the execution times with thread pool size per block ranging from 20 to 75 in one dimension")
# Mark each boundary together with the first block size past it
for boundary in (32, 33, 64, 65):
    ax_zoom.axvline(boundary, linestyle='--', color='r')
ax_zoom.set_xlabel('thread count per block')
ax_zoom.set_ylabel('execution time [s]')
# ax_zoom.legend(['Quadratic upsize','Linear upsize'])
plt.show()