In [9]:
import numpy as np
import numba
from numba import cuda
import time
import plotly.express as px
import plotly.graph_objs as go

# Функция последовательного вычисления произведения матриц


In [26]:
@numba.jit(nopython=True)
def cpuMatMul(A, B):
    C = np.zeros((A.shape[0], B.shape[1]))
    for y in range(B.shape[1]):
        for x in range(A.shape[0]):
            tmp = 0
            for k in range(A.shape[1]):
                tmp += A[x, k] * B[k, y]
            C[x, y] = tmp

# Функция, исполняемая на графическом процессоре

In [30]:
@cuda.jit
def matMul(A, B, C):
    row, col = cuda.grid(2)
    
    if row < C.shape[0] and col < C.shape[1]:
        tmp = 0
        for n in range(A.shape[1]):
            tmp += A[row, n] * B[n, col]
        C[row, col] = tmp
    

In [34]:
N = 32
TPB = 32

device = cuda.get_current_device()

A = np.random.randint(1, 10, (N * TPB, N * TPB))
B = np.random.randint(1, 10, (N * TPB, N * TPB))

C_cpu = np.zeros((A.shape[0], B.shape[1]))

print(
      f"Shpae of A matrix: {A.shape}\n"
      f"Shpae of B matrix: {B.shape}\n"
      )

start = time.time()

d_A = cuda.to_device(A)
d_B = cuda.to_device(B)
d_C = cuda.device_array((A.shape[0], B.shape[1]))

threads_per_block = (TPB, TPB)
blocks_per_grid_x = int(np.ceil(A.shape[0] / threads_per_block[0]))
blocks_per_grid_y = int(np.ceil(B.shape[1] / threads_per_block[1]))
blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y)

matMul[blocks_per_grid, threads_per_block](d_A, d_B, d_C)
cuda.synchronize()

finish = time.time()

gpu_time = finish - start

C_gpu = d_C.copy_to_host()

start = time.time()

cpuMatMul(A, B)

finish = time.time()

cpu_time =  finish - start

#check_result = np.dot(A, B)

print(
      f"GPU computing time: {gpu_time} s\n"
      f"CPU computing time: {cpu_time} s\n"
     )


"""                                                            
CPU commputing without @numba.jit                            
CPU: 128x128     0.9691727161407471 s             
     512x512     60.58731579780578  s
     1024x1024   464.4956908226013  s
     1500x1500   1277.329903841018  s
     2048x2048   3779.119226694107  s
        
GPU: 128x128          0.15595412254333496 s
     512x512          0.2692432403564453  s
     1024x1024        0.342332124710083   s
     1504x1504        0.6738879680633545  s
     2048x2048        1.0551021099090576  s
"""     
"""
CPU commputing with @numba.jit
     100x100     0.15546512603759766  s  
     512x512     0.35259127616882324  s
     1024x1024   1.6671504974365234   s   
     1500x1500   4.66312313079834     s
     2048x2048   32.37240171432495    s
"""     
print()

Shpae of A matrix: (1024, 1024)
Shpae of B matrix: (1024, 1024)

GPU computing time: 0.33396482467651367 s
CPU computing time: 1.6060490608215332 s




# Графики 

In [15]:
dims = [128, 512, 1024, 1504, 2048]

cpu_time = np.array([0.9691727161407471, 60.58731579780578, 464.4956908226013, 1277.3299038410187, 3779.119226694107]) # seconds
numba_cpu_time = np.array([0.15546512603759766, 0.35259127616882324, 1.6671504974365234, 4.66312313079834, 32.37240171432495]) # seconds
gpu_time = np.array([0.15595412254333496, 0.2692432403564453, 0.342332124710083, 0.6738879680633545, 1.0551021099090576]) # seconds


In [25]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=dims, y=cpu_time, name="CPU"))
fig.add_trace(go.Scatter(x=dims, y=numba_cpu_time, name="CPU with numba"))
fig.add_trace(go.Scatter(x=dims, y=gpu_time, name="GPU"))

fig.update_layout(title="Dependence of the calculation execution time on the dimensions of the matrix ")

fig.update_yaxes(type='log', title="Time, s")
fig.update_xaxes(title="Dimension of matrix N x N")

fig.show()

In [24]:
boost = cpu_time / gpu_time
boost_numba = numba_cpu_time / gpu_time

fig = go.Figure()

fig.add_trace(go.Scatter(x=dims, y=boost, name="Boost for CPU without numba"))
fig.add_trace(go.Scatter(x=dims, y=boost_numba, name="Boost for CPU with numba"))

fig.update_layout(title="Dependence boost on the dimensions of matrix")

fig.update_yaxes(type='log', title="Boost")
fig.update_xaxes(title="Dimension of matrix N x N")

fig.show()