In [33]:
import numpy as np
from numba import cuda
import time
import math

In [34]:
# Report the GPU model, driver and CUDA version of the benchmark machine.
!nvidia-smi

Mon Oct  9 19:16:22 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P0    27W /  70W |    191MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [35]:
# Report the CPU architecture details of the benchmark machine.
!lscpu

Architecture:            x86_64
  CPU op-mode(s):        32-bit, 64-bit
  Address sizes:         46 bits physical, 48 bits virtual
  Byte Order:            Little Endian
CPU(s):                  2
  On-line CPU(s) list:   0,1
Vendor ID:               GenuineIntel
  Model name:            Intel(R) Xeon(R) CPU @ 2.00GHz
    CPU family:          6
    Model:               85
    Thread(s) per core:  2
    Core(s) per socket:  1
    Socket(s):           1
    Stepping:            3
    BogoMIPS:            4000.41
    Flags:               fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mc
                         a cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscal
                         l nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopo
                         logy nonstop_tsc cpuid tsc_known_freq pni pclmulqdq sss
                         e3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes 
                         xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowpref

In [36]:
def cpu_mul(A, B, C):
    """Multiply ``A`` by ``B`` with plain Python loops, writing into ``C``.

    Each entry ``C[row, col]`` is overwritten with the sum over ``k`` of
    ``A[row, k] * B[k, col]``. The output extent is taken from ``C.shape``
    and the inner dimension from ``A.shape[1]``. Nothing is returned; ``C``
    is modified in place.
    """
    for row in range(C.shape[0]):
        for col in range(C.shape[1]):
            # Accumulate the dot product of row `row` of A and column `col` of B.
            dot = 0
            for inner in range(A.shape[1]):
                dot += A[row, inner] * B[inner, col]
            C[row, col] = dot

@cuda.jit
def gpu_mul(A, B, C):
    """CUDA kernel: each thread computes at most one entry of ``C = A x B``.

    The thread's 2-D grid position ``(row, col)`` selects the output entry.
    Threads whose position falls outside ``C`` do nothing, so the launch grid
    may be larger than the output matrix.
    """
    row, col = cuda.grid(2)

    # Guard: the grid may overshoot the matrix bounds.
    if row >= C.shape[0] or col >= C.shape[1]:
        return

    acc = 0
    for inner in range(A.shape[1]):
        acc += A[row, inner] * B[inner, col]
    C[row, col] = acc

In [37]:
def compare(size: int):
    """Time CPU vs. GPU multiplication of two random ``size`` x ``size`` matrices.

    Parameters
    ----------
    size : int
        Number of rows and columns of each randomly generated input matrix.

    Returns
    -------
    tuple of float
        ``(tmc, tmg)``: wall-clock seconds spent in ``cpu_mul`` and in one
        completed ``gpu_mul`` kernel run, respectively. Host-to-device copies
        and the kernel's first-call JIT compilation are excluded from ``tmg``.

    Raises
    ------
    AssertionError
        If the GPU result differs from the CPU result.
    """
    import warnings

    # Matrix initialisation: small integer entries, plus the CPU output buffer.
    mc1 = np.random.randint(0, 5, (size, size))
    mc2 = np.random.randint(0, 5, (size, size))
    cres = np.zeros((mc1.shape[0], mc2.shape[1]), dtype=int)

    # CPU timing. perf_counter is monotonic and has the best available
    # resolution for measuring intervals.
    start = time.perf_counter()
    cpu_mul(mc1, mc2, cres)
    tmc = time.perf_counter() - start

    # Silence warnings only while driving the GPU, instead of installing a
    # global "ignore" filter that would persist for the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        mg1 = cuda.to_device(mc1)
        mg2 = cuda.to_device(mc2)
        # Same shape and dtype as the CPU buffer so the results are comparable.
        gres = cuda.device_array(cres.shape, dtype=cres.dtype)

        # Kernel launch configuration: enough 32x32 blocks to cover the output.
        threadsperblock = (32, 32)
        blockspergrid = (
            math.ceil(cres.shape[0] / threadsperblock[0]),
            math.ceil(cres.shape[1] / threadsperblock[1]),
        )

        # Warm-up launch: the first call for a given argument signature
        # triggers JIT compilation, which must not be counted as run time.
        gpu_mul[blockspergrid, threadsperblock](mg1, mg2, gres)
        cuda.synchronize()

        # Kernel launches are asynchronous: without synchronize() the timer
        # would stop as soon as the launch is queued, not when it finishes.
        start = time.perf_counter()
        gpu_mul[blockspergrid, threadsperblock](mg1, mg2, gres)
        cuda.synchronize()
        tmg = time.perf_counter() - start

        gpu_result = gres.copy_to_host()

    # A speed-up figure is meaningless if the two implementations disagree.
    assert np.array_equal(cres, gpu_result), "GPU result differs from CPU result"

    return tmc, tmg

In [None]:
from matplotlib import pyplot as plt

# Matrix dimensions n (inputs are n x n) to benchmark.
sizes = [100, 200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]
ctimes = []          # CPU seconds for each size
gtimes = []          # GPU seconds for each size
accelerations = []   # CPU time divided by GPU time for each size

for sz in sizes:
    c, g = compare(sz)
    ctimes.append(c)
    gtimes.append(g)
    # A zero GPU reading (below timer resolution) would raise
    # ZeroDivisionError; record NaN so that point is left out of the curve.
    accelerations.append(c / g if g > 0 else float('nan'))

# Absolute run times of both implementations.
plt.plot(sizes, ctimes, label='CPU', color='blue')
plt.plot(sizes, gtimes, label='GPU', color='orange')
plt.grid(True)
plt.legend()
plt.xlabel("Dim nxn")
plt.ylabel("t, s")
plt.title('Time')
plt.show()

# Speed-up of the GPU kernel relative to the CPU loop.
plt.plot(sizes, accelerations, color='red')
plt.grid(True)
plt.xlabel("Dim nxn")
plt.ylabel("CPU time / GPU time")
plt.title('Acceleration')
plt.show()