<a href="https://colab.research.google.com/github/Alinabelko/CUDA_labs/blob/main/CUDA_Lab0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from numba import cuda
from time import time
import math
import pandas as pd

Поэлементное умножение матриц

In [2]:
def simple_dot_cpu(a, b):
  n=len(a)
  c = np.zeros((n,n))
  start = time()
  for i in range(n):
    for j in range(n):
      for k in range(n):
        c[i, j] += a[i,k] * b[k,j]
  return c, time()-start

Умножение с помощью numpy

In [3]:
def np_dot_cpu(a, b):
  start = time()
  c = np.dot(a, b)
  return c, time() - start

Вычисление одного элемента с помощью CUDA

In [4]:
@cuda.jit
def simple_dot_gpu(a, b, c):
    i, j = cuda.grid(2)
    if i < c.shape[0] and j < c.shape[1]:
      tmp = 0
      for k in range(a.shape[1]):
        tmp += a[i, k] * b[k, j]
      c[i, j] = tmp

Умножение с помощью CUDA

In [5]:
def gpu_dot_exec(a, b, c, n):

  tread_number_block = 32

  a_global = cuda.to_device(a)
  b_global = cuda.to_device(b)
  c_global = cuda.device_array((n, n))
    
  # grid initialization
  threadsperblock = (tread_number_block, tread_number_block)
  blockspergrid_x = int(math.ceil(a.shape[0] / threadsperblock[1]))
  blockspergrid_y = int(math.ceil(b.shape[1] / threadsperblock[0]))
  blockspergrid = (blockspergrid_x, blockspergrid_y)

  start = time()
  simple_dot_gpu[blockspergrid, threadsperblock](a_global, b_global, c_global)
  gpu_time = time() - start
  c_gpu = c_global.copy_to_host() 
  return c_gpu, gpu_time

In [6]:
def time_test(n, count):  
  gpu_time = 0
  cpu_time = 0
  for _ in range(count):
    a = np.random.randint(0, 10, (n, n)).astype(np.float64)
    b = np.random.randint(0, 10, (n, n)).astype(np.float64)
    c = np.zeros((n, n)).astype(np.float64)
    
    c_gpu, gpu_time = gpu_dot_exec(a, b, c, n)
    start = time()
    c_cpu = simple_dot_cpu(a,b)
    cpu_time += time() - start

  print('Size', n)
  print('CPU:',cpu_time/count)
  print('GPU:',gpu_time/count)
  print('GPU acceletaion',cpu_time/gpu_time * 100, "%" )
  return cpu_time/gpu_time, c_cpu, c_gpu

In [7]:
def computation_error(n):
  a = np.random.randint(0, 10, (n, n)).astype(np.float64)
  b = np.random.randint(0, 10, (n, n)).astype(np.float64)
  c = np.zeros((n, n)).astype(np.float64)
  c_np = np_dot_cpu(a,b)[0]
  c_cpu = simple_dot_cpu(a,b)[0]
  c_gpu = gpu_dot_exec(a, b, c, n)[0]
  print("CPU computation error:", np.argmax(c_np-c_cpu))
  print("GPU computation error:", np.argmax(c_np-c_gpu))

In [None]:
computation_error(50)
res = np.zeros((5,2), dtype = np.float64)
res[0][0], res[0][1] = 10, time_test(10, 4)[0]
res[1][0], res[1][1] = 50, time_test(50, 4)[0]
res[2][0], res[2][1] = 250, time_test(250, 4)[0]
res[3][0], res[3][1] = 1000, time_test(1000, 4)[0]
res[4][0], res[4][1] = 1500, time_test(1500, 4)[0]

CPU computation error: 0
GPU computation error: 0
Size 10
CPU: 0.0008245706558227539
GPU: 7.081031799316406e-05
GPU acceletaion 1164.4781144781145 %
Size 50
CPU: 0.10008478164672852
GPU: 8.20159912109375e-05
GPU acceletaion 122030.81395348838 %
Size 250
CPU: 12.605998575687408
GPU: 9.781122207641602e-05
GPU acceletaion 12888090.249847654 %
