In [1]:
import numpy as np
from numba import cuda, njit, jit
import math
import time
import pandas as pd
import cv2

np.random.seed = 12312

In [2]:
cuda.detect()

Found 1 CUDA devices
id 0    b'NVIDIA GeForce RTX 3060'                              [SUPPORTED]
                      Compute Capability: 8.6
                           PCI Device ID: 0
                              PCI Bus ID: 5
                                    UUID: GPU-fab3ebaa-82c9-7907-6668-b141e026c09b
                                Watchdog: Enabled
                            Compute Mode: WDDM
             FP32/FP64 Performance Ratio: 32
Summary:
	1/1 devices are supported


True

Вычитание векторов

In [3]:
@cuda.jit()
def vector_diff(vector_1, vector_2, vector_3):
    i = cuda.grid(1)
    vector_3[i] = vector_1[i] - vector_2[i]

In [4]:
def vector_diff_cuda(vector_1, vector_2):
    block = 256
    grid = math.ceil(len(vector_1) / block)
    print('size of grid:', grid, 'size of block:', block)

    s = time.time()
    vector_1_d = cuda.to_device(vector_1)
    vector_2_d = cuda.to_device(vector_2)
    vector_r_d = cuda.device_array_like(vector_1)
    f = time.time()
    print('loading time:', f - s)

    s = time.time()
    vector_diff[grid, block](vector_1, vector_2, vector_r_d)
    f = time.time()
    print('calculation time:', f - s)

    return vector_r_d.copy_to_host()

In [5]:
N = 1000000
x = np.random.random(N)
y = np.random.random(N)

In [6]:
start = time.time()
vector_cuda = vector_diff_cuda(x , y)
finish = time.time()
finish - start

size of grid: 3907 size of block: 256
loading time: 0.0890202522277832
calculation time: 0.2355334758758545




0.3265540599822998

In [7]:
start = time.time()
vector_np = x - y
finish = time.time()
finish - start

0.0030014514923095703

In [8]:
np.allclose(vector_cuda, vector_np)

True

Поиск символов в тексте

In [237]:
@cuda.jit
def str_bins(text, bin):
    i = cuda.grid(1)
    if i <= len(text):
        cuda.atomic.add(bin, text[i], 1)

In [238]:
with open('test.txt', encoding='Utf-8') as file:
    text = file.read()
text = list(text)
for i in range(len(text)):
    text[i] = ord(text[i])

text = np.array(text)
bins_letters = np.zeros(text.max() + 1)

In [253]:
TPB = 32
BPG = math.ceil(len(text) / TPB)

s1 = time.time()
s = time.time()
text_device = cuda.to_device(text)
bins_letters_device = cuda.to_device(bins_letters)
f = time.time()
print('loading time:', f - s)

s = time.time()
str_bins[BPG, TPB](text_device, bins_letters_device)
f = time.time()
print('calculation time:', f - s)

s = time.time()
bins_letters = bins_letters_device.copy_to_host()
f = time.time()
print('loading to host time:', f - s)

bins_cuda = {}
for i in range(1, len(np.unique(bins_letters))):
    value = np.unique(bins_letters)[i]
    for i in range(len(np.where(bins_letters == value)[0])):
        key = np.where(bins_letters == value)[0][i]
        bins_cuda[chr(key)] = value
print(time.time() - s1)

loading time: 0.0009999275207519531
calculation time: 0.0009996891021728516
loading to host time: 0.0020017623901367188
0.009002685546875


In [254]:
s = time.time()
bins_cpu = {}
for i in range(len(text)):
    if chr(text[i]) in bins_cpu:
        bins_cpu[chr(text[i])] += 1
    else:
        bins_cpu[chr(text[i])] = 1
f = time.time()
print(f'calculation cpu time:{f - s}')

calculation cpu time:0.07601547241210938


Перемножение матриц

In [11]:
@cuda.jit
def matmul(A, B, C):
    i, j = cuda.grid(2)
    if i < C.shape[0] and j < C.shape[1]:
        tmp = 0.
        for k in range(A.shape[1]):
            tmp += A[i, k] * B[k, j]
        C[i, j] = tmp

In [53]:
def matrx_mnoj_cuda(x, y):
    if x.shape[1] == y.shape[0]:
        block = (32, 32)
        grid = (math.ceil(x.shape[0] / block[0]), math.ceil(y.shape[1] / block[1]))
        print('size of block:', block, 'size of grid:', grid)
      
      
        s = time.time()
        shape = (x.shape[0], y.shape[1])
        x_d = cuda.to_device(x)
        y_d = cuda.to_device(y)
        r_d = cuda.device_array(shape)
        f = time.time()
        print('loading time:', f - s)

        s = time.time()
        matmul[grid, block](x_d, y_d, r_d)
        f = time.time()
        print('calculation time:', f - s)
        
        s = time.time()
        r_d.copy_to_host()
        f = time.time()
        print(f'copy to host time: {f - s}')

        return r_d
    else:
        raise ValueError('col != row')

In [49]:
matrix_1 = np.random.random((5000, 8000))
matrix_2 = np.random.random((8000, 5000))

In [55]:
start = time.time()
matrix_r_cuda = matrx_mnoj_cuda(matrix_1, matrix_2)
finish = time.time()
finish - start

size of block: (32, 32) size of grid: (157, 157)
loading time: 0.1090233325958252
calculation time: 0.0
copy to host time: 9.409118890762329


9.53214693069458

In [51]:
start = time.time()
matrix_r_np = matrix_1 @ matrix_2
finish = time.time()
finish - start

1.6853797435760498

In [52]:
np.allclose(matrix_r_cuda, matrix_r_np)

True

Кодирование изображения

In [3]:
@jit
def coding_cpu(palitre, img):
    img_coding = np.zeros([img.shape[0], img.shape[1]])

    for i in range(img.shape[0]):
        #print(f'{i / img.shape[0] * 100}%')
        
        for j in range(img.shape[1]):
            img_coding[i, j] = np.where((palitre == img[i, j]).all(axis=1))[0][0]

    return img_coding

  @jit


In [4]:
@cuda.jit
def coding_cuda(palitre, img, img_r):
    x, y = cuda.grid(2)
    if x <= img.shape[0] and y <= img.shape[1]:
        for i in range(palitre.shape[0]):
            value = 0
            for j in range(3):
                if palitre[i, j] == img[x, y, j]:
                    value += 1
            if value == 3:
                img_r[x, y] = i
                break
            else:
                value = 0

In [13]:
@cuda.jit
def coding_cuda_const(palitre, img, img_r):
    x, y = cuda.grid(2)
    palitre_c = cuda.const.array_like(palitre)
    if x <= img.shape[0] and y <= img.shape[1]:
        for i in range(palitre_c.shape[0]):
            value = 0
            for j in range(3):
                if palitre_c[i, j] == img[x, y, j]:
                    value += 1
            if value == 3:
                img_r[x, y] = i
                break
            else:
                value = 0

In [5]:
@jit
def decoding_cpu(palitre, img):
    img_decoding = np.zeros([img.shape[0], img.shape[1], 3])
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            img_decoding[i, j] = palitre[int(img[i, j])]
    return img_decoding

  @jit


In [6]:
@cuda.jit
def decoding_cuda(palitre, img, img_r):
    x, y = cuda.grid(2)
    palitre = cuda.const
    if x <= img.shape[0] and y <= img.shape[1]:
        for w in range(3):
            img_r[x, y, w] = palitre[int(img[x, y]), w]

In [14]:
@cuda.jit
def decoding_cuda_const(palitre, img, img_r):
    x, y = cuda.grid(2)
    palitre_c = cuda.const.array_like(palitre)
    if x <= img.shape[0] and y <= img.shape[1]:
        for w in range(3):
            img_r[x, y, w] = palitre_c[int(img[x, y]), w]

In [7]:
def coding_decoding_cuda(palitre, img):
    TPB = (16, 16)
    BPG = (math.ceil(img.shape[0] / TPB[0]), math.ceil(img.shape[1] / TPB[1]))

    palitre_d = cuda.to_device(palitre)
    img_d = cuda.to_device(img)
    img_coding_d = cuda.device_array([img.shape[0], img.shape[1]])
    img_decoding_d = cuda.device_array([img.shape[0], img.shape[1], int(3)])

    coding_cuda[BPG, TPB](palitre_d, img_d, img_coding_d)
    decoding_cuda[BPG, TPB](palitre_d, img_coding_d, img_decoding_d)
    return img_decoding_d.copy_to_host(), img_coding_d.copy_to_host()

In [16]:
def coding_decoding_cuda_const(palitre, img):
    TPB = (16, 16)
    BPG = (math.ceil(img.shape[0] / TPB[0]), math.ceil(img.shape[1] / TPB[1]))

    img_d = cuda.to_device(img)
    img_coding_d = cuda.device_array([img.shape[0], img.shape[1]])
    img_decoding_d = cuda.device_array([img.shape[0], img.shape[1], int(3)])

    coding_cuda[BPG, TPB](palitre, img_d, img_coding_d)
    decoding_cuda[BPG, TPB](palitre, img_coding_d, img_decoding_d)
    return img_decoding_d.copy_to_host(), img_coding_d.copy_to_host()

In [45]:
img = cv2.imread('lab_4.bmp')

In [46]:
colors = list()
for i in range(img.shape[0]):
    for j in range(img.shape[1]):
        colors.append(tuple(img[i , j].tolist()))
print(len(colors) == (img.shape[0] * img.shape[1]))
colors = set(colors)
colors = list(colors)
colors = sorted(colors)
palitre = []
for i in range(len(colors)):
    palitre.append(list(colors[i]))
del colors
palitre = np.array(palitre)

True


In [47]:
s = time.time()
cpu_coding = coding_cpu(palitre, img)
cpu_decoding = decoding_cpu(palitre, cpu_coding)
f = time.time()
print(f'Time of coding, decoding with cpu: {f - s}')
print(f'True:{np.allclose(img, cpu_decoding)}')

Time of coding, decoding with cpu: 57.37299919128418
True:True


In [48]:
s = time.time()
cuda_decoding, cuda_coding = coding_decoding_cuda(palitre, img)
f = time.time()
print(f'Time of coding, decoding with cuda: {f - s}')
print(f'True:{np.allclose(img, cuda_decoding)}')
# print(np.allclose(img_coding, cuda_coding))
# print(np.where((img == cuda_coding) == False))

Time of coding, decoding with cuda: 0.23005461692810059
True:True


In [49]:
s = time.time()
cuda_decoding, cuda_coding = coding_decoding_cuda_const(palitre, img)
f = time.time()
print(f'Time of coding, decoding with cuda: {f - s}')
print(f'True:{np.allclose(img, cuda_decoding)}')
# print(np.allclose(img_coding, cuda_coding))
# print(np.where((img == cuda_coding) == False))

Time of coding, decoding with cuda: 0.12202811241149902
True:True
