 ## Первый раздел.
 ### Импорт библиотек, подключение CUDA devices

In [1]:
!pip install numba
!pip install py-cpuinfo

import numba
import math
import time
from  datetime  import  datetime 
import numpy as np
from numba import cuda, jit, float64, njit, prange, vectorize, int32
import pandas as pd
import cpuinfo 

Collecting py-cpuinfo
  Downloading py-cpuinfo-8.0.0.tar.gz (99 kB)
[K     |████████████████████████████████| 99 kB 5.4 MB/s 
[?25hBuilding wheels for collected packages: py-cpuinfo
  Building wheel for py-cpuinfo (setup.py) ... [?25l[?25hdone
  Created wheel for py-cpuinfo: filename=py_cpuinfo-8.0.0-py3-none-any.whl size=22258 sha256=4a574a40a543bffe1b3fd9ca4009c47b80261d2b38f0b230ee2f4bfe369c58b5
  Stored in directory: /root/.cache/pip/wheels/d2/f1/1f/041add21dc9c4220157f1bd2bd6afe1f1a49524c3396b94401
Successfully built py-cpuinfo
Installing collected packages: py-cpuinfo
Successfully installed py-cpuinfo-8.0.0



### *Проверяем, какое оборудование у нас подключенно.*

In [2]:
print('----------------------------------------------------------------------')
print('CPU, выделенный на сервере Colaboratory:', cpuinfo.get_cpu_info()['brand_raw'])
print('GPU, выделенный на сервере Colaboratory:', cuda.get_current_device())
print('----------------------------------------------------------------------')
numba.cuda.detect()

----------------------------------------------------------------------
CPU, выделенный на сервере Colaboratory: Intel(R) Xeon(R) CPU @ 2.30GHz
GPU, выделенный на сервере Colaboratory: <CUDA device 0 'b'Tesla K80''>
----------------------------------------------------------------------
Found 1 CUDA devices
id 0            b'Tesla K80'                              [SUPPORTED]
                      compute capability: 3.7
                           pci device id: 4
                              pci bus id: 0
Summary:
	1/1 devices are supported


True

### Второй раздел.

Входные данные: Вектора размером значений: <br/>
- 10 000
- 25 000 000
- 50 000 000
- 75 000 000 
- 100 000 000 <br/>

In [3]:
lens_vec = [10000, 25000000, 50000000, 75000000, 100000000]
vec = []

for i in range(len(lens_vec)):
  vec.append(np.random.randint(-100, 100, lens_vec[i]))

vec_CPU = []
vec_GPU = []

Arr_timeCPU = []
Arr_timeGPU = []
CPU_time =[]
GPU_time =[]


### Третий раздел.

*Сложение векторов на CPU* <br/>

In [4]:
def CPU_summ_v(vec):
  sum = 0
  for i in range(len(vec)):
    sum  = sum + vec[i]
  return sum

In [5]:
for i in range(5):
  start = time.time()
  vec_CPU.append(CPU_summ_v(vec[i]))
  Arr_timeCPU.append(time.time() - start)
  print('Sum of Vectors №',i+1,'on CPU:', round(Arr_timeCPU[i],2), "second")

Sum of Vectors № 1 on CPU: 0.0 second
Sum of Vectors № 2 on CPU: 6.56 second
Sum of Vectors № 3 on CPU: 13.11 second
Sum of Vectors № 4 on CPU: 19.58 second
Sum of Vectors № 5 on CPU: 26.35 second


### Четвертый раздел.

*Сложение векторов на GPU* <br/>

In [6]:
TPB = 32
count_block = []

for i in range(5):
  count_block.append(math.ceil((len(vec[i]))/TPB))

out = np.zeros(1, dtype = np.int64)

In [7]:
@cuda.jit
def dev_vec_sum(vect, c):
  idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
  sum = 0
  if idx < len(vect):
    
    sum += vect[idx]
    cuda.atomic.add(c, 0, sum)
    
def DeviceHost(vect, c, count_block):
    dev_a = cuda.to_device(vect)
    dev_c = cuda.to_device(c)
    
    dev_vec_sum[count_block, TPB](dev_a, dev_c)
    
    
    return dev_c.copy_to_host()

In [8]:
for i in range(5):
  start = time.time()
  vec_GPU.append(DeviceHost(vec[i], out, count_block[i]))
  Arr_timeGPU.append(time.time() - start)
  print('Sum of Vectors №',i+1,'on GPU:', round(Arr_timeGPU[i],2), "second")
  

Sum of Vectors № 1 on GPU: 0.55 second
Sum of Vectors № 2 on GPU: 0.08 second
Sum of Vectors № 3 on GPU: 0.16 second
Sum of Vectors № 4 on GPU: 0.21 second
Sum of Vectors № 5 on GPU: 0.26 second


### Пятый раздел.

*Итоги:* <br/>

In [10]:
check = 0
for i in range(5):
  if vec_CPU[i] == vec_GPU[i]:
      check += 1

if check == 5:
  print('Вектора перемноженные на CPU и GPU идентичны!')

for i in range(5):
  CPU_time.append(str(round(Arr_timeCPU[i], 2)))
  GPU_time.append(str(round(Arr_timeGPU[i], 2)))

Вектора перемноженные на CPU и GPU идентичны!


In [11]:


timing_df = pd.DataFrame({'Device':['CPU', 'GPU', 'CPU' ,'GPU', 'CPU', 'GPU', 'CPU' ,'GPU', 'CPU' ,'GPU'],
                          'Time(sec)':[CPU_time[0], GPU_time[0], CPU_time[1], GPU_time[1], CPU_time[2], GPU_time[2],
                                         CPU_time[3], GPU_time[3], CPU_time[4], GPU_time[4]],
                          'Matrix_size':[f"{lens_vec[0]}", f"{lens_vec[0]}",f"{lens_vec[1]}", f"{lens_vec[1]}",f"{lens_vec[2]}", f"{lens_vec[2]}",
                                         f"{lens_vec[3]}", f"{lens_vec[3]}",f"{lens_vec[4]}", f"{lens_vec[4]}"]})
                                         
timing_df



Unnamed: 0,Device,Time(sec),Matrix_size
0,CPU,0.0,10000
1,GPU,0.55,10000
2,CPU,6.56,25000000
3,GPU,0.08,25000000
4,CPU,13.11,50000000
5,GPU,0.16,50000000
6,CPU,19.58,75000000
7,GPU,0.21,75000000
8,CPU,26.35,100000000
9,GPU,0.26,100000000
