In [1]:
import numpy as np
import numba
from numba import cuda
import math
from time import time
from PIL import Image

In [2]:
def salt_and_pepper_add(image, prob):
  rnd = np.random.rand(image.shape[0], image.shape[1])
  noisy = image.copy()
  noisy[rnd < prob] = 0
  noisy[rnd > 1 - prob] = 255
  return noisy

In [3]:
def median_filter(a):
  b = a.copy()
  start = time()
  for i in range(2, len(a)-1):
    for j in range(2, len(a[i])-1):
      t=[0, 0, 0, 0, 0, 0, 0, 0, 0]
      t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8] = a[i-1][j-1], a[i-1][j], a[i-1][j+1], a[i][j-1], a[i][j], a[i][j+1], a[i+1][j-1], a[i+1][j], a[i+1][j+1]
      for k in range(8):
        for l in range(8-k):
          if t[l]>t[l+1]:
            t[l], t[l+1] = t[l+1], t[l]
      b[i][j]=t[(int)(len(t)/2)]
  return b, time()-start

In [4]:
@cuda.jit
def gpu_median_filter(a, b):
  i, j = cuda.grid(2)
  t=cuda.local.array(shape=9, dtype=numba.int64)
  t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8] = a[i-1][j-1], a[i-1][j], a[i-1][j+1], a[i][j-1], a[i][j], a[i][j+1], a[i+1][j-1], a[i+1][j], a[i+1][j+1]
  for k in range(8):
    for l in range(8-k):
      if t[l]>t[l+1]:
        t[l], t[l+1] = t[l+1], t[l]
  b[i][j]=t[(int)(len(t)/2)]

In [5]:
def prepare_and_exec_gpu(a):
  b = a.copy()
  tread_number_block = 32

  a_global = cuda.to_device(a)
  b_global = cuda.to_device(b)
    
  threadsperblock = (tread_number_block, tread_number_block)
  blockspergrid_x = int(math.ceil(a.shape[0] / threadsperblock[1]))
  blockspergrid_y = int(math.ceil(b.shape[1] / threadsperblock[0]))
  blockspergrid = (blockspergrid_x, blockspergrid_y)

  start = time()
  gpu_median_filter[blockspergrid, threadsperblock](a_global, b_global)
  return b_global.copy_to_host(), time()-start 

In [6]:
def experiment(img_name, need_draw):
  im=(Image.open(img_name)).convert('L')
  if need_draw==True:
    display(im)
  img = np.array(im)

  img = salt_and_pepper_add(img, 0.09)
  if need_draw==True:
    display(Image.fromarray(np.uint8(img)))
    img3 = Image.fromarray(np.uint8(img))
    img3.save('SAP.jpg')

  img1, ctime = median_filter(img)
  if need_draw==True:
    display(Image.fromarray(np.uint8(img1)))
    img4 = Image.fromarray(np.uint8(img1))
    img4.save('CPU.jpg')

  img2, gtime = prepare_and_exec_gpu(img)
  if need_draw==True:
    display(Image.fromarray(np.uint8(img2)))
    img5 = Image.fromarray(np.uint8(img2))
    img5.save('GPU.jpg')
  
  n=len(img)*len(img[0])
  print('Количество элементов =', n)
  print('Время на CPU =', ctime)
  print('Время на GPU =', gtime)
  print('Ускорение =', ctime/gtime)
  print()
  return ctime/gtime, n

In [8]:
a1, n1 = experiment('price1.jpg', False)
a2, n2 = experiment('price2.jpg', False)
a3, n3 = experiment('price3.jpg', False)
a4, n4 = experiment('price4.jpg', False)

Количество элементов = 91908
Время на CPU = 1.2830479145050049
Время на GPU = 0.0008296966552734375
Ускорение = 1546.4060344827585

Количество элементов = 810000
Время на CPU = 10.96475863456726
Время на GPU = 0.0031464099884033203
Ускорение = 3484.847389558233

Количество элементов = 172800
Время на CPU = 2.316943883895874
Время на GPU = 0.0010488033294677734
Ускорение = 2209.1309388497384

Количество элементов = 1047552
Время на CPU = 14.559786081314087
Время на GPU = 0.003742694854736328
Ускорение = 3890.1878583259013

