# Numba Lab

Now it's your turn!

Your challenge is to implement Fizz Buzz in CUDA Python with Numba and find every instance of `Fizz`, `Buzz` and `Fizz Buzz` among the integers from `1` to `50_000_000` (inclusive).

In [30]:
#Import the libs
import numpy as np
from numba import cuda

In [31]:
# Problem range: Fizz Buzz is evaluated for the integers 1..problem_size.
problem_size = 50_000_000
# Number of threads per block used in the kernel launch configuration.
num_threads = 128

**1. Create the input data array.** Try using `np.arange`.

In [34]:
# Host-side input: the integers 1..problem_size (inclusive) as 64-bit ints.
input_data = np.arange(1, problem_size + 1, dtype=np.int64)
input_data  # notebook display of the array

array([       1,        2,        3, ..., 49999998, 49999999, 50000000])

**2. Create the output data array.**

In [35]:
# Host-side result buffer: one int64 slot per input value, initialised to 0.
output = np.zeros(problem_size, dtype=np.int64)
output  # notebook display of the array

array([0, 0, 0, ..., 0, 0, 0])

**3. Calculate the number of threads and blocks.**

In [36]:
# Ceiling division in pure integer arithmetic: enough blocks of `num_threads`
# threads to cover every element, including a final partially-filled block.
# This yields a plain Python int rather than round-tripping through a float
# and a NumPy scalar.
num_blocks = (problem_size + num_threads - 1) // num_threads
num_blocks

390625

**4. Create the kernel.** _Tip: Our output array must be numeric so try using the values `1`, `2` and `3` to represent `Fizz`, `Buzz` and `Fizz Buzz`._

In [37]:
@cuda.jit
def fizz_buzz(input_array, output_array):
    """Classify each input value as Fizz, Buzz, Fizz Buzz or neither.

    Each thread handles the single element at its global 1D grid index and
    stores a numeric code in the matching slot of ``output_array``:
    ``3`` = Fizz Buzz (divisible by 15), ``2`` = Buzz (divisible by 5),
    ``1`` = Fizz (divisible by 3), ``0`` = none of these.
    """
    pos = cuda.grid(1)
    # The launch may contain more threads than elements, so out-of-range
    # threads do nothing; also never write past a shorter output buffer.
    if pos < input_array.size and pos < output_array.size:
        value = input_array[pos]
        if value % 15 == 0:
            output_array[pos] = 3  # Fizz Buzz
        elif value % 5 == 0:
            output_array[pos] = 2  # Buzz
        elif value % 3 == 0:
            output_array[pos] = 1  # Fizz
        else:
            output_array[pos] = 0  # neither


In [None]:
# Copy the input values to the GPU.
d_input_array = cuda.to_device(input_data)
# Allocate the result buffer directly on the device instead of uploading the
# all-zero host array: the kernel stores a code (including 0) for every
# element, so the buffer's initial contents are never read.
d_output_array = cuda.device_array_like(output)

**5. Run our kernel.**

In [38]:
%%time
fizz_buzz[num_blocks, num_threads](d_input_array, d_output_array)

cuda.synchronize()

results = d_output_array.copy_to_host()

print(f"15th value: {results[14]}, expected: 3 (Fizz)")
print(f"20th value: {results[19]}, expected: 2 (Buzz)")
results[:15]

15th value: 3, expected: 3 (Fizz)
20th value: 2, expected: 2 (Buzz)
CPU times: user 270 ms, sys: 85.3 ms, total: 355 ms
Wall time: 389 ms


array([0, 0, 1, 0, 2, 1, 0, 0, 1, 2, 0, 1, 0, 0, 3])

**Extra Credit 6. Calculate the highest instance of `Fizz`.**

In [39]:
# "Fizz" is a value divisible by 3 but not by 5, i.e. result code 1. Testing
# only `input_data % 3 == 0` would also match Fizz Buzz values (multiples of
# 15). `np.flatnonzero` returns indices in ascending order, so the last one
# is the highest position holding a Fizz.
idx = np.flatnonzero(results == 1)[-1]
input_data[idx]

49999998

In [11]:
from numba import cuda
import numpy as np

# Fizz Buzz kernel to run on the GPU
@cuda.jit
def fizz_buzz(input_array, output_array):
    """Store a Fizz Buzz code for each element of ``input_array``.

    Codes written to ``output_array``: 3 = Fizz Buzz, 2 = Buzz, 1 = Fizz,
    0 = none of these.
    """
    idx = cuda.grid(1)
    if idx >= input_array.size:
        # Thread index falls outside the array: nothing to do
        return

    number = input_array[idx]
    if number % 15 == 0:
        code = 3  # Fizz Buzz
    elif number % 5 == 0:
        code = 2  # Buzz
    elif number % 3 == 0:
        code = 1  # Fizz
    else:
        code = 0  # None
    output_array[idx] = code

# Problem size
problem_size = 50_000_000

# Block size (number of threads per block)
block_size = 128

# Number of blocks: integer ceiling division so that every element is
# covered, including a final partially-filled block
num_blocks = (problem_size + block_size - 1) // block_size

# Create the array of numbers and the array of results
numbers = np.arange(1, problem_size + 1, dtype=np.int64)
results = np.zeros(problem_size, dtype=np.int64)

# Transfer the data to the GPU
d_numbers = cuda.to_device(numbers)
d_results = cuda.to_device(results)

# Run the kernel on the GPU. The launch configuration uses this cell's own
# `block_size`, so the cell does not rely on variables from other cells.
fizz_buzz[num_blocks, block_size](d_numbers, d_results)

# Retrieve the results from the GPU
results = d_results.copy_to_host()

# Count the occurrences of Fizz, Buzz and FizzBuzz
fizz_count = np.count_nonzero(results == 1)
buzz_count = np.count_nonzero(results == 2)
fizz_buzz_count = np.count_nonzero(results == 3)

print(f"Fizz: {fizz_count}, Buzz: {buzz_count}, FizzBuzz: {fizz_buzz_count}")


Fizz: 13333333, Buzz: 6666667, FizzBuzz: 3333333
