In [3]:
import numpy as np
import time
from numba import cuda, float32

# CPU функция для подчсета суммы
def sum_cpu(vector, repeat=10):
    start_time = time.perf_counter()
    for _ in range(repeat):
        result = np.sum(vector)
    elapsed_time = (time.perf_counter() - start_time) / repeat
    return result, elapsed_time

# GPU функция для подсчета суммы
@cuda.jit
def sum_gpu_kernel(vector, partial_sums):
    shared_mem = cuda.shared.array(512, dtype=float32)
    tid = cuda.threadIdx.x
    i = cuda.blockIdx.x * cuda.blockDim.x + tid

    # Загружаем данные в shared memory
    if i < vector.size:
        shared_mem[tid] = vector[i]
    else:
        shared_mem[tid] = 0
    cuda.syncthreads()

    # Редукция
    step = 1
    while step < cuda.blockDim.x:
        if tid % (2 * step) == 0:
            shared_mem[tid] += shared_mem[tid + step]
        cuda.syncthreads()
        step *= 2

    # Сохраняем результат
    if tid == 0:
        partial_sums[cuda.blockIdx.x] = shared_mem[0]

def sum_gpu(vector, repeat=10):
    threads_per_block = 512
    blocks_per_grid = (vector.size + threads_per_block - 1) // threads_per_block

    vector_device = cuda.to_device(vector)
    partial_sums_device = cuda.device_array(blocks_per_grid, dtype=np.float32)

    start_time = time.perf_counter()
    for _ in range(repeat):
        sum_gpu_kernel[blocks_per_grid, threads_per_block](vector_device, partial_sums_device)
        cuda.synchronize()
    elapsed_time = (time.perf_counter() - start_time) / repeat

    partial_sums = partial_sums_device.copy_to_host()
    total_sum = np.sum(partial_sums)
    return total_sum, elapsed_time

def main():
    vector_sizes = [1_000, 10_000, 100_000, 1_000_000, 10_000_000]
    results = []

    for size in vector_sizes:
        vector = np.random.rand(size).astype(np.float32)

        # CPU вычисление
        cpu_sum, cpu_time = sum_cpu(vector)

        # GPU вычисление
        gpu_sum, gpu_time = sum_gpu(vector)

        # Проверка корректности
        assert np.isclose(cpu_sum, gpu_sum, atol=1e-5), "Суммы не совпадают!"

        # Запись результатов
        if gpu_time == 0:
            speedup = float('inf')
        else:
            speedup = cpu_time / gpu_time
        
        results.append((size, cpu_time, gpu_time, speedup))

    print(f"{'Size':>10} | {'CPU Time (s)':>12} | {'GPU Time (s)':>12} | {'Speedup':>8}")
    print("-" * 50)
    for size, cpu_time, gpu_time, speedup in results:
        print(f"{size:>10} | {cpu_time:>12.6f} | {gpu_time:>12.6f} | {speedup:>8.2f}")

if __name__ == "__main__":
    main()



      Size | CPU Time (s) | GPU Time (s) |  Speedup
--------------------------------------------------
      1000 |     0.000013 |     0.015074 |     0.00
     10000 |     0.000007 |     0.000546 |     0.01
    100000 |     0.000040 |     0.000380 |     0.10
   1000000 |     0.000544 |     0.000390 |     1.39
  10000000 |     0.009921 |     0.002213 |     4.48
