# **Project: CUDA Image Blurring in Python**

#Readme.md:

## Overview
This project implements **CUDA-accelerated Gaussian Blurring** using **Python and Numba**.

## Installation
1. Install dependencies:
   ```bash
   pip install opencv-python numba numpy

## Running
2. Run the script:
   ```bash
   python blur.py

In [1]:
import cv2
import numpy as np
from numba import cuda

@cuda.jit
def blur_kernel(input_img, output_img, width, height, channels):
    x, y = cuda.grid(2)
    if x >= width or y >= height:
        return

    kernel_size = 3
    kernel = np.array([[1/9, 1/9, 1/9], [1/9, 1/9, 1/9], [1/9, 1/9, 1/9]])

    half_size = kernel_size // 2
    for c in range(channels):
        sum_value = 0
        for ky in range(-half_size, half_size + 1):
            for kx in range(-half_size, half_size + 1):
                px = min(max(x + kx, 0), width - 1)
                py = min(max(y + ky, 0), height - 1)
                sum_value += input_img[py, px, c] * kernel[ky + half_size, kx + half_size]
        output_img[y, x, c] = sum_value

def apply_blur(input_path, output_path):
    img = cv2.imread(input_path)
    if img is None:
        print("Error: Could not load image!")
        return

    height, width, channels = img.shape
    d_input = cuda.to_device(img.astype(np.float32))
    d_output = cuda.device_array_like(img.astype(np.float32))

    threads_per_block = (16, 16)
    blocks_per_grid = ((width + 15) // 16, (height + 15) // 16)

    blur_kernel[blocks_per_grid, threads_per_block](d_input, d_output, width, height, channels)
    blurred_img = d_output.copy_to_host().astype(np.uint8)

    cv2.imwrite(output_path, blurred_img)
    print(f"Blurred image saved as {output_path}")

if __name__ == "__main__":
    apply_blur("input.jpg", "output.jpg")


CudaSupportError: Error at driver init: 

CUDA driver library cannot be found.
If you are sure that a CUDA driver is installed,
try setting environment variable NUMBA_CUDA_DRIVER
with the file path of the CUDA driver shared library.
: