In [1]:
import numpy as np
from PIL import Image
import pycuda.autoinit
import pycuda.driver as cuda
from pycuda.compiler import SourceModule

In [22]:
def load_image(filename):
    """Load an image file as a grayscale float32 array.

    Args:
        filename: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A tuple ``(img_data, size)`` where ``img_data`` is a NumPy ``float32``
        array built from the image converted to mode ``'L'`` and ``size`` is
        the PIL ``(width, height)`` tuple of that image.
    """
    # Image.open is lazy and keeps the underlying file open; the context
    # manager releases the handle as soon as the pixels have been converted.
    with Image.open(filename) as src:
        img = src.convert('L')
    img_data = np.array(img, dtype=np.float32)
    return img_data, img.size

def save_image(image_data, filename):
    """Write a numeric array to disk as an 8-bit image.

    Args:
        image_data: NumPy array of pixel values. Values are saturated to the
            ``[0, 255]`` range and then cast to ``uint8``.
        filename: Destination path passed to ``PIL.Image.save``.
    """
    # Clip before casting: a bare astype(np.uint8) wraps (or is
    # platform-dependent for floats) on values outside 0..255 instead of
    # saturating. In-range values are unaffected.
    pixels = np.clip(image_data, 0, 255).astype(np.uint8)
    img = Image.fromarray(pixels)
    img.save(filename)

# Compile the CUDA source below with PyCUDA's SourceModule; `mod` is later
# queried with mod.get_function("median_filter").
#
# Kernel summary (3x3 median filter):
#   * each thread computes its pixel (x, y) from block/thread indices and
#     returns immediately when it falls outside width x height;
#   * the 3x3 neighbourhood is gathered with coordinates clamped to
#     [0, width - 1] and [0, height - 1];
#   * the 9 samples are sorted in ascending order and the middle one
#     (index 4) is written to output[y * width + x].
# Kernel argument order is (output, width, height, input).
mod = SourceModule("""
__global__ void median_filter(float *output, int width, int height, float *input) {
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    if (x >= width || y >= height) return;

    float window[9];
    int count = 0;
    for (int dy = -1; dy <= 1; ++dy) {
        for (int dx = -1; dx <= 1; ++dx) {
            int nx = min(max(x + dx, 0), width - 1);
            int ny = min(max(y + dy, 0), height - 1);
            window[count++] = input[ny * width + nx];  // Direct memory access
        }
    }

    // Sort the window
    for (int i = 0; i < 9; ++i) {
        for (int j = i + 1; j < 9; ++j) {
            if (window[i] > window[j]) {
                float temp = window[i];
                window[i] = window[j];
                window[j] = temp;
            }
        }
    }

    output[y * width + x] = window[4];  // Store the median
}
""")

def main(input_filename, output_filename):
    """Median-filter an image on the GPU and write the result to disk.

    Args:
        input_filename: Path of the image to read; it is loaded through
            ``load_image``.
        output_filename: Path the filtered image is written to through
            ``save_image``.
    """
    # Load the image
    img_data, (width, height) = load_image(input_filename)

    # Host buffer that receives the filtered pixels
    output = np.zeros_like(img_data)

    # Allocate device memory
    d_output = cuda.mem_alloc(output.nbytes)
    d_input = cuda.mem_alloc(img_data.nbytes)
    try:
        # Copy input image data to device memory
        cuda.memcpy_htod(d_input, img_data)

        # Launch the kernel with one thread per pixel. The grid size is the
        # ceiling of dimension / block size, so no spare row or column of
        # blocks is launched when a dimension is an exact multiple of the
        # block size; the kernel's own bounds check covers the partial blocks.
        func = mod.get_function("median_filter")
        block = (16, 16, 1)
        grid = (
            (width + block[0] - 1) // block[0],
            (height + block[1] - 1) // block[1],
        )
        func(d_output, np.int32(width), np.int32(height), d_input,
             block=block, grid=grid)

        # Copy the result back to host
        cuda.memcpy_dtoh(output, d_output)
    finally:
        # Release the device buffers deterministically, even if the upload,
        # launch, or download raised.
        d_input.free()
        d_output.free()

    # Save the output image
    save_image(output, output_filename)


kernel.cu

  mod = SourceModule("""


In [24]:
if __name__ == "__main__":
    # Sample paths: the source image and where the filtered copy is written.
    input_filename, output_filename = "input.png", "output.png"
    main(input_filename=input_filename, output_filename=output_filename)