In [3]:
import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import time

# =============================
# Parameters
# =============================
N = 131  # Gaussian kernel side length in taps (must be odd)
sigma = N / 3.0  # standard deviation, tied to the kernel size
image_path = r"C:\Users\s3_xc\OneDrive\Desktop\entrada.jpg"
output_path = r"C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda131.jpg"

# =============================
# Generar kernel gaussiano 2D
# =============================
def gaussian_kernel(N, sigma):
    """Build a normalized 2-D Gaussian convolution kernel.

    Args:
        N: Side length of the square kernel, in taps. Must be a positive odd
            number so the kernel has a single center tap and the result is
            exactly N x N.
        sigma: Standard deviation of the Gaussian, in taps. Must be > 0.

    Returns:
        A float32 array of shape (N, N) whose entries sum to 1 (up to
        float32 rounding).

    Raises:
        ValueError: If N is not a positive odd number or sigma is not > 0.
    """
    if N < 1 or N % 2 != 1:
        # An even N would silently produce an (N + 1) x (N + 1) array.
        raise ValueError(f"N debe ser un entero impar positivo, se recibió {N!r}")
    if not sigma > 0:
        # Also rejects NaN; sigma == 0 would turn the whole kernel into NaNs.
        raise ValueError(f"sigma debe ser positivo, se recibió {sigma!r}")

    half = N // 2
    ax = np.arange(-half, half + 1)
    xx, yy = np.meshgrid(ax, ax)
    # Unnormalized Gaussian sampled at integer offsets from the center tap.
    kernel = np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))
    kernel /= kernel.sum()
    return kernel.astype(np.float32)

# Kernel radius and the normalized N x N Gaussian weights (host side).
half = N // 2
kernel = gaussian_kernel(N, sigma)

# =============================
# Load image
# =============================
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img is None:
    # A None result is treated as "the image could not be read".
    raise FileNotFoundError("No se pudo cargar la imagen.")

# The array is indexed (row, column, channel), hence height comes first.
height, width, channels = img.shape
print(f"Imagen cargada: {width}x{height}, canales: {channels}")

# =============================
# CUDA kernel
# =============================
# Plain (non-f) string: nothing is interpolated, so the C braces need no escaping.
mod = SourceModule("""
// Direct 2-D convolution of an interleaved 8-bit image with a ksize x ksize
// float kernel. Each thread produces one output pixel (all of its channels).
// Taps that fall outside the image reuse the nearest edge pixel.
// input and output must not alias.
__global__ void gaussian_filter(
    const unsigned char* __restrict__ input,
    unsigned char* __restrict__ output,
    const float* __restrict__ kernel,
    int width, int height, int channels, int ksize)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads whose coordinates fall outside the image do nothing.
    if (x >= width || y >= height) return;

    int half = ksize / 2;

    for (int c = 0; c < channels; c++) {
        float sum = 0.0f;
        float weight_sum = 0.0f;

        for (int ky = -half; ky <= half; ky++) {
            // The clamped row only depends on ky: compute it once per kernel row.
            int ny = min(max(y + ky, 0), height - 1);
            for (int kx = -half; kx <= half; kx++) {
                int nx = min(max(x + kx, 0), width - 1);
                float w = kernel[(ky + half) * ksize + (kx + half)];
                // size_t arithmetic: width * height * channels may exceed INT_MAX.
                size_t src = ((size_t)ny * width + nx) * channels + c;
                sum += w * input[src];
                weight_sum += w;
            }
        }

        // Round to nearest instead of truncating: accumulated float error can
        // leave a flat region at e.g. 199.9999, which a plain cast turns into 199.
        int value = __float2int_rn(sum / weight_sum);
        size_t dst = ((size_t)y * width + x) * channels + c;
        output[dst] = (unsigned char)min(max(value, 0), 255);
    }
}
""")

gauss_filter = mod.get_function("gaussian_filter")

# =============================
# Allocate GPU memory
# =============================
# Flattened host arrays: the 8-bit image, the kernel weights, and a zeroed
# result buffer with the same length and dtype as the image.
img_flat = np.ravel(img.astype(np.uint8))
kernel_flat = np.ravel(kernel)
out_flat = np.zeros(img_flat.shape, dtype=img_flat.dtype)

# Each input buffer is allocated and filled right away.
d_input = cuda.mem_alloc(img_flat.nbytes)
cuda.memcpy_htod(d_input, img_flat)

d_kernel = cuda.mem_alloc(kernel_flat.nbytes)
cuda.memcpy_htod(d_kernel, kernel_flat)

# The output buffer is only allocated here; nothing is uploaded into it.
d_output = cuda.mem_alloc(out_flat.nbytes)

# =============================
# Launch configuration
# =============================
threads = (16, 16, 1)
# Ceiling division (-(-a // b)) so partially filled blocks still cover the
# right and bottom edges of the image; zip() stops after the x and y extents.
blocks = tuple(-(-extent // per_block)
               for extent, per_block in zip((width, height), threads)) + (1,)

# =============================
# Run the kernel and time it
# =============================
# perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short intervals; time.time() is a wall clock that can be coarse
# and can jump when the system time is adjusted.
start = time.perf_counter()
gauss_filter(d_input, d_output, d_kernel,
             np.int32(width), np.int32(height),
             np.int32(channels), np.int32(N),
             block=threads, grid=blocks)
# Wait for the GPU to finish before stopping the clock, so the measured
# interval covers the whole kernel execution and not just the launch call.
cuda.Context.synchronize()
end = time.perf_counter()

print(f"Tiempo GPU: {end - start:.4f} segundos")

# =============================
# Fetch the processed image
# =============================
cuda.memcpy_dtoh(out_flat, d_output)
out_img = out_flat.reshape((height, width, channels))

# cv2.imwrite reports failure through its boolean return value; check it so a
# failed write is not announced as a success.
if not cv2.imwrite(output_path, out_img):
    raise OSError(f"No se pudo guardar la imagen en: {output_path}")
print("Imagen guardada correctamente en:", output_path)


Imagen cargada: 6084x5093, canales: 3
Tiempo GPU: 6.6298 segundos
Imagen guardada correctamente en: C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda131.jpg


In [4]:
import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import time

# =============================
# Parameters
# =============================
N = 11  # Gaussian kernel side length in taps (must be odd)
sigma = N / 3.0  # standard deviation, tied to the kernel size
image_path = r"C:\Users\s3_xc\OneDrive\Desktop\entrada.jpg"
output_path = r"C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda11.jpg"

# =============================
# Generar kernel gaussiano 2D
# =============================
def gaussian_kernel(N, sigma):
    """Build a normalized 2-D Gaussian convolution kernel.

    Args:
        N: Side length of the square kernel, in taps. Must be a positive odd
            number so the kernel has a single center tap and the result is
            exactly N x N.
        sigma: Standard deviation of the Gaussian, in taps. Must be > 0.

    Returns:
        A float32 array of shape (N, N) whose entries sum to 1 (up to
        float32 rounding).

    Raises:
        ValueError: If N is not a positive odd number or sigma is not > 0.
    """
    if N < 1 or N % 2 != 1:
        # An even N would silently produce an (N + 1) x (N + 1) array.
        raise ValueError(f"N debe ser un entero impar positivo, se recibió {N!r}")
    if not sigma > 0:
        # Also rejects NaN; sigma == 0 would turn the whole kernel into NaNs.
        raise ValueError(f"sigma debe ser positivo, se recibió {sigma!r}")

    half = N // 2
    ax = np.arange(-half, half + 1)
    xx, yy = np.meshgrid(ax, ax)
    # Unnormalized Gaussian sampled at integer offsets from the center tap.
    kernel = np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))
    kernel /= kernel.sum()
    return kernel.astype(np.float32)

# Kernel radius and the normalized N x N Gaussian weights (host side).
half = N // 2
kernel = gaussian_kernel(N, sigma)

# =============================
# Load image
# =============================
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img is None:
    # A None result is treated as "the image could not be read".
    raise FileNotFoundError("No se pudo cargar la imagen.")

# The array is indexed (row, column, channel), hence height comes first.
height, width, channels = img.shape
print(f"Imagen cargada: {width}x{height}, canales: {channels}")

# =============================
# CUDA kernel
# =============================
# Plain (non-f) string: nothing is interpolated, so the C braces need no escaping.
mod = SourceModule("""
// Direct 2-D convolution of an interleaved 8-bit image with a ksize x ksize
// float kernel. Each thread produces one output pixel (all of its channels).
// Taps that fall outside the image reuse the nearest edge pixel.
// input and output must not alias.
__global__ void gaussian_filter(
    const unsigned char* __restrict__ input,
    unsigned char* __restrict__ output,
    const float* __restrict__ kernel,
    int width, int height, int channels, int ksize)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads whose coordinates fall outside the image do nothing.
    if (x >= width || y >= height) return;

    int half = ksize / 2;

    for (int c = 0; c < channels; c++) {
        float sum = 0.0f;
        float weight_sum = 0.0f;

        for (int ky = -half; ky <= half; ky++) {
            // The clamped row only depends on ky: compute it once per kernel row.
            int ny = min(max(y + ky, 0), height - 1);
            for (int kx = -half; kx <= half; kx++) {
                int nx = min(max(x + kx, 0), width - 1);
                float w = kernel[(ky + half) * ksize + (kx + half)];
                // size_t arithmetic: width * height * channels may exceed INT_MAX.
                size_t src = ((size_t)ny * width + nx) * channels + c;
                sum += w * input[src];
                weight_sum += w;
            }
        }

        // Round to nearest instead of truncating: accumulated float error can
        // leave a flat region at e.g. 199.9999, which a plain cast turns into 199.
        int value = __float2int_rn(sum / weight_sum);
        size_t dst = ((size_t)y * width + x) * channels + c;
        output[dst] = (unsigned char)min(max(value, 0), 255);
    }
}
""")

gauss_filter = mod.get_function("gaussian_filter")

# =============================
# Allocate GPU memory
# =============================
# Flattened host arrays: the 8-bit image, the kernel weights, and a zeroed
# result buffer with the same length and dtype as the image.
img_flat = np.ravel(img.astype(np.uint8))
kernel_flat = np.ravel(kernel)
out_flat = np.zeros(img_flat.shape, dtype=img_flat.dtype)

# Each input buffer is allocated and filled right away.
d_input = cuda.mem_alloc(img_flat.nbytes)
cuda.memcpy_htod(d_input, img_flat)

d_kernel = cuda.mem_alloc(kernel_flat.nbytes)
cuda.memcpy_htod(d_kernel, kernel_flat)

# The output buffer is only allocated here; nothing is uploaded into it.
d_output = cuda.mem_alloc(out_flat.nbytes)

# =============================
# Launch configuration
# =============================
threads = (16, 16, 1)
# Ceiling division (-(-a // b)) so partially filled blocks still cover the
# right and bottom edges of the image; zip() stops after the x and y extents.
blocks = tuple(-(-extent // per_block)
               for extent, per_block in zip((width, height), threads)) + (1,)

# =============================
# Run the kernel and time it
# =============================
# perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short intervals; time.time() is a wall clock that can be coarse
# and can jump when the system time is adjusted.
start = time.perf_counter()
gauss_filter(d_input, d_output, d_kernel,
             np.int32(width), np.int32(height),
             np.int32(channels), np.int32(N),
             block=threads, grid=blocks)
# Wait for the GPU to finish before stopping the clock, so the measured
# interval covers the whole kernel execution and not just the launch call.
cuda.Context.synchronize()
end = time.perf_counter()

print(f"Tiempo GPU: {end - start:.4f} segundos")

# =============================
# Fetch the processed image
# =============================
cuda.memcpy_dtoh(out_flat, d_output)
out_img = out_flat.reshape((height, width, channels))

# cv2.imwrite reports failure through its boolean return value; check it so a
# failed write is not announced as a success.
if not cv2.imwrite(output_path, out_img):
    raise OSError(f"No se pudo guardar la imagen en: {output_path}")
print("Imagen guardada correctamente en:", output_path)


Imagen cargada: 6084x5093, canales: 3
Tiempo GPU: 0.0375 segundos
Imagen guardada correctamente en: C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda11.jpg


In [None]:
import numpy as np
import cv2
import pycuda.driver as cuda
import pycuda.autoinit
from pycuda.compiler import SourceModule
import time

# =============================
# Parameters
# =============================
N = 35  # Gaussian kernel side length in taps (must be odd)
sigma = N / 3.0  # standard deviation, tied to the kernel size
image_path = r"C:\Users\s3_xc\OneDrive\Desktop\entrada.jpg"
output_path = r"C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda35.jpg"

# =============================
# Generar kernel gaussiano 2D
# =============================
def gaussian_kernel(N, sigma):
    """Build a normalized 2-D Gaussian convolution kernel.

    Args:
        N: Side length of the square kernel, in taps. Must be a positive odd
            number so the kernel has a single center tap and the result is
            exactly N x N.
        sigma: Standard deviation of the Gaussian, in taps. Must be > 0.

    Returns:
        A float32 array of shape (N, N) whose entries sum to 1 (up to
        float32 rounding).

    Raises:
        ValueError: If N is not a positive odd number or sigma is not > 0.
    """
    if N < 1 or N % 2 != 1:
        # An even N would silently produce an (N + 1) x (N + 1) array.
        raise ValueError(f"N debe ser un entero impar positivo, se recibió {N!r}")
    if not sigma > 0:
        # Also rejects NaN; sigma == 0 would turn the whole kernel into NaNs.
        raise ValueError(f"sigma debe ser positivo, se recibió {sigma!r}")

    half = N // 2
    ax = np.arange(-half, half + 1)
    xx, yy = np.meshgrid(ax, ax)
    # Unnormalized Gaussian sampled at integer offsets from the center tap.
    kernel = np.exp(-(xx ** 2 + yy ** 2) / (2 * sigma ** 2))
    kernel /= kernel.sum()
    return kernel.astype(np.float32)

# Kernel radius and the normalized N x N Gaussian weights (host side).
half = N // 2
kernel = gaussian_kernel(N, sigma)

# =============================
# Load image
# =============================
img = cv2.imread(image_path, cv2.IMREAD_COLOR)
if img is None:
    # A None result is treated as "the image could not be read".
    raise FileNotFoundError("No se pudo cargar la imagen.")

# The array is indexed (row, column, channel), hence height comes first.
height, width, channels = img.shape
print(f"Imagen cargada: {width}x{height}, canales: {channels}")

# =============================
# CUDA kernel
# =============================
# Plain (non-f) string: nothing is interpolated, so the C braces need no escaping.
mod = SourceModule("""
// Direct 2-D convolution of an interleaved 8-bit image with a ksize x ksize
// float kernel. Each thread produces one output pixel (all of its channels).
// Taps that fall outside the image reuse the nearest edge pixel.
// input and output must not alias.
__global__ void gaussian_filter(
    const unsigned char* __restrict__ input,
    unsigned char* __restrict__ output,
    const float* __restrict__ kernel,
    int width, int height, int channels, int ksize)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;

    // Threads whose coordinates fall outside the image do nothing.
    if (x >= width || y >= height) return;

    int half = ksize / 2;

    for (int c = 0; c < channels; c++) {
        float sum = 0.0f;
        float weight_sum = 0.0f;

        for (int ky = -half; ky <= half; ky++) {
            // The clamped row only depends on ky: compute it once per kernel row.
            int ny = min(max(y + ky, 0), height - 1);
            for (int kx = -half; kx <= half; kx++) {
                int nx = min(max(x + kx, 0), width - 1);
                float w = kernel[(ky + half) * ksize + (kx + half)];
                // size_t arithmetic: width * height * channels may exceed INT_MAX.
                size_t src = ((size_t)ny * width + nx) * channels + c;
                sum += w * input[src];
                weight_sum += w;
            }
        }

        // Round to nearest instead of truncating: accumulated float error can
        // leave a flat region at e.g. 199.9999, which a plain cast turns into 199.
        int value = __float2int_rn(sum / weight_sum);
        size_t dst = ((size_t)y * width + x) * channels + c;
        output[dst] = (unsigned char)min(max(value, 0), 255);
    }
}
""")

gauss_filter = mod.get_function("gaussian_filter")

# =============================
# Allocate GPU memory
# =============================
# Flattened host arrays: the 8-bit image, the kernel weights, and a zeroed
# result buffer with the same length and dtype as the image.
img_flat = np.ravel(img.astype(np.uint8))
kernel_flat = np.ravel(kernel)
out_flat = np.zeros(img_flat.shape, dtype=img_flat.dtype)

# Each input buffer is allocated and filled right away.
d_input = cuda.mem_alloc(img_flat.nbytes)
cuda.memcpy_htod(d_input, img_flat)

d_kernel = cuda.mem_alloc(kernel_flat.nbytes)
cuda.memcpy_htod(d_kernel, kernel_flat)

# The output buffer is only allocated here; nothing is uploaded into it.
d_output = cuda.mem_alloc(out_flat.nbytes)

# =============================
# Launch configuration
# =============================
threads = (16, 16, 1)
# Ceiling division (-(-a // b)) so partially filled blocks still cover the
# right and bottom edges of the image; zip() stops after the x and y extents.
blocks = tuple(-(-extent // per_block)
               for extent, per_block in zip((width, height), threads)) + (1,)

# =============================
# Run the kernel and time it
# =============================
# perf_counter() is the monotonic, highest-resolution clock intended for
# measuring short intervals; time.time() is a wall clock that can be coarse
# and can jump when the system time is adjusted.
start = time.perf_counter()
gauss_filter(d_input, d_output, d_kernel,
             np.int32(width), np.int32(height),
             np.int32(channels), np.int32(N),
             block=threads, grid=blocks)
# Wait for the GPU to finish before stopping the clock, so the measured
# interval covers the whole kernel execution and not just the launch call.
cuda.Context.synchronize()
end = time.perf_counter()

print(f"Tiempo GPU: {end - start:.4f} segundos")

# =============================
# Fetch the processed image
# =============================
cuda.memcpy_dtoh(out_flat, d_output)
out_img = out_flat.reshape((height, width, channels))

# cv2.imwrite reports failure through its boolean return value; check it so a
# failed write is not announced as a success.
if not cv2.imwrite(output_path, out_img):
    raise OSError(f"No se pudo guardar la imagen en: {output_path}")
print("Imagen guardada correctamente en:", output_path)


Imagen cargada: 6084x5093, canales: 3
Tiempo GPU: 0.4543 segundos
Imagen guardada correctamente en: C:\Users\s3_xc\OneDrive\Desktop\salida_gauss_pycuda35.jpg


: 