## Radio interferometría y síntesis de imágenes en astronomía - Laboratorio 2

### Vicente Mieres

In [4]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

# imports
import numpy as np
# modules
from modules.simulation import visibilities_simulation

In [7]:
# simulate visibilities no grid
VLA_L_4 = {
  "latitude": 34.078749,
  "longitude": -107.617728,
  "file_route": "../antenna_arrays/alma.cycle10.8.cfg",
  "catalog_source": "Sirius",
  "utc_start": "2024-10-21T00:00:00",
  "utc_end": "2024-10-21T06:00:00",
  "step_min": 5,
  "n_freqs": 4,
  "interferometer": {
    "name": "VLA",
    "band_name": "L" },
  "n_sources": 100,
  "max_offset_deg": 1.0,
  "flux_range": [0.1, 2.0],
  "seed": 42
  }

V, uvw_lambda, frequencies, baselines_enu = visibilities_simulation(VLA_L_4)

In [8]:
print(V.shape)

(903, 73, 4)


In [None]:
from modules.coords import max_basline
from modules.interferometry import grid_visibilities
import cupy as cp

# Resolucion
N = 4096
# Distancion maxima entre baselines
Dmax = max_basline(baselines_enu)
oversampling_factor = 10

c = 299792458.0 
freq = np.min(frequencies)
min_wavelenghgt = c / freq
dx = dy = (min_wavelenghgt / Dmax) / oversampling_factor

imgs = []
dus = []

du = 1.0 / (N * dx)
dv = du

In [30]:
%%time
VG, WG  = grid_visibilities(V, uvw_lambda, du, dv, Npix=N)

Gridding in GPU (CuPy Vectorized)
CPU times: total: 0 ns
Wall time: 10.8 ms


In [32]:
%%time
VG2, WG2 = grid_visibilities(V, uvw_lambda, du, dv, N, use_gpu=False)

Gridding in CPU (NumPy Vectorized)
CPU times: total: 46.9 ms
Wall time: 54.7 ms


In [39]:
from numba import cuda
import math
import cupy as cp

@cuda.jit
def grid_kernel_2d(VG, WG, V, u_coords, v_coords, du, dv, Npix):
    """
    Kernel de Numba CUDA para acumular en una grilla 2D[cite: 32].
    Cada hilo procesa una única visibilidad (baseline, time, freq).
    """
    # 1. Obtener el índice global del hilo (thread)
    # Asumimos una grilla 3D: (baselines, times, frequencies)
    idx, idy, idf = cuda.grid(3)

    # 2. Asegurarse de que el hilo está dentro de los límites de los datos
    # (Evita que hilos extra accedan a memoria inválida)
    if idx >= V.shape[0] or idy >= V.shape[1] or idf >= V.shape[2]:
        return

    # 3. Obtener los datos para este hilo [cite: 34]
    u = u_coords[idx, idy, idf]
    v = v_coords[idx, idy, idf]
    vis = V[idx, idy, idf]
    weight = 1.0  # Asumiendo peso uniforme (omega_f)

    # 4. Calcular índices de la grilla [cite: 34]
    i = int(round(u / du)) + Npix // 2
    j = int(round(v / dv)) + Npix // 2

    # 5. Aplicar máscara
    if i >= 0 and i < Npix and j >= 0 and j < Npix:
        
        # 6. Acumulación atómica en la grilla 2D [cite: 35]
        # ¡Esta es la parte crítica! Varios hilos pueden escribir
        # en la misma celda (j, i) al mismo tiempo.
        # "cuda.atomic.add" asegura que las sumas no se pierdan.
        cuda.atomic.add(VG.real, (j, i), weight * vis.real)
        cuda.atomic.add(VG.imag, (j, i), weight * vis.imag)
        cuda.atomic.add(WG, (j, i), weight)

# --- 2. Kernel de Normalización ---
@cuda.jit
def normalize_kernel_2d(VG, WG, Npix):
    """
    Kernel de Numba CUDA para normalizar la grilla 2D[cite: 36].
    Cada hilo procesa un píxel de la grilla final.
    """
    # Usaremos una grilla 2D que coincida con la forma de VG/WG
    j, i = cuda.grid(2)
    
    # Asegurarse de que el hilo está dentro de la grilla
    if j >= Npix or i >= Npix:
        return
    
    # Normalizar solo si hay peso acumulado    
    if WG[j, i] > 0:
        VG[j, i] /= WG[j, i]

# --- 3. Función "Wrapper" de Python ---
def grid_visibilities_numba(V_gpu, uvw_lambda_gpu, du, dv, Npix=256):
    """
    Función "wrapper" que lanza los kernels de Numba CUDA (Parte 3.2)
    para acumular en una grilla 2D.
    
    IMPORTANTE: Esta función DEBE recibir arrays de CuPy (GPU).
    """
    # 1. Obtener dimensiones de los datos de entrada
    # Asumimos que V_gpu es (n_baselines, n_times, n_freqs)
    n_baselines, n_times, n_freqs = V_gpu.shape
    
    u_coords_gpu = uvw_lambda_gpu[..., 0]
    v_coords_gpu = uvw_lambda_gpu[..., 1]
    
    # 2. Inicializar grillas 2D en GPU
    VG_gpu = cp.zeros((Npix, Npix), dtype=cp.complex128)
    WG_gpu = cp.zeros((Npix, Npix), dtype=cp.float64)
    
    # --- 3. Lanzar Kernel de Acumulación ---
    
    # Hilos por bloque (puedes ajustar estos valores)
    threads_per_block = (8, 8, 4) # (x, y, z) -> (baselines, times, freqs)
    
    # Bloques en la grilla (calculados para cubrir toda la data)
    blocks_per_grid_x = math.ceil(n_baselines / threads_per_block[0])
    blocks_per_grid_y = math.ceil(n_times / threads_per_block[1])
    blocks_per_grid_z = math.ceil(n_freqs / threads_per_block[2])
    blocks_per_grid = (blocks_per_grid_x, blocks_per_grid_y, blocks_per_grid_z)

    # Lanzar el kernel!
    grid_kernel_2d[blocks_per_grid, threads_per_block](
        VG_gpu, WG_gpu, V_gpu, u_coords_gpu, v_coords_gpu, du, dv, Npix
    )
    cuda.synchronize() # Esperar a que terminen todos los hilos
    
    # --- 4. Lanzar Kernel de Normalización ---
    
    # Configurar hilos/bloques para la grilla 2D (Npix x Npix)
    threads_per_block_norm = (16, 16) # (j, i)
    blocks_per_grid_j = math.ceil(Npix / threads_per_block_norm[0])
    blocks_per_grid_i = math.ceil(Npix / threads_per_block_norm[1])
    blocks_per_grid_norm = (blocks_per_grid_j, blocks_per_grid_i)
    
    normalize_kernel_2d[blocks_per_grid_norm, threads_per_block_norm](
        VG_gpu, WG_gpu, Npix
    )
    cuda.synchronize() # Esperar a que termine la normalización
    
    return VG_gpu, WG_gpu

In [40]:
VG, WG = grid_visibilities_numba(cp.asarray(V), cp.asarray(uvw_lambda), du, dv, N)

NvvmError: Failed to verify

error: Error: :  Function `_ZN8__main__14grid_kernel_2dB2v3B102cw51cXTLSUwv1sDUaKthaKSjYKCpo4DkeVtwuOooAA2wBSU0YECjxTmSMCR5IQmgJhmYBJIXbKvVYQGrbqUAjSB1oCSQZ1yrCQA_3dE5ArrayI10complex128Li2E1C7mutable7alignedE5ArrayIdLi2E1C7mutable7alignedE5ArrayI10complex128Li3E1C7mutable7alignedE5ArrayIdLi3E1A7mutable7alignedE5ArrayIdLi3E1A7mutable7alignedEddx' Basic Block `B360': 
  context:   %0 = atomicrmw fadd ptr %.667, double %.634 seq_cst, align 8
  atomicrmw does not support operation: 'fadd'.


NVVM_ERROR_INVALID_IR