In [509]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.linalg import svd
import uuid
import cvxpy as cp
import torch.optim as optim
import torch.nn as nn
from scipy.io import loadmat
import h5py

# Set random seed for reproducibility
np.random.seed(42)

In [510]:
# Step 1: Define System and Simulation Parameters
N = 64  # Number of BS antennas
K = 4   # Number of users
M = 4   # Number of RF chains
omega = 0.3  # Tradeoff weight
I_max = 120  # Maximum outer iterations
J = 10  # Can be 1, 10, or 20

SNR_dB = 12  # SNR in dB
sigma_n2 = 1  # Noise variance
P_BS = sigma_n2 * 10**(SNR_dB / 10)  # Transmit power
mu = 0.01  # Step size for analog precoder
lambda_ = 0.01  # Step size for digital precoder
L = 20  # Number of paths for channel
num_realizations = 100  # Number of channel realizations


# Dataset parameters
num_channels = 100
num_epochs = 100 if J == 100 else 30
snr_min, snr_max = 0, 12  # dB

In [511]:
def to_tensor(x, dtype=None, device=device):
    """Convert numpy or tensor input to a torch tensor on the right device."""
    if isinstance(x, torch.Tensor):
        return x.to(device=device, dtype=dtype)
    elif isinstance(x, np.ndarray):
        return torch.as_tensor(x, dtype=dtype, device=device)
    else:
        raise TypeError(f"Unsupported type: {type(x)}")

def to_numpy(x):
    """Convert tensor to numpy array (CPU)"""
    if isinstance(x, torch.Tensor):
        return x.detach().cpu().numpy()
    elif isinstance(x, np.ndarray):
        return x
    else:
        raise TypeError(f"Unsupported type: {type(x)}")

In [512]:
# Step 2: Define Sensing Parameters
P = 3  # Number of desired sensing angles
theta_d = np.array([-60, 0, 60]) * np.pi / 180  # Desired angles in radians
delta_theta = 5 * np.pi / 180  # Half beamwidth
theta_grid = np.linspace(-np.pi / 2, np.pi / 2, 181)  # Angular grid [-90, 90] degrees
B_d = np.zeros(len(theta_grid))  # Desired beampattern
for t, theta_t in enumerate(theta_grid):
    for theta_p in theta_d:
        if abs(theta_t - theta_p) <= delta_theta:
            B_d[t] = 1

# Wavenumber and antenna spacing
lambda_wave = 1  # Wavelength (normalized)
k = 2 * np.pi / lambda_wave
d = lambda_wave / 2  # Antenna spacing

In [513]:
import torch

# Step 3: Channel Matrix Generation (Saleh-Valenzuela Model)
def generate_channel_batch(N, M, L, batch_size=1, device=device):
    """Generate batch of channels directly on GPU"""
    H = torch.zeros((batch_size, M, N), dtype=torch.cfloat, device=device)
    
    for _ in range(L):
        # Generate complex gains
        alpha_real = torch.randn(batch_size, device=device) / np.sqrt(2)
        alpha_imag = torch.randn(batch_size, device=device) / np.sqrt(2)
        alpha = torch.complex(alpha_real, alpha_imag).reshape(batch_size, 1, 1)
        
        # Random angles
        phi_r = torch.rand(batch_size, device=device) * 2 * np.pi
        phi_t = torch.rand(batch_size, device=device) * 2 * np.pi
        
        # Compute steering vectors
        n_r = torch.arange(M, device=device).float()
        n_t = torch.arange(N, device=device).float()
        
        # Broadcasting for batch computation
        a_r = torch.exp(1j * k * d * n_r.view(1, -1, 1) * torch.sin(phi_r).view(-1, 1, 1)) / np.sqrt(M)
        a_t = torch.exp(1j * k * d * n_t.view(1, 1, -1) * torch.sin(phi_t).view(-1, 1, 1)) / np.sqrt(N)
        
        # Outer product: a_r @ a_t.conj()
        H += np.sqrt(N * M / L) * alpha * torch.bmm(a_r, a_t.conj())
    
    return H.squeeze(0) if batch_size == 1 else H

# Steering vector function
def generate_channel(N, M, L):
    """Single channel generation (for compatibility)"""
    return generate_channel_batch(N, M, L, batch_size=1, device=device)

# Steering vector function - optimized for GPU
def steering_vector_batch(theta, N, device=device):
    """Compute steering vectors for batch of angles on GPU"""
    n = torch.arange(N, device=device).float()
    if theta.dim() == 0:  # scalar
        theta = theta.unsqueeze(0)
    # theta: (num_angles,), n: (N,)
    return torch.exp(1j * k * d * n.unsqueeze(0) * torch.sin(theta).unsqueeze(1)) / np.sqrt(N)

def steering_vector(theta, N):
    """Single steering vector"""
    theta_t = to_tensor(theta, dtype=torch.float32)
    return steering_vector_batch(theta_t, N).squeeze(0)


# Compute communication rate R - optimized
def compute_rate(H, A, D, sigma_n2):
    """Vectorized rate computation"""
    H_A = H @ A  # (K x M) @ (M x N) -> (K x N) or batch
    
    # Handle both single and batch inputs
    if H_A.dim() == 2:
        H_A = H_A.unsqueeze(0)
        D_expanded = D.unsqueeze(0)
        batch_mode = False
    else:
        D_expanded = D
        batch_mode = True
    
    batch_size = H_A.shape[0]
    
    # Compute all signals and interference at once
    # H_A: (batch, K, N), D: (batch, N, K) or (N, K)
    hk_dk = torch.einsum('bkn,bnk->bk', H_A.conj(), D_expanded)  # (batch, K)
    signal = torch.abs(hk_dk) ** 2  # (batch, K)
    
    # Total power per user
    hk_d = torch.einsum('bkn,bnj->bkj', H_A.conj(), D_expanded)  # (batch, K, K)
    total_power = torch.abs(hk_d) ** 2  # (batch, K, K)
    
    # Interference = total - signal
    interference = total_power.sum(dim=-1) - signal  # (batch, K)
    
    # SINR and rate
    SINR = signal / (interference + sigma_n2)
    R = torch.log2(1 + SINR).sum(dim=-1)  # (batch,)
    
    return R.squeeze() if not batch_mode else R

# Compute sensing error tau
def compute_tau(A, D, Psi, theta_grid_gpu):
    """Vectorized tau computation"""
    V = A @ D
    
    # Compute steering vectors for all angles at once
    a_theta = steering_vector_batch(theta_grid_gpu, A.shape[0])  # (num_angles, N)
    
    # Vectorized computation: a^H V V^H a - a^H Psi a
    VVH = V @ V.conj().T  # (N, N)
    
    # Batch matrix-vector products
    aH_VVH_a = torch.einsum('an,nm,am->a', a_theta.conj(), VVH, a_theta)
    aH_Psi_a = torch.einsum('an,nm,am->a', a_theta.conj(), Psi, a_theta)
    
    tau = torch.abs(aH_VVH_a - aH_Psi_a) ** 2
    return tau.mean()

def gradient_R_A(H, A, D, sigma_n2):
    """Optimized gradient computation for A"""
    xi = 1 / torch.log(torch.tensor(2.0, dtype=A.real.dtype, device=A.device))
    grad_A = torch.zeros_like(A)
    
    # Precompute V and V_full
    V = D @ D.conj().T  # (N, N)
    
    for k in range(K):
        h_k = H[k, :].reshape(-1, 1)  # (N, 1)
        H_tilde_k = h_k @ h_k.conj().T  # (N, N)
        
        # Compute V_bar_k more efficiently
        d_k = D[:, k].reshape(-1, 1)
        V_bar_k = V - d_k @ d_k.conj().T
        
        # Denominator terms
        AV = A @ V
        AVbar = A @ V_bar_k
        denom1 = torch.trace(AV @ A.conj().T @ H_tilde_k) + sigma_n2
        denom2 = torch.trace(AVbar @ A.conj().T @ H_tilde_k) + sigma_n2
        
        # Gradient contribution
        term1 = H_tilde_k @ AV / denom1
        term2 = H_tilde_k @ AVbar / denom2
        
        grad_A += xi * (term1 - term2)
    
    return grad_A

def gradient_R_D(H, A, D, sigma_n2):
    """Optimized gradient computation for D"""
    xi = 1 / torch.log(torch.tensor(2.0, dtype=A.real.dtype, device=A.device))
    grad_D = torch.zeros_like(D)
    
    # Precompute A^H
    AH = A.conj().T
    
    for k in range(K):
        h_k = H[k, :].reshape(-1, 1)
        H_tilde_k = h_k @ h_k.conj().T
        H_bar_k = AH @ H_tilde_k @ A  # (M, M)
        
        # Compute D_bar_k more efficiently
        d_k = D[:, k].reshape(-1, 1)
        DDH = D @ D.conj().T
        DDH_bar = DDH - d_k @ d_k.conj().T
        
        # Denominator terms
        denom1 = torch.trace(DDH @ H_bar_k) + sigma_n2
        denom2 = torch.trace(DDH_bar @ H_bar_k) + sigma_n2
        
        # Gradient contributions
        term1 = (H_bar_k @ D) / denom1
        term2 = (H_bar_k @ (D - d_k @ torch.eye(K, device=D.device)[k].reshape(1, -1))) / denom2
        
        grad_D += xi * (term1 - term2)
    
    return grad_D

def gradient_tau_A(A, D, Psi):
    """Optimized gradient for tau w.r.t. A"""
    DDH = D @ D.conj().T
    U = A @ DDH @ A.conj().T
    grad_A = 2 * (U - Psi) @ A @ DDH
    return grad_A

def gradient_tau_D(A, D, Psi):
    """Optimized gradient for tau w.r.t. D"""
    AHA = A.conj().T @ A
    U = A @ D @ D.conj().T @ A.conj().T
    grad_D = 2 * A.conj().T @ (U - Psi) @ A @ D
    return grad_D


In [None]:

with h5py.File('Psi_all.mat', 'r') as f:

    # Read MATLAB complex dataset properly
    Psi_h5 = f['Psi_all']

    # If it's a compound dtype (MATLAB complex), split real/imag
    if np.issubdtype(Psi_h5.dtype, np.void):
        real = Psi_h5['real'][()]  # convert to numpy array
        imag = Psi_h5['imag'][()]
        Psi_all = real + 1j*imag
    else:
        Psi_all = np.array(Psi_h5)

    SNR_dB = np.array(f['SNR_dB']).flatten()


# Access Psi
def compute_psi(snr_db):
    idx = np.argmin(np.abs(SNR_dB - snr_db))
    return Psi_all[idx, :, :]



['Psi_all', 'SNR_dB']
[('real', '<f8'), ('imag', '<f8')]
Psi_all shape: (121, 64, 64)
SNR_dB: [ 0.   0.1  0.2  0.3  0.4  0.5  0.6  0.7  0.8  0.9  1.   1.1  1.2  1.3
  1.4  1.5  1.6  1.7  1.8  1.9  2.   2.1  2.2  2.3  2.4  2.5  2.6  2.7
  2.8  2.9  3.   3.1  3.2  3.3  3.4  3.5  3.6  3.7  3.8  3.9  4.   4.1
  4.2  4.3  4.4  4.5  4.6  4.7  4.8  4.9  5.   5.1  5.2  5.3  5.4  5.5
  5.6  5.7  5.8  5.9  6.   6.1  6.2  6.3  6.4  6.5  6.6  6.7  6.8  6.9
  7.   7.1  7.2  7.3  7.4  7.5  7.6  7.7  7.8  7.9  8.   8.1  8.2  8.3
  8.4  8.5  8.6  8.7  8.8  8.9  9.   9.1  9.2  9.3  9.4  9.5  9.6  9.7
  9.8  9.9 10.  10.1 10.2 10.3 10.4 10.5 10.6 10.7 10.8 10.9 11.  11.1
 11.2 11.3 11.4 11.5 11.6 11.7 11.8 11.9 12. ]
(64, 64)
[[0.15625    0.01752379 0.05855906 ... 0.11866832 0.13690259 0.0114427 ]
 [0.01752379 0.15625    0.08262141 ... 0.03946006 0.07101367 0.13690259]
 [0.05855906 0.08262141 0.15625    ... 0.13355056 0.03946006 0.11866832]
 ...
 [0.11866832 0.03946006 0.13355056 ... 0.15625    0.082621

In [515]:
def project_unit_modulus(A):
    """Fast unit modulus projection"""
    return torch.exp(1j * torch.angle(A))

def project_power_constraint(A, D, P_BS):
    """Fast power constraint projection"""
    norm_factor = torch.linalg.norm(A @ D, ord='fro')
    D = D * (torch.sqrt(P_BS) / norm_factor)
    return D

In [516]:
def proposed_initialization(H, theta_d, N, M, K, P_BS):
    """Initialization - done on GPU"""
    # Convert to numpy for pinv operations (more stable)
    H_np = to_numpy(H)
    G = np.array([H_np[k, :] for k in range(K)]).T
    A0 = np.exp(1j * np.angle(G))
    X_ZF = np.linalg.pinv(H_np)
    D0 = np.linalg.pinv(A0) @ X_ZF
    D0 = np.sqrt(P_BS) * D0 / np.linalg.norm(A0 @ D0, 'fro')
    
    # Move back to GPU
    return to_tensor(A0, dtype=torch.cfloat), to_tensor(D0, dtype=torch.cfloat)

In [517]:
class UPGANetLayer(nn.Module):
    def __init__(self, N, M, K, omega, J=10, eta=None):
        super(UPGANetLayer, self).__init__()
        self.J = J
        self.N, self.M, self.K = N, M, K
        self.omega = omega
        self.eta = eta if eta is not None else 1/N
        
        # Learnable step sizes
        self.mu = nn.Parameter(torch.full((J,), 0.01, dtype=torch.float32))
        self.lambda_ = nn.Parameter(torch.tensor(0.01, dtype=torch.float32))
    
    def forward(self, H, A, D, Psi, sigma_n2, P_BS):
        # J inner updates for analog precoder
        for j in range(self.J):
            grad_RA = gradient_R_A(H, A, D, sigma_n2)
            grad_tauA = gradient_tau_A(A, D, Psi)
            A = A + self.mu[j] * (grad_RA - self.omega * grad_tauA)
            A = project_unit_modulus(A)
        
        # Digital precoder update
        grad_RD = gradient_R_D(H, A, D, sigma_n2)
        grad_tauD = gradient_tau_D(A, D, Psi)
        D = D + self.lambda_ * (grad_RD - self.omega * self.eta * grad_tauD)
        D = project_power_constraint(A, D, P_BS)
        
        return A, D

In [518]:
class UPGANet(nn.Module):
    def __init__(self, N, M, K, omega, I_max=120, J=10):
        super(UPGANet, self).__init__()
        self.layers = nn.ModuleList([
            UPGANetLayer(N, M, K, omega, J=J) for _ in range(I_max)
        ])
        self.I_max = I_max
        self.omega = omega
    
    def forward(self, H, A0, D0, Psi, sigma_n2, P_BS):
        A, D = A0, D0
        for i in range(self.I_max):
            A, D = self.layers[i](H, A, D, Psi, sigma_n2, P_BS)
        return A, D

In [519]:
def upganet_loss(H, A, D, Psi, sigma_n2, omega, theta_grid_gpu):
    """Loss computation using GPU theta_grid"""
    R = compute_rate(H, A, D, sigma_n2)
    tau = compute_tau(A, D, Psi, theta_grid_gpu)
    return -(R - omega * tau)

In [520]:
# Training loop
J_values = [1, 10, 20]
for J in J_values:
    print(f"\n=== Training with J = {J} ===")
    
    # Move model to GPU
    model = UPGANet(N, M, K, omega, I_max=I_max, J=J).to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    
    # Pre-generate SNR values on GPU
    snr_values = torch.arange(snr_min, snr_max, 0.1, device=device)
    
    for epoch in range(num_epochs):
        total_loss = 0.0
        
        for ch_idx in range(num_channels):
            # Generate channel on GPU
            H = generate_channel(N, M, L=3)
            
            # Random SNR
            snr_db = snr_values[torch.randint(len(snr_values), (1,), device=device)].item()
            
            # Get Psi on GPU
            Psi = compute_psi(snr_db)
            
            # Initialization on GPU
            A0, D0 = proposed_initialization(H, theta_d, N, M, K, P_BS)
            
            # Convert scalars to tensors on GPU
            sigma_n2_t = torch.tensor(sigma_n2, dtype=torch.float32, device=device)
            P_BS_t = torch.tensor(P_BS, dtype=torch.float32, device=device)
            
            # Forward pass
            A_final, D_final = model(H, A0, D0, Psi, sigma_n2_t, P_BS_t)
            
            # Compute loss
            loss = upganet_loss(H, A_final, D_final, Psi, sigma_n2_t, omega, theta_grid_gpu)
            
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            
            if (ch_idx + 1) % 10 == 0:
                print(f"Channel {ch_idx+1}/{num_channels} processed.")
        
        avg_loss = total_loss / num_channels
        print(f"[Epoch {epoch+1}/{num_epochs}] Average Loss: {avg_loss:.6f}")
        
        # Clear CUDA cache periodically
        if (epoch + 1) % 10 == 0:
            torch.cuda.empty_cache()


=== Training with J = 1 ===


RuntimeError: expected m1 and m2 to have the same dtype, but got: c10::complex<float> != float