Implementing a basic LCMV beamformer

In [None]:
import numpy as np
import soundfile as sf
from scipy.signal import stft, istft, windows

# Location of the 4-channel recording to be beamformed.
audio_path = '/kaggle/input/audios/audio_dataset/samsung_non_overlapping/02-25.12-20-54-168__WL_BH_d1m_Left_TC5.hdf/02-25.12-20-54-168__WL_BH_d1m_Left_TC5.wav'

# sf.read returns (samples, sample_rate); multi-channel data comes back as (n_samples, n_channels).
audio, fs = sf.read(audio_path)

# Fail fast if the recording does not match what the rest of the notebook assumes.
assert fs == 16000, f"Expected sampling rate 16000, but got {fs}"
is_four_channel = audio.ndim == 2 and audio.shape[1] == 4
assert is_four_channel, "Audio must be 4-channel"

# Work in single precision from here on.
audio = np.asarray(audio, dtype=np.float32)
print(f"Loaded audio with shape {audio.shape}, fs = {fs}")

In [None]:
# --- Array geometry (metres), one row per microphone: [x, y, z] ---
# Axis convention used in this notebook: +x is left, -x is right, -z is forward.
mic_positions = np.array(
    [
        [-0.05, 0.00, 0.00],   # Mic 1: right side
        [0.05, 0.00, 0.00],    # Mic 2: left side
        [-0.08, 0.045, 0.04],  # Mic 3: upper-right, set back
        [0.08, 0.045, 0.04],   # Mic 4: upper-left, set back
    ],
    dtype=np.float32,
)

# --- Target (near-field) ---
# The desired talker's mouth, given as a point relative to the array origin:
# 6 cm along -y from the array centre.
source_pos = np.array([0.0, -0.06, 0.0], dtype=np.float32)

# --- Interferer (far-field bystander) ---
# Described only by an azimuth in the horizontal (x-z) plane:
#   0 deg  -> straight ahead (-z)
#   > 0    -> rotated toward the left (+x)
#   < 0    -> rotated toward the right (-x)
doa_noise_deg = 90.0  # 90 deg = directly to the left (+x axis)
az = np.deg2rad(doa_noise_deg)

# Unit vector for that azimuth; y stays 0 because the bystander is in the horizontal plane.
doa_noise_vec = np.array([np.sin(az), 0.0, -np.cos(az)], dtype=np.float32)
# Already unit length analytically; renormalise to absorb float32 rounding.
doa_noise_vec = doa_noise_vec / np.linalg.norm(doa_noise_vec)

# --- STFT parameters ---
n_fft = 512
hop_length = n_fft // 2  # 50 % overlap
window = windows.hann(n_fft, sym=False)  # periodic Hann

print(f"Mic positions:\n{mic_positions}")
print(f"Source position: {source_pos}")
print(f"Noise DoA vector (az={doa_noise_deg}°): {doa_noise_vec}")

In [None]:
c = 343.0  # speed of sound in m/s

def steering_vector_nearfield(mic_positions, source_pos, freqs, speed_of_sound=343.0, include_amplitude=False):
    """
    Compute the near-field (spherical-wave) steering vectors for a point source.

    Each microphone's entry is exp(-j*2*pi*f*d_m/c), where d_m is the distance
    from the source to microphone m, i.e. the phase of a pure propagation delay
    of d_m/c seconds. All arithmetic is done in float64 and only the final
    result is cast to complex64, so the result does not depend on the dtype of
    the inputs.

    mic_positions: array-like (M,3), microphone coordinates in metres
    source_pos: array-like (3,), source coordinates in metres
    freqs: array-like of frequencies (Hz), shape (F,); a scalar is treated as F=1
    speed_of_sound: propagation speed in m/s
    include_amplitude: if True, additionally scale each entry by 1/d_m
    Returns: steering matrix of shape (F, M), complex64
    """
    mic_positions = np.asarray(mic_positions, dtype=np.float64)
    source_pos = np.asarray(source_pos, dtype=np.float64)
    freqs = np.atleast_1d(np.asarray(freqs, dtype=np.float64))

    # Source-to-microphone distances, shape (M,).
    dists = np.linalg.norm(mic_positions - source_pos[None, :], axis=1)
    # Avoid division by zero when the source coincides with a microphone.
    dists = np.maximum(dists, 1e-6)

    # Broadcast (F,1) frequencies against (1,M) delays: one exp() call for all bins.
    delays = dists / speed_of_sound
    a = np.exp(-2j * np.pi * freqs[:, None] * delays[None, :])
    if include_amplitude:
        a = a / dists[None, :]
    return a.astype(np.complex64)  # shape (F, M)

def steering_vector_farfield(mic_positions, doa_vec, freqs, speed_of_sound=343.0):
    """
    Compute the far-field (plane-wave) steering vectors for a direction of arrival.

    Convention: doa_vec points from the array origin TOWARD the source. A
    microphone displaced along doa_vec is therefore closer to the source and
    receives the wavefront earlier, so its delay relative to the origin is
    tau_m = -(p_m . doa_vec) / c, and its steering entry is
    exp(-j*2*pi*f*tau_m) = exp(+j*2*pi*f*(p_m . doa_vec)/c).
    This is the large-distance limit of the spherical model
    exp(-j*2*pi*f*d_m/c) referenced to the array origin.

    All arithmetic is done in float64; the result is cast to complex64.

    mic_positions: array-like (M,3), microphone coordinates in metres
    doa_vec: array-like unit vector (3,), pointing toward the source
    freqs: array-like of frequencies (Hz), shape (F,); a scalar is treated as F=1
    speed_of_sound: propagation speed in m/s
    Returns: steering matrix of shape (F, M), complex64
    """
    mic_positions = np.asarray(mic_positions, dtype=np.float64)
    doa_vec = np.asarray(doa_vec, dtype=np.float64)
    freqs = np.atleast_1d(np.asarray(freqs, dtype=np.float64))

    # Arrival delay of each microphone relative to the origin, shape (M,).
    # Negative for microphones on the source side of the origin.
    delays = -(mic_positions @ doa_vec) / speed_of_sound

    # Broadcast (F,1) frequencies against (1,M) delays.
    a = np.exp(-2j * np.pi * freqs[:, None] * delays[None, :])
    return a.astype(np.complex64)  # shape (F, M)

In [None]:
# One STFT per microphone channel, all with identical framing.
# boundary=None / padded=False: no edge extension and no zero-padding of the tail.
stft_kwargs = dict(
    fs=fs,
    window=window,
    nperseg=n_fft,
    noverlap=n_fft - hop_length,
    boundary=None,
    padded=False,
)
channel_results = [stft(audio[:, ch], **stft_kwargs) for ch in range(4)]

# The frequency and frame-time axes are the same for every channel; keep the last one's.
f = channel_results[-1][0]
t_frames = channel_results[-1][1]

# Each spectrogram is (F, T); stacking on a new last axis gives (F, T, M).
stfts = np.stack([result[2] for result in channel_results], axis=2)
F_bins, T_frames, M = stfts.shape
assert M == 4
print(f"STFT computed: freq bins={F_bins}, time frames={T_frames}, channels={M}")

freqs = f  # centre frequency (Hz) of each STFT bin

In [None]:
# Cell 5: Steering vectors for every STFT bin (each result has shape (F, M))

# Desired talker: fixed near-field point source, phase-only model (no amplitude term).
a_s = steering_vector_nearfield(
    mic_positions,
    source_pos,
    freqs,
    speed_of_sound=c,
    include_amplitude=False,
)

# Interfering bystander: far-field direction. This is a fixed example DoA;
# recompute a_n whenever the DoA changes.
a_n = steering_vector_farfield(
    mic_positions,
    doa_noise_vec,
    freqs,
    speed_of_sound=c,
)

In [None]:
# Null-steering weights, one (M,) vector per frequency bin.
# For each bin we want the minimum-norm w satisfying
#     w^H a_s = 1   (pass the target undistorted)
#     w^H a_n = 0   (null the interferer)
# i.e. w = C (C^H C)^{-1} g with C = [a_s, a_n] and g = [1, 0]^T.
W = np.zeros((F_bins, M), dtype=np.complex64)
eps = 1e-6  # diagonal loading added to C^H C
g = np.array([1.0, 0.0], dtype=np.complex128)
for k in range(F_bins):
    # Do the linear algebra in double precision: at low frequencies a_s and a_n
    # are almost collinear, C^H C is nearly singular, and a loading of 1e-6 is
    # at the rounding level of single precision.
    a_s_k = a_s[k, :].astype(np.complex128)  # (M,)
    a_n_k = a_n[k, :].astype(np.complex128)  # (M,)
    # Constraint matrix C: shape (M, 2)
    C = np.stack([a_s_k, a_n_k], axis=1)
    CHC = C.conj().T @ C + eps * np.eye(2)  # (2,2), regularised Gram matrix
    try:
        coeffs = np.linalg.solve(CHC, g)
    except np.linalg.LinAlgError:
        # Exactly singular system: fall back to the pseudo-inverse.
        coeffs = np.linalg.pinv(CHC) @ g
    w0 = C @ coeffs  # (M,)
    # The loading makes w0^H a_s slightly different from 1; rescale so the
    # target response is exactly 1. With w = w0 / alpha we get
    # w^H a_s = (w0^H a_s) / conj(alpha), hence alpha = conj(w0^H a_s).
    response = np.vdot(w0, a_s_k)  # w0^H a_s
    if np.abs(response) < 1e-6:
        # Degenerate bin: leave the weights unscaled rather than divide by ~0.
        w = w0
    else:
        w = w0 / np.conj(response)
    W[k, :] = w  # stored as complex64
print("Computed null-steering weights for all frequency bins.")


In [None]:
# Apply the beamformer to every time-frequency point: y(f, t) = w(f)^H x(f, t).
# W is (F, M) and stfts is (F, T, M); broadcasting over the frame axis and
# summing over microphones handles all frames in one step.
Y = np.sum(np.conj(W)[:, None, :] * stfts, axis=2).astype(np.complex64, copy=False)  # (F, T)

# Back to the time domain with the same framing as the forward STFT.
# boundary=False: the forward transform did not extend the signal at its edges.
output = istft(Y, fs=fs, window=window,
               nperseg=n_fft, noverlap=n_fft - hop_length,
               input_onesided=True, boundary=False)[1]

# Trim or pad to match original length. The forward STFT used padded=False, so
# trailing samples that did not fill a whole frame were dropped and the
# reconstruction can be shorter than the input; fill that tail with zeros.
n_samples = audio.shape[0]
if output.shape[0] < n_samples:
    output = np.pad(output, (0, n_samples - output.shape[0]))
else:
    output = output[:n_samples]
print(f"Beamformed output shape: {output.shape}")

In [None]:
output_path = 'beamformed_output.wav'

# Peak level of the beamformed signal; the small offset keeps the divisor non-zero.
max_val = np.abs(output).max() + 1e-9

# Only scale down when the peak exceeds full scale (1.0); quieter signals are left as they are.
output_norm = output / max_val if max_val > 1.0 else output

sf.write(output_path, output_norm, fs)
print(f"Beamformed output written to {output_path}")