# In [1]:
import numpy as np

def ecoformer_attention(Q, K, V, lam=1.0):
    """
    EcoFormer-style L1-based attention.

    Each query attends to every key with a weight proportional to the
    Laplacian kernel ``exp(-scale * ||q - k||_1)``, where
    ``scale = lam / sqrt(d_k)``. The weights of each query are normalized
    to sum to 1 and used to average the value rows.

    Args:
        Q: Queries, shape (N, d_k)
        K: Keys, shape (M, d_k); M == N in the usual self-attention case
        V: Values, shape (M, d_v)
        lam: lambda parameter controlling sharpness of the kernel

    Returns:
        O: Output context vectors, shape (N, d_v)
    """
    Q = np.asarray(Q, dtype=float)
    K = np.asarray(K, dtype=float)
    V = np.asarray(V, dtype=float)

    N, d_k = Q.shape
    scale = lam / np.sqrt(d_k)
    O = np.zeros((N, V.shape[1]))

    # With no keys there is nothing to attend to; keep the all-zero output.
    if K.shape[0] == 0:
        return O

    # One query at a time keeps peak memory at O(M * d_k) instead of
    # materializing the full (N, M, d_k) difference tensor.
    for i, q in enumerate(Q):
        # L1 distance between this query and every key, shape (M,)
        dist = np.abs(K - q).sum(axis=1)

        # Laplacian-kernel logits. Shifting by the maximum leaves the
        # normalized weights unchanged but guarantees the largest term is
        # exp(0) == 1, so the normalizer can never underflow to zero
        # (which would turn the whole row into NaN) nor overflow.
        logits = -scale * dist
        logits -= logits.max()

        weights = np.exp(logits)  # shape (M,)
        weights /= weights.sum()  # normalize

        # Weighted sum over values
        O[i] = weights @ V  # shape (d_v,)

    return O


# Toy example: three one-hot queries attending over identical keys.
Q = np.eye(3, dtype=float)
K = Q.copy()
# Values are the numbers 1..9 laid out row by row in a 3x3 grid.
V = np.arange(1, 10, dtype=float).reshape(3, 3)

O = ecoformer_attention(Q, K, V, lam=1.0)
print(O)

# Output:
# [[2.73977825 3.73977825 4.73977825]
#  [4.         5.         6.        ]
#  [5.26022175 6.26022175 7.26022175]]
