# Benign Overfitting in Single-Head Attention

## Data generation

In [1]:
# Imports
import numpy as np

In [2]:
# Hyperparameters (used as the default arguments of generate_data)
n_samples = 200  # number of data points to generate
d = 40_000       # dimensionality of the feature space
rho = 30         # norm of the signal vectors mu1 and mu2
eta = 0.05       # label-flipping probability

In [3]:
# Generate data function
def generate_data(n_samples=n_samples, d=d, rho=rho, eta=eta):
    """
    Generate a two-token dataset: one signal token and one noise token per sample.

    Each sample i gets a label drawn uniformly from {+1, -1}. Its first token
    X[i, 0] is the signal u (mu1 when the drawn label is +1, mu2 otherwise),
    where mu1 and mu2 are random orthogonal vectors of norm rho. Its second
    token X[i, 1] is Gaussian noise with covariance
    I - (mu1 mu1^T + mu2 mu2^T) / rho^2, i.e. standard normal noise with its
    components along mu1 and mu2 removed. Finally, each label is flipped
    independently with probability eta.

    Parameters:
    - n_samples: Number of data points to generate.
    - d: Dimensionality of the feature space (must be at least 2).
    - rho: Norm of the signal vectors (mu1 and mu2).
    - eta: Label-flipping probability.

    Each parameter defaults to the module-level hyperparameter of the same
    name, as it was when this function was defined.

    Returns:
    - X: Generated data points, shape (n_samples, 2, d).
    - y: Labels after flipping, shape (n_samples,).
    - clean_1: non-flipped indices for which u = mu1
    - clean_2: non-flipped indices for which u = mu2
    - noisy_1: flipped indices for which u = mu1
    - noisy_2: flipped indices for which u = mu2

    Raises:
    - ValueError: if d < 2 (two orthogonal signal vectors cannot exist).
    """
    if d < 2:
        raise ValueError("d must be at least 2 to hold two orthogonal signal vectors")

    # Orthonormal signal directions. Working with unit vectors keeps the
    # construction well defined even when rho == 0.
    dir1 = np.random.randn(d)
    dir1 /= np.linalg.norm(dir1)

    dir2 = np.random.randn(d)
    dir2 -= (dir2 @ dir1) * dir1  # remove the projection on dir1
    dir2 /= np.linalg.norm(dir2)

    # Signal vectors of norm rho
    mu1 = rho * dir1
    mu2 = rho * dir2

    # Uniform label distribution
    y = np.random.choice([+1, -1], size=n_samples)

    # Remember which signal each sample carries BEFORE labels are flipped;
    # afterwards y no longer identifies u.
    uses_mu1 = y == +1

    X = np.empty((n_samples, 2, d))

    # Assign u to x(1)
    X[:, 0, :] = np.where(uses_mu1[:, None], mu1, mu2)

    # Assign ksi to x(2). The target covariance is the orthogonal projector
    # onto the complement of span{mu1, mu2}, so projecting standard normal
    # draws yields exactly that distribution without ever building (or
    # factorising) the d x d covariance matrix.
    noise = np.random.randn(n_samples, d)
    for direction in (dir1, dir2):
        noise -= np.outer(noise @ direction, direction)
    X[:, 1, :] = noise

    # Apply label flipping with probability eta
    flipped = np.random.rand(n_samples) < eta
    y[flipped] = -y[flipped]

    # Split the indices by (clean / noisy) and by signal (mu1 / mu2)
    clean_1 = np.flatnonzero(~flipped & uses_mu1).tolist()
    clean_2 = np.flatnonzero(~flipped & ~uses_mu1).tolist()
    noisy_1 = np.flatnonzero(flipped & uses_mu1).tolist()
    noisy_2 = np.flatnonzero(flipped & ~uses_mu1).tolist()

    return X, y, clean_1, clean_2, noisy_1, noisy_2