In [7]:
import jax
import jax.numpy as jnp
import os

In [8]:
# Notebook-level PRNG key built from the fixed seed 0, so random draws are reproducible.
key = jax.random.PRNGKey(0)

In [9]:
def generate_data(n_samples, n_features, key=None):
    """
    Generate a synthetic regression dataset with a quadratic target.

    Inputs are drawn i.i.d. from the uniform distribution on [-1, 1) and the
    label of each sample is ``1 - 2 * mean(x**2)`` taken over its features.

    Parameters:
    -----------
    n_samples : int
        Number of samples to generate
    n_features : int
        Dimension of the input features
    key : jax PRNG key, optional
        Key used to draw the inputs. Defaults to ``jax.random.PRNGKey(0)``,
        so repeated calls with the default return the same data.

    Returns:
    --------
    X : array of shape [n_samples, n_features]
        Input features, each entry in [-1, 1)
    Y : array of shape [n_samples]
        Target values
    """
    if key is None:
        # Resolve the default locally (seed 0) instead of reading a global,
        # so the function gives the same result regardless of kernel state.
        key = jax.random.PRNGKey(0)

    X = jax.random.uniform(key, shape=(n_samples, n_features), minval=-1, maxval=1)

    # Quadratic target: one minus twice the per-sample mean of squared features.
    # Alternative target kept for reference (sinc of the scaled feature sum):
    # Y = jnp.sin(jnp.sum(5*X, axis=1))/jnp.sum(5*X, axis=1)
    Y = 1 - 2 * jnp.mean(X**2, axis=1)

    return X, Y
    
    



In [10]:
def generate_dataset(features,labels,M_total,M_train,M_test,key=jax.random.PRNGKey(0)):
    """
    Draw disjoint random train and test subsets from the first M_total rows.

    Parameters:
    -----------
    features : array with at least M_total rows
        Input features, indexed along the first axis
    labels : array with at least M_total entries
        Targets aligned with ``features`` along the first axis
    M_total : int
        Size of the index pool ``0 .. M_total-1`` to sample from
    M_train : int
        Number of training samples to draw
    M_test : int
        Number of test samples to draw
    key : jax PRNG key
        Source of randomness. The test subset depends only on ``key``,
        ``M_total`` and ``M_test``, not on ``M_train``.

    Returns:
    --------
    x_train, y_train, x_test, y_test : arrays
        Train and test splits; no row index appears in both.

    Raises:
    -------
    ValueError
        If ``M_total`` exceeds the number of available rows, or if
        ``M_train + M_test`` exceeds ``M_total``.
    """
    n_available = min(len(features), len(labels))
    if M_total > n_available:
        # Out-of-range indices would otherwise be clamped by JAX indexing
        # instead of raising, silently duplicating the last row.
        raise ValueError(
            f"M_total={M_total} exceeds the number of available samples ({n_available})"
        )
    if M_train + M_test > M_total:
        raise ValueError(
            f"M_train + M_test = {M_train + M_test} exceeds M_total={M_total}"
        )

    # Use an independent key for each draw; reusing one key for two samplers
    # produces correlated results.
    test_key, train_key = jax.random.split(key)

    # subsample train and test split
    test_indices = jax.random.choice(test_key, M_total, shape=(M_test,), replace=False)

    # Training indices come only from what the test draw left over.
    remaining_indices = jnp.setdiff1d(jnp.arange(M_total), test_indices)
    train_indices = jax.random.choice(
        train_key, remaining_indices, shape=(M_train,), replace=False)

    x_train, y_train = features[train_indices], labels[train_indices]
    x_test, y_test = features[test_indices], labels[test_indices]

    return jnp.asarray(x_train), jnp.asarray(y_train), jnp.asarray(x_test), jnp.asarray(y_test)

In [11]:
def save_dataset(n_qubits,M_train,M_test,datasets_path,data_type="regression",M_total = 20000):
    """
    Generate a synthetic dataset, split it, and save the four arrays as .npy files.

    Files are written to ``{datasets_path}/{data_type}/`` (created if missing):
    ``x_train``/``y_train`` named by ``n_qubits`` and ``M_train``, and
    ``x_test``/``y_test`` named by ``n_qubits`` and ``M_test``. The test file
    names do not include ``M_train``, so calls that differ only in ``M_train``
    overwrite the same test files.

    Parameters:
    -----------
    n_qubits : int
        Qubit count used in the file names; the data has ``n_qubits - 2`` features
    M_train : int
        Number of training samples to save
    M_test : int
        Number of test samples to save
    datasets_path : str
        Root output directory
    data_type : str
        Sub-directory name under ``datasets_path``
    M_total : int
        Number of samples generated before splitting

    Raises:
    -------
    ValueError
        If ``n_qubits`` is below 3, which would leave no input features.
    """
    # NOTE(review): the offset of 2 is not explained in this file — presumably
    # two qubits are not used for feature encoding; confirm.
    n_features = n_qubits - 2  # Dimensionality
    if n_features < 1:
        # Checked before touching the filesystem: with zero features the labels
        # would be the mean over an empty axis (NaN) and be saved silently.
        raise ValueError(
            f"n_qubits={n_qubits} gives n_features={n_features}; need n_qubits >= 3"
        )

    out_dir = f"{datasets_path}/{data_type}"
    os.makedirs(out_dir, exist_ok=True)

    # Generate synthetic data
    features, labels = generate_data(M_total, n_features)
    print(features.shape)

    x_train, y_train, x_test, y_test = generate_dataset(features, labels, M_total, M_train, M_test)

    jnp.save(f"{out_dir}/x_train_qubit_{n_qubits}_sample_{M_train}.npy", x_train)
    jnp.save(f"{out_dir}/y_train_qubit_{n_qubits}_sample_{M_train}.npy", y_train)
    jnp.save(f"{out_dir}/x_test_qubit_{n_qubits}_sample_{M_test}.npy", x_test)
    jnp.save(f"{out_dir}/y_test_qubit_{n_qubits}_sample_{M_test}.npy", y_test)
    

In [10]:
# NOTE(review): stale call — save_dataset as defined in this notebook takes `n_qubits`
# (there is no `N` parameter) and requires `datasets_path`, so this raises TypeError
# on a fresh kernel. The recorded output presumably comes from an earlier definition;
# update the arguments or remove this cell.
save_dataset(N=6,M_train=50,M_test=10000)

(20000, 6)


In [12]:
# Root directory for the generated datasets (relative path, resolved against the working directory).
datasets_path = '../../datasets'
# Sub-directory under datasets_path; passed to save_dataset as data_type.
data_type = "regression_special"

In [14]:
# Save one dataset per (training-set size, qubit count) pair,
# each with a 10000-sample test split.
for M in [10, 250, 500, 1000, 1500, 2000]:
    for n in [3, 6, 9, 12]:
        save_dataset(
            n_qubits=n,
            M_train=M,
            M_test=10000,
            datasets_path=datasets_path,
            data_type=data_type,
        )

(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)
(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)
(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)
(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)
(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)
(20000, 1)
(20000, 4)
(20000, 7)
(20000, 10)


In [10]:
# NOTE(review): stale call — save_dataset as defined in this notebook takes `n_qubits`
# (there is no `N` parameter) and requires `datasets_path`, so this loop raises
# TypeError on its first iteration on a fresh kernel. The recorded output presumably
# comes from an earlier definition; update the arguments or remove this cell.
for n_samples in [10,100,250,500,1000,1500,2000,5000]:
    save_dataset(N=6,M_train=n_samples,M_test=10000)

(20000, 6)
(20000, 6)
(20000, 6)
(20000, 6)
(20000, 6)
(20000, 6)
(20000, 6)
(20000, 6)


In [96]:
# NOTE(review): this file name does not follow the pattern save_dataset writes
# (x_train_qubit_{n_qubits}_sample_{M_train}.npy under {datasets_path}/{data_type}/) —
# presumably a file from an earlier version; confirm the path before relying on this check.
a = jnp.load("x_train_qubit_6_sample_2000_regression.npy")

In [97]:
# Display the shape of the loaded array as a quick sanity check.
a.shape

(2000, 6)