In [7]:
import os
import jax
import jax.numpy as jnp



In [2]:
# Base JAX PRNG key built from the fixed seed 0.
key = jax.random.PRNGKey(0)

In [3]:
def generate_tanh_dataset(n_samples, n_features, key=None):
    """Generate a regression dataset based on the hyperbolic tangent function.

    Features are drawn uniformly from [-1, 1). Each label is the hyperbolic
    tangent of the sum of that sample's features, rescaled from [-1, 1] to
    [0, 1] via ``(tanh(s) + 1) / 2``.

    Args:
        n_samples (int): Number of samples to generate.
        n_features (int): Number of features per sample.
        key: Optional JAX PRNG key used to draw the features. Defaults to
            ``jax.random.PRNGKey(0)``, so the function does not depend on a
            module-level ``key`` variable having been defined beforehand.

    Returns:
        tuple: Contains two JAX arrays:
            - X: Input features array of shape (n_samples, n_features)
            - Y: Label array of shape (n_samples,) with values in range [0,1]
    """
    if key is None:
        key = jax.random.PRNGKey(0)

    # Uniform features in [-1, 1): minval is inclusive, maxval is exclusive.
    X = jax.random.uniform(key, shape=(n_samples, n_features), minval=-1, maxval=1)
    # tanh of the per-sample feature sum, shifted and scaled into [0, 1].
    Y = (jnp.tanh(jnp.sum(X, axis=1)) + 1) / 2

    # X and Y are already JAX arrays, so no extra conversion is needed.
    return X, Y

In [4]:
def generate_dataset(features,labels,M_total,M_train,M_test,key=jax.random.PRNGKey(0)):
    """Draw disjoint train and test subsets from a pool of samples.

    ``M_test`` indices are sampled without replacement from ``range(M_total)``;
    ``M_train`` indices are then sampled without replacement from the indices
    that were not picked for the test set, so the two subsets never overlap.

    Args:
        features: Array indexed along its first axis by sample.
        labels: Array indexed along its first axis by sample.
        M_total (int): Size of the index pool to sample from.
        M_train (int): Number of training samples to draw.
        M_test (int): Number of test samples to draw.
        key: JAX PRNG key controlling the split.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` as JAX arrays.

    Raises:
        ValueError: If ``M_train + M_test`` exceeds ``M_total``.
    """
    if M_train + M_test > M_total:
        raise ValueError(
            f"M_train + M_test ({M_train} + {M_test}) exceeds M_total ({M_total})"
        )

    # A JAX key must not be consumed twice: derive one independent subkey per
    # random draw instead of passing the same key to both choice() calls.
    test_key, train_key = jax.random.split(key)

    # subsample train and test split
    test_indices = jax.random.choice(test_key, M_total, shape=(M_test,), replace=False)

    # Training indices come only from what the test draw left over.
    remaining = jnp.setdiff1d(jnp.arange(M_total), test_indices)
    train_indices = jax.random.choice(train_key, remaining, shape=(M_train,), replace=False)

    x_train, y_train = features[train_indices], labels[train_indices]
    x_test, y_test = features[test_indices], labels[test_indices]

    return jnp.asarray(x_train), jnp.asarray(y_train), jnp.asarray(x_test), jnp.asarray(y_test)

In [5]:
def save_dataset(n_qubits,n_layers,M_train,M_test,data_type,M_total,datasets_path):
    """Generate a tanh dataset, split it, and write the four splits as .npy files.

    A pool of ``M_total`` samples with ``n_qubits * n_layers * 3`` features is
    generated, the features are reshaped to ``(M_total, n_layers, n_qubits, 3)``,
    and train/test subsets are drawn from it. Files are written under
    ``{datasets_path}/{data_type}``; train files carry ``M_train`` in their
    name and test files carry ``M_test``.

    Args:
        n_qubits (int): Size of the qubit axis of the reshaped features.
        n_layers (int): Size of the layer axis of the reshaped features.
        M_train (int): Number of training samples to save.
        M_test (int): Number of test samples to save.
        data_type (str): Subdirectory name under ``datasets_path``.
        M_total (int): Number of samples generated before splitting.
        datasets_path (str): Root directory for the output files.
    """
    # Make sure the target directory is there before anything is written.
    os.makedirs(f"{datasets_path}/{data_type}", exist_ok=True)

    # Flat feature width: three values for every (layer, qubit) pair.
    flat_dim = n_qubits * n_layers * 3
    flat_features, targets = generate_tanh_dataset(M_total, flat_dim)
    print(flat_features.shape)

    shaped_features = flat_features.reshape(M_total, n_layers, n_qubits, 3)
    x_train, y_train, x_test, y_test = generate_dataset(
        shaped_features, targets, M_total, M_train, M_test
    )

    # (destination, array) pairs, written in the order listed.
    outputs = (
        (f"{datasets_path}/{data_type}/x_train_qubit_{n_qubits}_layer_{n_layers}_sample_{M_train}.npy", x_train),
        (f"{datasets_path}/{data_type}/y_train_qubit_{n_qubits}_layer_{n_layers}_sample_{M_train}.npy", y_train),
        (f"{datasets_path}/{data_type}/x_test_qubit_{n_qubits}_layer_{n_layers}_sample_{M_test}.npy", x_test),
        (f"{datasets_path}/{data_type}/y_test_qubit_{n_qubits}_layer_{n_layers}_sample_{M_test}.npy", y_test),
    )
    for destination, array in outputs:
        jnp.save(destination, array)
    

In [10]:
# Split sizes and output settings passed to save_dataset.
M_train = 600  # number of training samples drawn from the pool
M_test = 10000  # number of test samples drawn from the pool
M_total = 11000  # size of the generated pool; must be >= M_train + M_test
n_qubits = 2  # feature count per sample is n_qubits * n_layers * 3
data_type = "regression"  # subdirectory of datasets_path the files are written to
datasets_path = '../../datasets'  # root output directory (relative path)

In [11]:
# Write one train/test dataset for each layer count from 1 through 10;
# save_dataset prints the flat feature shape of each generated pool.
for l in range(1,11):
    save_dataset(n_qubits,l,M_train=M_train,M_test=M_test,M_total=M_total,data_type=data_type,datasets_path=datasets_path)

(11000, 6)
(11000, 12)
(11000, 18)
(11000, 24)
(11000, 30)
(11000, 36)
(11000, 42)
(11000, 48)
(11000, 54)
(11000, 60)
