In [1]:
import jax
import jax.numpy as jnp
import numpy as np

import os


In [2]:
# Select the CPU platform and enable 64-bit dtypes for JAX.
jax.config.update('jax_platform_name', 'cpu')
jax.config.update("jax_enable_x64", True)
# Last expression of the cell: displays the backend JAX is actually using.
jax.default_backend()

An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


'cpu'

In [3]:
# Notebook-level PRNG key (seed 0); generate_linearly_separable reads this global when sampling.
key = jax.random.PRNGKey(0)

In [4]:
def generate_linearly_separable(n_samples, n_features, rng_key=None):
    """Data generation procedure for 'linearly separable'.

    Candidates are drawn uniformly from the hypercube [-pi/2, pi/2]^n_features.
    A candidate is kept only if the absolute value of its projection onto the
    all-ones vector exceeds a margin of ``0.3 * n_features``. The first
    ``n_samples`` survivors are returned.

    Args:
        n_samples (int): number of samples to generate
        n_features (int): dimension of the data samples
        rng_key: optional JAX PRNG key used for sampling. When omitted, the
            notebook-level ``key`` is used, which is what this function
            always did before the parameter existed.

    Returns:
        tuple: ``(X, y)`` with ``X`` of shape ``(n_samples, n_features)`` and
        ``y`` of shape ``(n_samples,)``. The label is 0 when the projection
        is positive and 1 otherwise.

    Raises:
        ValueError: if fewer than ``n_samples`` candidates lie outside the
            margin (previously a shorter dataset was returned silently).
    """
    if rng_key is None:
        # Backward-compatible default: fall back to the notebook-level key.
        rng_key = key

    w_true = jnp.ones(n_features)
    margin = 0.3 * n_features
    # hack: sample more data than we need randomly from a hypercube
    candidates = jax.random.uniform(
        rng_key,
        shape=(40 * n_samples, n_features),
        minval=-jnp.pi / 2,
        maxval=jnp.pi / 2,
    )

    # only retain data outside a margin (one matrix-vector product instead of
    # a Python loop issuing one dot product per candidate row)
    projections = candidates @ w_true
    keep = jnp.abs(projections) > margin

    n_kept = int(keep.sum())
    if n_kept < n_samples:
        raise ValueError(
            f"only {n_kept} of {40 * n_samples} candidates lie outside the margin "
            f"{margin}; cannot return {n_samples} samples"
        )

    X = candidates[keep][:n_samples]
    kept_projections = projections[keep][:n_samples]

    # Positive side of the hyperplane -> label 0, everything else -> label 1.
    y = jnp.where(kept_projections > 0, 0, 1)
    return X, y

In [5]:
def generate_dataset(features,labels,M_total,M_train,M_test,key=jax.random.PRNGKey(0)):
    """Draw disjoint random train and test subsets from the first M_total samples.

    Args:
        features: array of samples, indexed along its first axis.
        labels: array of labels aligned with ``features``.
        M_total (int): size of the index pool ``0 .. M_total - 1`` to draw from.
        M_train (int): number of training samples to draw.
        M_test (int): number of test samples to draw.
        key: JAX PRNG key. It is split internally so the test draw and the
            train draw each use their own subkey.

    Returns:
        tuple: ``(x_train, y_train, x_test, y_test)`` as JAX arrays.

    Raises:
        ValueError: if ``M_train + M_test`` exceeds ``M_total``, or if
            ``features`` / ``labels`` hold fewer than ``M_total`` entries.
    """
    if M_train + M_test > M_total:
        raise ValueError(
            f"M_train + M_test = {M_train + M_test} exceeds M_total = {M_total}"
        )
    # JAX clamps out-of-range gather indices instead of raising, so a too-short
    # input would silently produce duplicated rows; check it up front.
    if len(features) < M_total or len(labels) < M_total:
        raise ValueError(
            f"features/labels must contain at least M_total = {M_total} entries, "
            f"got {len(features)} and {len(labels)}"
        )

    # A PRNG key must not be reused for two draws: derive one subkey per draw.
    test_key, train_key = jax.random.split(key)

    # subsample train and test split
    test_indices = jax.random.choice(test_key, M_total, shape=(M_test,), replace=False)

    # Training indices come only from what the test draw left over.
    remaining_indices = jnp.setdiff1d(jnp.arange(M_total), test_indices)
    train_indices = jax.random.choice(
        train_key, remaining_indices, shape=(M_train,), replace=False)

    x_train, y_train = features[train_indices], labels[train_indices]
    x_test, y_test = features[test_indices], labels[test_indices]

    return jnp.array(x_train),jnp.array(y_train),jnp.array(x_test),jnp.array(y_test)

In [6]:
def save_dataset(n_qubits,n_layers,M_train,M_test,data_type,M_total,datasets_path):
    """Generate a linearly separable dataset, split it, and write it to disk.

    The feature vector has ``n_qubits * n_layers * 3`` entries per sample and
    is reshaped to ``(M_total, n_layers, n_qubits, 3)`` before splitting.
    Four ``.npy`` files (x/y for train and test) are written under
    ``{datasets_path}/{data_type}``; the file names encode the qubit count,
    layer count and split size.

    Args:
        n_qubits (int): number of qubits.
        n_layers (int): number of layers.
        M_train (int): number of training samples to save.
        M_test (int): number of test samples to save.
        data_type (str): name of the sub-directory for this kind of data.
        M_total (int): number of samples to generate before splitting.
        datasets_path (str): root directory of all datasets.
    """
    # The target folder must exist before anything is written into it.
    os.makedirs(f"{datasets_path}/{data_type}", exist_ok=True)

    # Flat dimensionality of one sample.
    feature_dim = n_qubits * n_layers * 3
    flat_features, labels = generate_linearly_separable(M_total, feature_dim)
    print(flat_features.shape)

    # Arrange each sample as (layer, qubit, 3 values) before splitting.
    shaped_features = flat_features.reshape(M_total, n_layers, n_qubits, 3)
    x_train, y_train, x_test, y_test = generate_dataset(
        shaped_features, labels, M_total, M_train, M_test)

    # Write order: x_train, y_train, x_test, y_test.
    outputs = (
        (f"{datasets_path}/{data_type}/x_train_qubit_{n_qubits}_layer_{n_layers}_sample_{M_train}.npy", x_train),
        (f"{datasets_path}/{data_type}/y_train_qubit_{n_qubits}_layer_{n_layers}_sample_{M_train}.npy", y_train),
        (f"{datasets_path}/{data_type}/x_test_qubit_{n_qubits}_layer_{n_layers}_sample_{M_test}.npy", x_test),
        (f"{datasets_path}/{data_type}/y_test_qubit_{n_qubits}_layer_{n_layers}_sample_{M_test}.npy", y_test),
    )
    for target_path, array in outputs:
        jnp.save(target_path, array)
    

In [8]:
# Shared settings for the dataset-generation cells below.
M_train = 600  # training samples saved per dataset
M_test = 10000  # test samples saved per dataset
M_total = 16000  # samples generated before the train/test split
n_qubits = 1
data_type = "classification_linear"  # sub-directory name under datasets_path
datasets_path = '../../datasets'  # relative to the notebook's working directory

In [29]:
# Sweep the layer count from 1 to 8 with the configured n_qubits; save_dataset
# uses n_qubits * n_layers * 3 features per sample and prints the flat shape.
for l in range(1,9):
    save_dataset(n_qubits=n_qubits,n_layers=l,M_train=M_train,M_test=M_test,data_type=data_type,M_total=M_total,datasets_path=datasets_path)

(16000, 3)
(16000, 6)
(16000, 9)
(16000, 12)
(16000, 15)
(16000, 18)
(16000, 21)
(16000, 24)


In [30]:
# Fixed architecture (1 qubit, 8 layers); vary only the training-set size.
n_qubits = 1 
n_layers = 8
# NOTE: the loop variable rebinds the notebook-level M_train, which is left at 5000 afterwards.
for M_train in [1200,2000,5000]:
    save_dataset(n_qubits=n_qubits,n_layers=n_layers,M_train=M_train,M_test=M_test,data_type=data_type,M_total=M_total,datasets_path=datasets_path)

(16000, 24)
(16000, 24)
(16000, 24)


In [9]:
# Reset the training-set size, then save datasets for several (n_qubits, n_layers)
# pairs; each pair gives n_qubits * n_layers * 3 = 24 features per sample.
M_train = 600
for (n,l) in [(2,4),(4,2),(8,1)]:
    save_dataset(n_qubits=n,n_layers=l,M_train=M_train,M_test=M_test,data_type=data_type,M_total=M_total,datasets_path=datasets_path)

(16000, 24)
(16000, 24)
(16000, 24)


In [11]:
# Same (n_qubits, n_layers) pairs as the 600-sample run, now with 5000 training samples.
M_train = 5000
for (n,l) in [(2,4),(4,2),(8,1)]:
    save_dataset(n_qubits=n,n_layers=l,M_train=M_train,M_test=M_test,data_type=data_type,M_total=M_total,datasets_path=datasets_path)

(16000, 24)
(16000, 24)
(16000, 24)
