# Dataset Generation

In this notebook, we will generate the datasets for the experiments. We will generate the following datasets:

- `ds_haar_op.csv` - Dataset of operators uniformly sampled from the Haar measure on the unitary group, with labels indicating whether the state generated by each operator is separable or entangled.
- `ds_haar_obs.csv` - Dataset obtained from the previous one by applying projections on a Bell state to each operator.
- `ds_mixed_ds.csv` - Dataset of mixed states density matrices.
- `ds_mixed_obs.csv` - Dataset obtained from the previous one by applying projections on a Bell state to each density matrix.

In [None]:
# imports
import csv
import random

import numpy as np
from qiskit import QuantumCircuit
from qiskit.quantum_info import (
    DensityMatrix,
    Operator,
    negativity,
    random_unitary,
    schmidt_decomposition,
)

## Haar Distributed Datasets

The dataset `ds_haar_op.csv` consists of the entries of unitary operators used to generate states that are uniformly distributed with the _Haar measure_.
Separable states are generated by applying to a 2-qubit circuit the tensor product of two 2x2 unitary matrices. Entangled states are generated by applying a 4x4 unitary matrix to a 2-qubit circuit.
Theoretically, a random 4x4 unitary matrix can generate a separable state, but the probability is very low. To be sure, the state obtained by applying the unitary operator $U$ is checked for entanglement using the _Schmidt decomposition_.

The dataset `ds_haar_obs.csv` is obtained from the previous one by applying projections on a Bell state to each operator. The probability of successfully projecting on a Bell state is calculated for each pair of local projections.
This is the method used by Trávníček et al. in their paper "Sensitivity versus selectivity in entanglement detection via collective witnesses" (2023).
It should be noted that the method used in the paper is applied to mixed states, while we use it on pure states.

In [None]:
# generation functions

def gen_2qubit_datapoint():
    """Sample one labelled datapoint for a 2-qubit system.

    A uniform draw in [0, 1) selects how the 4x4 operator is built:

    * draw >= 0.5: a single random 4x4 unitary from ``random_unitary``;
    * draw <  0.5: the Kronecker product of two random 2x2 unitaries.

    The operator is applied to a new 2-qubit circuit, the density matrix of the
    resulting state is computed, and the number of terms in its Schmidt
    decomposition across qubit 0 gives the label (exactly one term means the
    state is separable).

    Returns:
        tuple: the 16 entries of the operator in row-major order, followed by a
               boolean that is True when the state is separable.
    """
    if np.random.rand() < 0.5:
        # product operator: tensor two independent single-qubit unitaries
        local_a = random_unitary(2)
        local_b = random_unitary(2)
        unitary_op = Operator(np.kron(local_a, local_b))
    else:
        # generic two-qubit unitary
        unitary_op = random_unitary(4)

    # features: flattened (row-major) matrix entries of the operator
    entries = unitary_op.data.flatten().tolist()

    # label: build the state with a circuit and count its Schmidt terms
    circuit = QuantumCircuit(2)
    circuit.unitary(unitary_op, [0, 1])
    state = DensityMatrix.from_instruction(circuit)
    schmidt_terms = schmidt_decomposition(state, [0])
    is_sep = len(schmidt_terms) == 1

    return (*entries, is_sep)

In [None]:
# single-qubit identity and Pauli operators
identity = np.eye(2)
pauliX = np.array([[0, 1], [1, 0]])
pauliY = np.array([[0, -1j], [1j, 0]])
pauliZ = np.array([[1, 0], [0, -1]])


# Local measurement operators (I + n.sigma) / 4, where the four unit vectors
# n = (+-1, +-1, +-1) / sqrt(3) have an even number of minus signs.
# With the 1/4 prefactor the four operators sum to the identity.
_INV_SQRT3 = 1 / np.sqrt(3)
projector_1 = (identity + _INV_SQRT3 * (pauliX + pauliY + pauliZ)) / 4
projector_2 = (identity + _INV_SQRT3 * (pauliX - pauliY - pauliZ)) / 4
projector_3 = (identity + _INV_SQRT3 * (-pauliX + pauliY - pauliZ)) / 4
projector_4 = (identity + _INV_SQRT3 * (-pauliX - pauliY + pauliZ)) / 4


# bell states
def psi_plus():
    """Return |psi+><psi+| as a 4x4 array, with |psi+> = (|01> + |10>) / sqrt(2)."""
    # amplitudes in the basis order |00>, |01>, |10>, |11>
    amplitudes = np.array([0, 1, 1, 0]) / np.sqrt(2)
    return np.outer(amplitudes, amplitudes)


def psi_minus():
    """Return |psi-><psi-| as a 4x4 array, with |psi-> = (|01> - |10>) / sqrt(2)."""
    # amplitudes in the basis order |00>, |01>, |10>, |11>
    amplitudes = np.array([0, 1, -1, 0]) / np.sqrt(2)
    return np.outer(amplitudes, amplitudes)


# projection
def Pxy(density_matrix, observablex, observabley):
    """Normalised probability of a psi-minus projection between two local measurements.

    Two copies of ``density_matrix`` are tensored together. The numerator is
    Tr[(rho (x) rho) (observablex (x) |psi-><psi-| (x) observabley)]; the
    denominator is the same trace with the Bell projector replaced by the 4x4
    identity, which normalises the result.

    Args:
        density_matrix: state of one copy (a 4x4 matrix as used in this notebook).
        observablex: operator placed before the Bell projector in the tensor product.
        observabley: operator placed after the Bell projector in the tensor product.

    Returns:
        The real part of numerator / denominator (any imaginary part is
        numerical noise).
    """
    two_copies = np.kron(density_matrix, density_matrix)

    def _trace_with(middle):
        # Tr[(rho (x) rho) (observablex (x) middle (x) observabley)]
        collective = np.kron(np.kron(observablex, middle), observabley)
        return np.trace(np.dot(two_copies, collective))

    numerator = _trace_with(psi_minus())
    denominator = _trace_with(np.eye(4))

    ratio = numerator / denominator
    return ratio.real

In [None]:
# transformation functions

def get_density_matrix_from_datapoint(datapoint):
    """Return the density matrix (as a numpy array) of the state encoded by a datapoint.

    Args:
        datapoint: sequence whose first 16 values are the row-major entries of a
            4x4 unitary (the layout produced by ``gen_2qubit_datapoint``) and
            whose last value is the label.

    Returns:
        numpy.ndarray: density matrix of the state obtained by applying the
        unitary to a new 2-qubit circuit.
    """
    # Rebuild the 4x4 operator from the flattened entries; the trailing label is dropped.
    matrix = np.asarray(datapoint[:-1], dtype=complex).reshape(4, 4)
    operator = Operator(matrix)

    # Same circuit construction as gen_2qubit_datapoint.
    qc = QuantumCircuit(2)
    qc.unitary(operator, [0, 1])
    return DensityMatrix.from_instruction(qc).data


def get_new_features(datapoint):
    """Turn an operator datapoint into ten ``Pxy`` values plus its label.

    The density matrix of the datapoint is evaluated with ``Pxy`` for the four
    equal pairs of local projectors followed by the six mixed pairs, in a fixed
    order.

    Returns:
        tuple: the ten ``Pxy`` values followed by the label ``datapoint[-1]``.
    """
    rho = get_density_matrix_from_datapoint(datapoint)

    p1, p2, p3, p4 = projector_1, projector_2, projector_3, projector_4
    projector_pairs = (
        (p1, p1), (p2, p2), (p3, p3), (p4, p4),  # equal pairs
        (p1, p3), (p1, p4), (p2, p4),            # mixed pairs
        (p1, p2), (p2, p3), (p3, p4),
    )
    features = [Pxy(rho, left, right) for left, right in projector_pairs]

    # the label is carried over unchanged
    return (*features, datapoint[-1])

## Mixed State Datasets

The dataset `ds_mixed_ds.csv` consists of the entries of density matrices of mixed states.
The mixed states are generated as in the previously cited paper by Trávníček et al. (2023); the method consists of:
- First, a 4 × 4 diagonal matrix $\rho$ is randomly generated, where the diagonal elements are uniformly distributed random numbers from the range of [0, 1] and satisfy the condition $Tr(\rho) = 1$.
- The next step involves creating a random unitary matrix $U$, which transforms the diagonal matrix $\rho$ into a general random density matrix $\hat\rho$ using the following relation
$$
\hat\rho = U^\dagger \rho U.
$$
- Additionally, we check for entanglement using the _negativity criterion_.

This generates states in an unbalanced way, with a higher probability of generating entangled states. As such, undersampling is performed to balance the dataset.

In [3]:
def is_separable(density_matrix, tol=1e-10):
    """Check whether a 2-qubit density matrix is separable using the negativity criterion.

    The negativity is computed from the partial transpose over one qubit. A
    negativity of zero (within ``tol``) is reported as separable.

    Args:
        density_matrix: density matrix of a 2-qubit state, in any form accepted
            by ``DensityMatrix`` (e.g. a 4x4 numpy array).
        tol: absolute threshold below which the negativity counts as zero.

    Returns:
        True when |negativity| < tol (separable), False otherwise (entangled).
    """
    # `qargs` lists the qubits to partially transpose. A 2-qubit state only has
    # qubits 0 and 1, so index 2 was out of range; transposing either qubit
    # gives the same negativity.
    neg = negativity(DensityMatrix(density_matrix), [0])
    # A non-zero negativity means the state is entangled.
    return abs(neg) < tol


def generate_unbalanced_dataset(size: int = 10000) -> list:
    """Generate a dataset of density matrices of mixed 2-qubit states.

    Each datapoint is built from a random 4x4 diagonal matrix rho, normalised so
    that Tr(rho) = 1, and a random unitary U obtained from a QR decomposition.
    The density matrix is rho_hat = U @ rho @ U^dagger, which is labelled with
    ``is_separable`` (negativity criterion).

    New datapoints are generated until BOTH classes contain at least size/2
    elements, so a later undersampling step can always extract ``size // 2``
    states of each class regardless of which class is rarer. The returned list
    is therefore usually unbalanced and longer than ``size``.

    Args:
        size: desired size of the (balanced) dataset to be extracted later.

    Returns:
        list: one tuple per state, holding the 16 row-major entries of the
        density matrix followed by a boolean that is True for separable states.
    """
    dataset = []
    # running class counts avoid re-scanning the whole dataset on every iteration
    n_separable = 0
    n_entangled = 0
    while min(n_separable, n_entangled) < size / 2:
        # Generate a random 4x4 diagonal matrix and normalise its trace
        rho = np.diag(np.random.rand(4))
        rho = rho / np.trace(rho)

        # Generate a random unitary matrix from the QR decomposition of a random matrix.
        # NOTE(review): the entries are uniform in [0, 1), so U is not Haar-distributed;
        # if Haar sampling is intended, consider `random_unitary(4).data` - confirm.
        U = np.random.rand(4, 4) + 1j * np.random.rand(4, 4)
        U, _ = np.linalg.qr(U)

        # check if U is unitary
        assert np.allclose(np.eye(4), U @ U.T.conj()), "U is not unitary"

        # Create the random density matrix
        rho_hat = U @ rho @ U.T.conj()

        # determine if rho_hat is separable or entangled
        sep = is_separable(rho_hat)
        if sep:
            n_separable += 1
        else:
            n_entangled += 1

        # add the datapoint to the dataset
        datapoint = *rho_hat.flatten(), sep
        dataset.append(datapoint)
    print(f"Separable = {n_separable}, Entangled = {n_entangled}")
    return dataset

def balance_dataset(dataset: list, size: int = 10000) -> list:
    """Balance the dataset by undersampling so both classes have the same size.

    DEPRECATED: The function is a less efficient implementation of `imblearn.under_sampling.RandomUnderSampler`.

    The first elements of each class are kept, up to ``size // 2`` per class.
    If either class has fewer than ``size // 2`` elements, both classes are cut
    to the size of the smaller one, so the result is always balanced (and then
    shorter than ``size``).

    Args:
        dataset: unbalanced dataset; the last field of every datapoint is the
            boolean label (True = separable).
        size: target size of the balanced dataset.

    Returns:
        list: balanced dataset in random order. The input list is not modified.
    """
    separable = [d for d in dataset if d[-1]]
    entangled = [d for d in dataset if not d[-1]]

    # same quota for both classes, limited by the rarer class
    per_class = min(size // 2, len(separable), len(entangled))

    # join and shuffle
    balanced = separable[:per_class] + entangled[:per_class]
    random.shuffle(balanced)
    return balanced

In [None]:
def get_obs_features_from_density_matrix(datapoint):
    """Turn a density-matrix datapoint into ten ``Pxy`` values plus its label.

    Args:
        datapoint: sequence whose first 16 values are the row-major entries of a
            4x4 density matrix and whose last value is the label.

    Returns:
        tuple: the ten ``Pxy`` values (four equal projector pairs followed by
        six mixed pairs) and then the label ``datapoint[-1]``.
    """
    # Rebuild the 4x4 matrix in double precision; a single-precision cast would
    # discard digits of the stored state before the features are computed.
    density_matrix = np.array(datapoint[:-1], dtype=np.complex128).reshape(4, 4)

    proj_combinations = [(projector_1, projector_1),
                         (projector_2, projector_2),
                         (projector_3, projector_3),
                         (projector_4, projector_4),
                         (projector_1, projector_3),
                         (projector_1, projector_4),
                         (projector_2, projector_4),
                         (projector_1, projector_2),
                         (projector_2, projector_3),
                         (projector_3, projector_4),
                         ]
    # measure the combinations of projectors
    new_x = [Pxy(density_matrix, p1, p2) for p1, p2 in proj_combinations]

    # also return the label
    return *new_x, datapoint[-1]

## Generation

In [None]:
# Haar datasets: operator entries, then the Pxy features computed from them
dataset1 = [gen_2qubit_datapoint() for _ in range(10_000)]
dataset2 = [get_new_features(datapoint) for datapoint in dataset1]
# mixed-state datasets: balanced density matrices, then the Pxy features computed from them
dataset3 = balance_dataset(generate_unbalanced_dataset(10_000), size=10_000)
dataset4 = [get_obs_features_from_density_matrix(datapoint) for datapoint in dataset3]

# The last field of every datapoint is `sep` (True = separable), so the
# entangled states are the ones whose last field is falsy.
print(f"Haar Dataset size: {len(dataset1)}")
print(f"Entangled states: {sum(1 for datapoint in dataset1 if not datapoint[-1])}")
print(f"Non-entangled states: {sum(1 for datapoint in dataset1 if datapoint[-1])}")
print(f"len datapoint = {len(dataset1[0])}")

In [None]:
# select name and dataset to save
filepath = "dataset.csv"
dataset = dataset1

# write csv
# newline="" lets the csv writer control line endings (otherwise blank rows
# appear between records on Windows).
# NOTE(review): the header below names the 16 operator entries plus the label;
# it does not describe the 10-feature datasets (dataset2 / dataset4) - adjust
# it when saving those.
with open(filepath, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["U11", "U12", "U13", "U14",
                     "U21", "U22", "U23", "U24",
                     "U31", "U32", "U33", "U34",
                     "U41", "U42", "U43", "U44",
                     "sep"])
    writer.writerows(dataset)