In [63]:
import numpy as np
import random
import pandas as pd
import scipy.io
import matplotlib.pyplot as plt
from qiskit.quantum_info import DensityMatrix, random_density_matrix
from qiskit.quantum_info.operators import Operator
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from scipy.linalg import sqrtm
from scipy.optimize import minimize

import torch
#tensorflow imports
from tensorflow import keras
from keras import layers, losses, Model
import logging
tf.get_logger().setLevel(logging.ERROR)

In [51]:
data = np.load('../datasets/1k_shots_noisy/2q_noise_0.05_1k_.npz')

In [52]:
X = data['counts']
y = data['states']

In [53]:
print(X.shape)
print(y.shape)

(100000, 36)
(100000, 4, 4)


# Cholesky MLE

In [14]:
# First limit the size of X so this actually runs
X_mle = X[0:1000 ,:]
y_mle = y[0:1000, :]
print(X_mle.shape)

(1000, 36)


In [18]:
shots = data['shots_per_basis']
counts = (X_mle * shots).astype(int)        # freq counts


# Build computational basis projectors for 2 qubits
proj = []
for m in range(4):
    P = np.zeros((4,4), dtype=complex)
    P[m, m] = 1
    proj.append(P)

# Define basis-change unitaries for X, Y, Z on one qubit
H = np.array([[1, 1], [1, -1]]) / np.sqrt(2)
Sdg = np.array([[1, 0], [0, -1j]])
bases = {
    'X': H,
    'Y': Sdg @ H,
    'Z': np.eye(2)
}

settings = []
for b1 in ['X','Y','Z']:
    for b2 in ['X','Y','Z']:
        U = np.kron(bases[b1], bases[b2])
        settings.append(U)

E = []
for U in settings:
    U_dag = U.conj().T
    for P in proj:
        E.append(U_dag @ P @ U)

# Map the cholesky parameterised vector to a PSD trace 1 density matrix
def params_to_rho(params):
    # params: length 16
    L = np.zeros((4,4), dtype=complex)
    idx = 0
    # diagonal entries (real, positive)
    for i in range(4):
        L[i, i] = params[idx]
        idx += 1
    # lower-triangular off-diagonals (real + imag)
    for i in range(1, 4):
        for j in range(i):
            re = params[idx]; im = params[idx+1]
            L[i, j] = re + 1j * im
            idx += 2
    rho = L @ L.conj().T
    return rho / np.trace(rho)

# COmpute NLL
def neg_log_likelihood(params, count):
    rho = params_to_rho(params)
    # avoid log(0) by clipping
    probs = np.array([np.real(np.trace(Ej @ rho)) for Ej in E])
    probs = np.clip(probs, 1e-12, 1.0)
    return -np.sum(count * np.log(probs))


N = X_mle.shape[0]
#initialise the estimator
rho_est = np.zeros((N, 4, 4), dtype=complex)

for i in range(N):
    
    # initial guess: uniform identity
    init = np.zeros(16)
    init[:4] = np.sqrt(1/4)
    
    res = minimize(
        neg_log_likelihood, init, args=(counts[i],),
        method='L-BFGS-B',
        options={'maxiter': 500}
    )
    #reconstruct the density matrix from cholesky decomp
    rho_est[i] = params_to_rho(res.x)


In [19]:
# Recover target density matrices from y
rho_target = np.zeros((N, 4, 4), dtype=complex)


# compute Fidelity
def fidelity(rho1, rho2):
    sqrt_rho1 = sqrtm(rho1)
    F = np.trace(sqrtm(sqrt_rho1 @ rho2 @ sqrt_rho1))
    return np.real(F)**2

fidelities = np.array([fidelity(rho_est[i], y_mle[i]) for i in range(N)])

mean_fidelity = np.mean(fidelities)
std_fidelity  = np.std(fidelities)
print(f"Mean fidelity: {mean_fidelity:.4f} ± {std_fidelity:.4f}")

Mean fidelity: 0.4725 ± 0.1307


# Cholesky NN

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=2)

In [58]:
def rho_to_alpha(rho):
    # T is lower triangular matrix
    L = np.linalg.cholesky(rho + 1e-14 * np.eye(y[0].shape[0]))
    alpha = []
    # extract the reals on the diagonals
    for i in range(rho.shape[0]):
        alpha.append(np.real(L[i, i]))  # add them to alpha
    
    # Off diagonals, contain real and imag components
    for i in range(1, rho.shape[0]):
        for j in range(i):
            alpha.append(np.real(L[i, j]))
            alpha.append(np.imag(L[i, j]))
    return np.array(alpha)

alphas = np.stack([rho_to_alpha(y[i]) for i in range(len(y))], axis=0)

N_x = X_train.shape[1]
N_alpha = alphas.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(shape=(N_x,)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(N_alpha)   
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="mse",
    metrics=["mse"]
)

history = model.fit(
    X_train, alphas,
    validation_split=0.2,
    epochs=1000,
    batch_size=1000,
)


Epoch 1/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.4939 - mse: 0.4939 - val_loss: 0.1525 - val_mse: 0.1525
Epoch 2/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.1291 - mse: 0.1291 - val_loss: 0.0741 - val_mse: 0.0741
Epoch 3/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0925 - mse: 0.0925 - val_loss: 0.0596 - val_mse: 0.0596
Epoch 4/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0777 - mse: 0.0777 - val_loss: 0.0538 - val_mse: 0.0538
Epoch 5/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.0694 - mse: 0.0694 - val_loss: 0.0509 - val_mse: 0.0509
Epoch 6/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.0639 - mse: 0.0639 - val_loss: 0.0493 - val_mse: 0.0493
Epoch 7/1000
[1m56/56[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - 

In [59]:
def alpha_to_rho(alpha):
    """Convert batch of alpha vectors to density matrices using Cholesky."""
    N = alpha.shape[0]
    rho = np.zeros((N, 4, 4), dtype=np.complex64)
    for i in range(N):
        a = alpha[i]
        L = np.zeros((4, 4), dtype=np.complex64)
        idx = 0
        for j in range(4):
            L[j, j] = a[idx]
            idx += 1
        for j in range(1, 4):
            for k in range(j):
                re = a[idx]
                im = a[idx + 1]
                L[j, k] = re + 1j * im
                idx += 2
        rho_i = L @ L.conj().T
        rho[i] = rho_i / np.trace(rho_i)
    return rho

def reconstruct_true_rho(y):
    """Convert vectorized real+imag back to complex 4x4 matrices."""
    N = y.shape[0]
    real = y[:, :16].reshape(N, 4, 4)
    imag = y[:, 16:].reshape(N, 4, 4)
    return real + 1j * imag

def fidelity(rho1, rho2):
    """
    Uhlmann fidelity between two density matrices.
    inputs: the two density matrices to be compares
    """
    sqrt_rho1 = sqrtm(rho1)
    product = sqrt_rho1 @ rho2 @ sqrt_rho1
    sqrt_product = sqrtm(product)
    F = np.trace(sqrt_product)
    return np.real(F)**2


alpha_pred = model.predict(X_test)      
rho_pred = alpha_to_rho(alpha_pred)  
rho_true = y_test

# Compute fidelities
fidelities = np.array([
    fidelity(rho_pred[i], rho_true[i])
    for i in range(len(rho_pred))
])

# Summary
mean_fid = np.mean(fidelities)
std_fid = np.std(fidelities)
print(f"Test Set Fidelity: {mean_fid:.4f} ± {std_fid:.4f}")

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297us/step
Test Set Fidelity: 0.2935 ± 0.1633


# 

In [None]:
def rho_to_alpha(rho):
    """
    Convert a (d x d) density matrix rho into its "Cholesky parameters" alpha.
    """
    L = np.linalg.cholesky(rho)
    d = rho.shape[0]
    alpha = []
    # diagonal (real, >0)
    for i in range(d):
        alpha.append(np.real(L[i, i]))
    # strictly lower triangle (real + imag)
    for i in range(1, d):
        for j in range(i):
            alpha.append(np.real(L[i, j]))
            alpha.append(np.imag(L[i, j]))
    return np.array(alpha, dtype=np.float32)

def fidelity(rho1, rho2):
    A = sqrtm(rho1)
    return np.real(np.trace(sqrtm(A @ rho2 @ A)))**2


def tf_sqrtm_psd(A):
    """
    Compute principal sqrt of Hermitian PSD A (batch,d,d) via eigendecomposition.
    """
    
    eigvals, eigvecs = tf.linalg.eigh(A)

    eigvals = tf.math.real(eigvals)
    eigvals = tf.clip_by_value(eigvals, 0.0, tf.reduce_max(eigvals))
    sqrtvals = tf.sqrt(eigvals)
    D = tf.cast(tf.linalg.diag(sqrtvals), tf.complex64)
    
    return eigvecs @ D @ tf.linalg.adjoint(eigvecs)

def tf_alpha_to_rho(alpha, d):
    """
    Map real alpha (batch, N_alpha) -> complex density matrices (batch, d, d).
    Enforces positivity via a softplus on the Cholesky diag.
    """
    batch = tf.shape(alpha)[0]
    # split diag vs off-diag
    raw_diag = alpha[:, :d] 
    off_vals  = alpha[:, d:] 

    # start zero L
    L = tf.zeros((batch, d, d), tf.complex64)

    diag_pos = tf.nn.softplus(raw_diag) + 1e-6 
    diag_c   = tf.cast(diag_pos, tf.complex64)
    L = tf.linalg.set_diag(L, diag_c)

    idx = 0
    for i in range(1, d):
        for j in range(i):
            re = off_vals[:, idx]
            im = off_vals[:, idx+1]
            idx += 2
            cij = (tf.cast(re, tf.complex64)
                   + 1j * tf.cast(im, tf.complex64))
            cij = tf.reshape(cij, (batch, 1, 1))

            # mask with a one-hot at (i,j)
            flat = tf.one_hot(i*d + j, d*d, dtype=tf.complex64)
            mask = tf.reshape(flat, (d, d))[None, :, :]

            L = L + cij * mask

    rho = L @ tf.linalg.adjoint(L)  
    tr  = tf.linalg.trace(rho)
    return rho / tr[:, None, None]

def make_fidelity_loss(d):
    def fidelity_loss(alpha_true, alpha_pred):
        rho_t = tf_alpha_to_rho(alpha_true, d)
        rho_p = tf_alpha_to_rho(alpha_pred, d)

        # tiny regularizer to guard numeric issues
        I = tf.eye(d, dtype=tf.complex64)[None, :, :]
        rho_t = rho_t + 1e-8 * I
        rho_p = rho_p + 1e-8 * I

        sqrt_t = tf_sqrtm_psd(rho_t)
        inter  = sqrt_t @ (rho_p @ sqrt_t)
        s_mat  = tf_sqrtm_psd(inter)

        tr_s = tf.linalg.trace(s_mat)
        F = tf.abs(tr_s)**2
        return tf.reduce_mean(1.0 - F)
    return fidelity_loss

def make_hybrid_loss(d, lam=0.8):
    """
    Hybrid loss = lam * MSE + (1 - lam) * (1 - fidelity).
    """
    fid_loss_fn = make_fidelity_loss(d)

    def hybrid_loss(alpha_true, alpha_pred):
        # MSE on the Cholesky parameters
        mse = tf.reduce_mean(tf.square(alpha_true - alpha_pred))

        # fidelity loss already = mean(1 - F)
        phys = fid_loss_fn(alpha_true, alpha_pred)

        return lam * mse + (1.0 - lam) * phys

    return hybrid_loss

In [64]:
d = y_train.shape[1]

alphas_train = np.stack([rho_to_alpha(rho) for rho in y_train], axis=0)
N_alpha = alphas_train.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(N_alpha),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=make_hybrid_loss(d),
)

# fit
history = model.fit(
    X_train, alphas_train,
    validation_split=0.3,
    epochs=50,
    batch_size=64,
)

Epoch 1/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1682 - val_loss: 0.0307
Epoch 2/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0373 - val_loss: 0.0281
Epoch 3/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0322 - val_loss: 0.0268
Epoch 4/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0307 - val_loss: 0.0261
Epoch 5/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0301 - val_loss: 0.0259
Epoch 6/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0294 - val_loss: 0.0254
Epoch 7/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0289 - val_loss: 0.0252
Epoch 8/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0289 - val_loss: 0.0253
Epoch 9/50
[1m766/766[0m [32m━━━━━━━━

In [65]:
def alpha_to_rho_batch(alpha):
    """Convert batch of alpha vectors to density matrices using Cholesky."""
    N = alpha.shape[0]
    rho = np.zeros((N, 4, 4), dtype=np.complex64)
    for i in range(N):
        a = alpha[i]
        L = np.zeros((4, 4), dtype=np.complex64)
        idx = 0
        for j in range(4):
            L[j, j] = a[idx]
            idx += 1
        for j in range(1, 4):
            for k in range(j):
                re = a[idx]
                im = a[idx + 1]
                L[j, k] = re + 1j * im
                idx += 2
        rho_i = L @ L.conj().T
        rho[i] = rho_i / np.trace(rho_i)
    return rho

def fidelity(rho1, rho2):
    """
    Uhlmann fidelity between two density matrices.
    inputs: the two density matrices to be compares
    """
    sqrt_rho1 = sqrtm(rho1)
    product = sqrt_rho1 @ rho2 @ sqrt_rho1
    sqrt_product = sqrtm(product)
    F = np.trace(sqrt_product)
    return np.real(F)**2


alpha_pred = model.predict(X_test)      
rho_pred = alpha_to_rho_batch(alpha_pred)  

# Compute fidelities
fidelities = np.array([
    fidelity(rho_pred[i], y_test[i])
    for i in range(len(rho_pred))
])

# Summary
mean_fid = np.mean(fidelities)
std_fid = np.std(fidelities)
print(f"Test Set Fidelity: {mean_fid:.4f} ± {std_fid:.4f}")

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 308us/step
Test Set Fidelity: 0.8455 ± 0.1187


In [66]:
model.save_weights("weights.weights.h5")

# High Shot No Noise Test

In [67]:
data = np.load('../datasets/2q_nonoise_10k_.npz')
X = data['counts']
y = data['states']

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=2)

d = y_train.shape[1]

alphas_train = np.stack([rho_to_alpha(rho) for rho in y_train], axis=0)
N_alpha = alphas_train.shape[1]

model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dense(N_alpha),
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss=make_hybrid_loss(d),
)


history = model.fit(
    X_train, alphas_train,
    validation_split=0.3,
    epochs=50,
    batch_size=64,
)

Epoch 1/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 0.1711 - val_loss: 0.0142
Epoch 2/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0238 - val_loss: 0.0095
Epoch 3/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0170 - val_loss: 0.0075
Epoch 4/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0145 - val_loss: 0.0066
Epoch 5/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0135 - val_loss: 0.0058
Epoch 6/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0128 - val_loss: 0.0057
Epoch 7/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.0123 - val_loss: 0.0054
Epoch 8/50
[1m766/766[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.0119 - val_loss: 0.0051
Epoch 9/50
[1m766/766[0m [32m━━━━━━━━

In [68]:
alpha_pred = model.predict(X_test)      
rho_pred = alpha_to_rho_batch(alpha_pred)  

# Compute fidelities
fidelities = np.array([
    fidelity(rho_pred[i], y_test[i])
    for i in range(len(rho_pred))
])

# Summary
mean_fid = np.mean(fidelities)
std_fid = np.std(fidelities)
print(f"Test Set Fidelity: {mean_fid:.4f} ± {std_fid:.4f}")

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 369us/step
Test Set Fidelity: 0.9799 ± 0.0178


python pure_mixed_build.py --n_qubits 2 --n_states 100000 --shots_per_basis 5000 --pure_fraction 0.5 --xi 0.0 --resample_per_state True --output 2q_5k_.npz