In [7]:
import numpy as np
import pandas as pd
import scipy.linalg

In [58]:
def dm_to_bloch_reg(rho):
    # rho is a density matrix
    state = dm_to_bloch_vector(rho)
    state = cartesian_to_spherical(state)

    # state is now (theta, phi, r)
    for i, intv in enumerate(thetas):
        if (state[0] in intv):
            theta_reg = i
    for i, intv in enumerate(phis):
        if (state[1] in intv):
            phi_reg = i
    for i, intv in enumerate(radii):
        if (state[2] in intv):
            r_reg = i
    return (theta_reg, phi_reg, r_reg)

def dm_to_bloch_vector(rho):
    x = np.array([[0,1],[1,0]])
    y = np.array([[0,-1j],[1j,0]])
    z = np.array([[1,0],[0,-1]])

    # where rho is a density matrix
    return (np.trace(x @ rho), np.trace(y @ rho), np.trace(z @ rho))

def dm_to_polar_coords(rho):
    # rho is a density matrix
    return cartesian_to_spherical(dm_to_bloch_vector(rho))

def cartesian_to_spherical(state):
    x = state[0]
    y = state[1]
    z = state[2]

    r = min(np.sqrt(x**2 + y**2 + z**2), 1)
    theta = max(0, np.arctan(y/x))
    phi = max(0, np.arccos(z/r))

    return (theta, phi, r)

def spherical_to_cartesian(state):
    theta = state[0]
    phi = state[1]
    r = state[2]

    x = r*np.cos(theta)*np.sin(phi)
    y = r*np.sin(theta)*np.sin(phi)
    z = r*np.cos(phi)

    return (x, y, z)

def random_state_in_reg(reg):
    # where reg is a tuple specifying (theta, phi, radius)
    # returns a density matrix
    theta = np.random.uniform(thetas[reg[0]].left, thetas[reg[0]].right)

    # maybe consider the poles as one state
    # if (reg == (0, 0) or reg == (len(thetas)-1, len(phis)-1)):
        # phi = np.random.uniform(-np.pi, np.pi)
    # else:
    phi = np.random.uniform(phis[reg[1]].left, phis[reg[1]].right)
    r = np.random.uniform(radii[reg[2]].left, radii[reg[2]].right)
    
    state = spherical_to_cartesian((theta, phi, r))
    rho = (np.eye(2) + state[0]*np.array([[0,1],[1,0]]) + state[1]*np.array([[0, -1j], [1j, 0]]) + state[2]*np.array([[1,0], [0,-1]]))/2
    return np.matrix(rho)

def generate_target_state(n):
    s = np.matrix([1, 0])
    rho = np.outer(s, s.H)
    
    for i in range(n):
        ht = GATES[0] @ GATES[1]
        rho = apply_operator(rho, ht)
    return rho

def apply_operator(rho, op): # add noise
    return op @ rho @ op.H

def dm_fidelity(rho, sigma):
    rho_sqrt = scipy.linalg.sqrtm(rho)
    return np.trace(scipy.linalg.sqrtm(rho_sqrt @ sigma @ rho_sqrt))**2

In [59]:
n = 10**5
k = 16

GATES = [
    np.matrix([[1, 1], [1, -1]]) * 1/np.sqrt(2), # H
    np.matrix([[1, 0], [0, np.exp(1j * np.pi / 4)]]), # T
    np.matrix([[1, 0], [0, 1]]) # I
]

goal = generate_target_state(n=n)
thetas = pd.cut(np.linspace(0, np.pi, k), k, precision=10, include_lowest=True)
phis = pd.cut(np.linspace(0, 2*np.pi, k), k,  precision=10, include_lowest=True)
# rs = (1 - np.geomspace(1e-3, 1, k))[::-1]
# rs[0] = 0
# rs[-1] = 1
radii = pd.cut(np.linspace(0, 1, k), k, precision=10, include_lowest=True)
goal_reg = dm_to_bloch_reg(goal)

states = [(i, j, k) for i in range(len(thetas)) for j in range(len(phis)) for k in range(len(radii))]
values = np.zeros(len(thetas) * len(phis) * len(radii))

In [60]:
print(np.trace(goal@goal.H))
print(goal_reg)
print(dm_to_bloch_vector(goal))
print(dm_to_polar_coords(goal))
print(goal)
print('0: ', np.trace(np.outer(np.matrix([1,0]), np.matrix([1,0]).H) @ goal))
print('1: ', np.trace(np.outer(np.matrix([0,1]), np.matrix([0,1]).H) @ goal))

(0.9999999999740942+0j)
(7, 1, 15)
((-0.046210220683166825-8.049116928532385e-15j), (-0.6028898141256764-6.716849298982197e-15j), (0.7964850830379651-7.965850201685498e-15j))
((1.4942980283690859-1.2423961952959737e-14j), (0.6493366346556542+1.0640793200575643e-14j), (0.9999999999870368-1.9232093644367427e-15j))
[[ 0.89824254-9.88098492e-15j -0.02310511+3.01444907e-01j]
 [-0.02310511-3.01444907e-01j  0.10175746-1.91513472e-15j]]
0:  (0.8982425415125112-9.880984919163893e-15j)
1:  (0.10175745847454612-1.915134717478395e-15j)


In [61]:
psi = np.matrix([1,0])
rho = np.matrix(np.outer(psi, psi.H))
# rho = random_state_in_reg((0,0,15))
print(np.trace(rho @ rho.H))

for g in [0,1,0]:
    rho = apply_operator(rho, GATES[g])
print(dm_to_bloch_reg(rho))
print(dm_to_bloch_vector(rho))
print(dm_to_polar_coords(rho))
print(rho)
print('0: ', np.trace(np.outer(np.matrix([1,0]), np.matrix([1,0]).H) @ rho))
print('1: ', np.trace(np.outer(np.matrix([0,1]), np.matrix([0,1]).H) @ rho))

1
(7, 1, 15)
((-1.1713460586910958e-16+0j), (-0.7071067811865474-1.6258839764163454e-17j), (0.7071067811865475+2.299347170293092e-17j))
((1.5707963267948966+3.8089455753285884e-33j), (0.7853981633974483-4.7621039126979776e-18j), (0.9999999999999998+2.7755575615628914e-17j))
[[ 8.53553391e-01+1.14967359e-17j -6.66967228e-17+3.53553391e-01j]
 [-5.04378831e-17-3.53553391e-01j  1.46446609e-01-1.14967359e-17j]]
0:  (0.8535533905932736+1.149673585146546e-17j)
1:  (0.1464466094067262-1.149673585146546e-17j)


In [62]:
transitions = [np.zeros((len(states), len(states)), dtype=np.half) for i in range(len(GATES))]

# building transition matrices
for ind, s in enumerate(states):
    if (ind % 500 == 0): print('.', end='')
    for i in range(5):
        rho = random_state_in_reg(s)
        for j, gate in enumerate(GATES):
            n_state = dm_to_bloch_reg(apply_operator(rho, gate))
            n_state_ind = states.index(n_state)
            state_ind = states.index(dm_to_bloch_reg(rho))
            transitions[j][state_ind][n_state_ind] += 1

for i in range(len(GATES)):
    for j in range(len(states)):
        transitions[i][j] = np.nan_to_num(transitions[i][j] / sum(transitions[i][j]))

.........

  app.launch_new_instance()


In [543]:
with np.load('transitions.npz') as data:
    transitions = data['a']

In [63]:
def R(state, action):
    # pass
    if (state == goal_reg):
        return 1
    #     if (action <= len(GATES) - 2):
    #         return 0
    #     else:
    #         return 0.1 # to encourage using identity
    else:
        return 0

In [64]:
def policy_eval(policy, discount_factor=0.9, epsilon=0.0001):
    V_old = np.zeros(len(states))
    while True:
    # for i in range(1):
        V_new = np.zeros(len(states))
        delta = 0
        for s, _ in enumerate(states):
            v_fn = 0
            action_probs = policy[s]
            for a, _ in enumerate(GATES):
                p_trans = transitions[a][s]
                p_next_states = np.nonzero(transitions[a][s])[0]
                for next_s in p_next_states:
                    v_fn += action_probs[a] * p_trans[next_s] * (R(states[s], a) + discount_factor * V_old[next_s])
            delta = max(delta, abs(v_fn - V_old[s]))
            V_new[s] = v_fn
        V_old = V_new
        if(delta < epsilon):
            break
    return np.array(V_old)

In [65]:
def policy_improvement(policy_eval_fn=policy_eval, discount_factor=0.9):      
    def one_step_lookahead(s, V_old):
        actions = np.zeros(len(GATES))
        for a in range(len(GATES)):
            v_fn = 0
            p_trans = transitions[a][s]
            p_next_states = np.nonzero(transitions[a][s])[0]
            for next_s in p_next_states:
                v_fn += p_trans[next_s] * (R(states[s], a) + discount_factor * V_old[next_s])
            actions[a] = v_fn
        return actions
    policy = np.ones([len(states), len(GATES)]) / len(GATES)
    actions_values = np.zeros(len(GATES))
    
    while True:
        value_fn = policy_eval_fn(policy)
        policy_stable = True
        for s in range(len(states)):
            actions_values = one_step_lookahead(s, value_fn)
            best_action = np.argmax(actions_values)
            chosen_action = np.argmax(policy[s])
            if(best_action != chosen_action):
                policy_stable = False
            policy[s] = np.eye(len(GATES))[best_action]
        
        if(policy_stable):
            return policy, value_fn

In [66]:
policy, v = policy_improvement(policy_eval)

In [595]:
np.savetxt('v.txt', v, fmt='%2f')

In [67]:
np.count_nonzero(v)

64

In [70]:
for s in np.argwhere(v>0):
    print(states[s[0]])

(0, 0, 15)
(0, 1, 15)
(0, 2, 15)
(0, 3, 15)
(0, 4, 15)
(0, 5, 15)
(0, 6, 15)
(0, 7, 15)
(1, 0, 15)
(1, 1, 15)
(1, 2, 15)
(1, 3, 15)
(1, 4, 15)
(1, 5, 15)
(1, 6, 15)
(1, 7, 15)
(2, 0, 15)
(2, 1, 15)
(2, 2, 15)
(2, 3, 15)
(2, 4, 15)
(2, 5, 15)
(2, 6, 15)
(2, 7, 15)
(3, 0, 15)
(3, 1, 15)
(3, 2, 15)
(3, 3, 15)
(3, 4, 15)
(3, 5, 15)
(3, 6, 15)
(3, 7, 15)
(4, 0, 15)
(4, 1, 15)
(4, 2, 15)
(4, 3, 15)
(4, 4, 15)
(4, 5, 15)
(4, 6, 15)
(4, 7, 15)
(5, 0, 15)
(5, 1, 15)
(5, 2, 15)
(5, 3, 15)
(5, 4, 15)
(5, 5, 15)
(5, 6, 15)
(5, 7, 15)
(6, 0, 15)
(6, 1, 15)
(6, 2, 15)
(6, 3, 15)
(6, 4, 15)
(6, 5, 15)
(6, 6, 15)
(6, 7, 15)
(7, 0, 15)
(7, 1, 15)
(7, 2, 15)
(7, 3, 15)
(7, 4, 15)
(7, 5, 15)
(7, 6, 15)
(7, 7, 15)


In [52]:
optimal_programs = []
for i in range(1):
    converged = False
    # while not converged:
    for j in range(1):
        psi = np.matrix([1,0])
        s = np.matrix(np.outer(psi, psi.H))
        s = random_state_in_reg((0, 0, k-1))
        prog = []
        counter = 0
        while counter < 10:
            print(dm_fidelity(goal, s))
            action = np.argmax(policy[states.index(dm_to_bloch_reg(s))])
            next_s = apply_operator(s, GATES[action])
            prog.append(action)
            # next_s = random_state_in_reg(dm_to_bloch_reg(next_s))
            s = next_s
            counter += 1
            if (dm_to_bloch_reg(s) == goal_reg):
                print('converged')
                converged = True
                break
        
    optimal_programs.append(prog)
optimal_programs

(0.870515376638716-4.387926769907144e-09j)
(0.5687859073900382-4.433989446940021e-09j)
(0.8705153734632788-1.9639491264975648e-09j)
(0.5687859066203695-3.980132889633813e-09j)
(0.8705153773771726-5.582581009277856e-09j)
(0.5687859083483655-3.383414714720309e-09j)
(0.870515377021319-4.200717889533577e-09j)
(0.5687859074904232-4.206106810979554e-09j)
(0.870515377754597-3.9802862351640105e-09j)
(0.5687859077449888-3.3115776655042486e-09j)


[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]

In [550]:
psi = np.matrix([1,0])
s = np.matrix(np.outer(psi, psi.H))
action = np.argmax(policy[states.index(dm_to_bloch_reg(s))])
action



0

In [36]:
rho = np.matrix([[1/2, (np.sqrt(3)/20) + 2/5], [(np.sqrt(3)/20) + 2/5, 1/2]])


matrix([[0.5       , 0.48660254],
        [0.48660254, 0.5       ]])

In [37]:
x = np.trace(np.matrix([[0,1],[1,0]]) @ rho)
y = np.trace(np.matrix([[0,-1j],[1j,0]]) @ rho)
z = np.trace(np.matrix([[1,0],[0,-1]]) @ rho)
print(x,y,z)

0.9732050807568877 0j 0.0


In [39]:
(np.eye(2) + x*np.array([[0,1],[1,0]]) + y*np.array([[0, -1j], [1j, 0]]) + z*np.array([[1,0], [0,-1]]))/2

array([[0.5       +0.j, 0.48660254+0.j],
       [0.48660254+0.j, 0.5       +0.j]])