In [2]:
import numpy as np
from qiskit import *
import pandas as pd

In [406]:
def dm_to_bloch_reg(rho):
    # rho is a density matrix
    state = dm_to_bloch_vector(rho)
    state = cartesian_to_spherical(state)

    # state is now (theta, phi, r)
    for i, intv in enumerate(thetas):
        if (state[0] in intv):
            theta_reg = i
    for i, intv in enumerate(phis):
        if (state[1] in intv):
            phi_reg = i
    for i, intv in enumerate(radii):
        if (state[2] in intv):
            r_reg = i
    return (theta_reg, phi_reg, r_reg)

def dm_to_bloch_vector(rho):
    x = np.array([[0,1],[1,0]])
    y = np.array([[0,-1j],[1j,0]])
    z = np.array([[1,0],[0,-1]])

    # where rho is a density matrix
    return (np.trace(rho @ x), np.trace(rho @ y), np.trace(rho @ z))

def dm_to_polar_coords(rho):
    # rho is a density matrix
    return cartesian_to_spherical(dm_to_bloch_vector(rho))

def cartesian_to_spherical(state):
    x = state[0]
    y = state[1]
    z = state[2]

    r = np.sqrt(x**2 + y**2 + z**2)
    theta = max(0, np.arctan(y/x))
    phi = max(0, np.arccos(z/r))

    return (theta, phi, r)

def spherical_to_cartesian(state):
    theta = state[0]
    phi = state[1]
    r = state[2]

    x = r*np.cos(theta)*np.sin(phi)
    y = r*np.sin(theta)*np.sin(phi)
    z = r*np.cos(phi)

    return (x, y, z)

def random_state_in_reg(reg):
    # where reg is a tuple specifying (theta, phi, radius)
    # returns a density matrix
    theta = np.random.uniform(max(0, thetas[reg[0]].left), min(thetas[reg[0]].right, np.pi))

    # maybe consider the poles as one state
    # if (reg == (0, 0) or reg == (len(thetas)-1, len(phis)-1)):
        # phi = np.random.uniform(-np.pi, np.pi)
    # else:
    phi = np.random.uniform(max(-np.pi, phis[reg[1]].left), min(phis[reg[1]].right, np.pi))
    r = np.random.uniform(radii[reg[2]].left, radii[reg[2]].right)
    
    state = spherical_to_cartesian((theta, phi, r))
    rho = (np.eye(2) + state[0]*np.array([[0,1],[1,0]]) + state[1]*np.array([[0, -1j], [1j, 0]]) + state[2]*np.array([[1,0], [0,-1]]))/2
    return np.matrix(rho)

def generate_target_state(n):
    s = np.matrix([1, 0])
    rho = np.outer(s, s.H)
    
    for i in range(n):
        ht = GATES[0] @ GATES[1]
        rho = apply_operator(rho, ht)
    return rho

def apply_operator(rho, op): # add noise
    return op @ rho @ op.H

In [465]:
n = 10**5
k = 16

GATES = [
    np.matrix([[1, 1], [1, -1]]) * 1/np.sqrt(2), # H
    np.matrix([[1, 0], [0, np.exp(1j * np.pi / 4)]]), # T
    np.matrix([[1, 0], [0, 1]]) # I
]

goal = generate_target_state(n=n)
thetas = pd.cut(np.linspace(0, np.pi, k), k, precision=10, include_lowest=True)
phis = pd.cut(np.linspace(0, 2*np.pi, k), k,  precision=10, include_lowest=True)
rs = (1 - np.geomspace(1e-3, 1, k))[::-1]
rs[0] = 0
rs[-1] = 1

# radii = []
# for i in range(len(rs) - 1):
#     radii.append(pd.Interval(rs[i], rs[i+1], 'right'))
radii = pd.cut(rs, k, precision=10)
goal_reg = dm_to_bloch_reg(goal)

states = [(i, j, k) for i in range(len(thetas)) for j in range(len(phis)) for k in range(len(radii))]
values = np.zeros(len(thetas) * len(phis) * len(radii))

In [486]:
print(np.trace(goal@goal.H))
print(goal_reg)
print(dm_to_bloch_vector(goal))
print(dm_to_polar_coords(goal))
print(goal)
print('0: ', np.trace(np.outer(np.matrix([1,0]), np.matrix([1,0]).H) @ goal))
print('1: ', np.trace(np.outer(np.matrix([0,1]), np.matrix([0,1]).H) @ goal))

(0.9999999999740942+0j)
(7, 1, 15)
((-0.046210220683166825-8.049116928532385e-15j), (-0.6028898141256764-6.716849298982197e-15j), (0.7964850830379651-7.965850201685498e-15j))
((1.4942980283690859-1.2423961952959737e-14j), (0.6493366346556542+1.0640793200575643e-14j), (0.9999999999870368-1.9232093644367427e-15j))
[[ 0.89824254-9.88098492e-15j -0.02310511+3.01444907e-01j]
 [-0.02310511-3.01444907e-01j  0.10175746-1.91513472e-15j]]
0:  (0.8982425415125112-9.880984919163893e-15j)
1:  (0.10175745847454612-1.915134717478395e-15j)


In [487]:
psi = np.matrix([1,0])
rho = np.matrix(np.outer(psi, psi.H))
rho = random_state_in_reg((0,0,15))
print(np.trace(rho @ rho.H))
rho = apply_operator(rho, GATES[0])
rho = apply_operator(rho, GATES[1])
rho = apply_operator(rho, GATES[0])
print(dm_to_bloch_reg(rho))
print(dm_to_bloch_vector(rho))
print(dm_to_polar_coords(rho))
print(rho)
print('0: ', np.trace(np.outer(np.matrix([1,0]), np.matrix([1,0]).H) @ rho))
print('1: ', np.trace(np.outer(np.matrix([0,1]), np.matrix([0,1]).H) @ rho))

(0.9924299710295353+0j)
(0, 2, 15)
((0.34339679293860315+0j), (-0.636708187995334+0j), (0.6793682859820958+4.041062560271072e-17j))
(0, (0.8167823165145502-2.968275562664026e-17j), (0.992401099384251+2.7663912774997735e-17j))
[[0.83968414+2.02053128e-17j 0.1716984 +3.18354094e-01j]
 [0.1716984 -3.18354094e-01j 0.16031586-2.02053128e-17j]]
0:  (0.8396841429910478+2.020531280135536e-17j)
1:  (0.16031585700895207-2.020531280135536e-17j)


In [282]:
transitions = [np.zeros((len(states), len(states)), dtype=np.half) for i in range(len(GATES))]

# building transition matrices
for ind, s in enumerate(states):
    if (ind % 500 == 0): print('.', end='')
    for i in range(5):
        rho = random_state_in_reg(s)
        for j, gate in enumerate(GATES):
            n_state = dm_to_bloch_reg(apply_operator(rho, gate))
            n_state_ind = states.index(n_state)
            state_ind = states.index(dm_to_bloch_reg(rho))
            transitions[j][state_ind][n_state_ind] += 1

for i in range(len(GATES)):
    for j in range(len(states)):
        transitions[i][j] = np.nan_to_num(transitions[i][j] / sum(transitions[i][j]))

.

KeyboardInterrupt: 

In [182]:
with np.load('transitions.npz') as data:
    transitions = data['a']

In [48]:
def R(state, action):
    if (state == goal_reg):
        return 1
        # if (action <= len(GATES) - 2):
        #     return 0
        # else:
        #     return 0.1 # to encourage using identity
    else:
        return 0

In [192]:
def policy_eval(policy, discount_factor=0.8, epsilon=0.001):
    V_old = np.zeros(len(states))
    while True:
    # for i in range(1):
        V_new = np.zeros(len(states))
        delta = 0
        for s, _ in enumerate(states):
            v_fn = 0
            action_probs = policy[s]
            for a, _ in enumerate(GATES):
                p_trans = transitions[a][s]
                p_next_states = np.nonzero(transitions[a][s])[0]
                for next_s in p_next_states:
                    v_fn += action_probs[a] * p_trans[next_s] * (R(states[s], a) + discount_factor * V_old[next_s])
            delta = max(delta, abs(v_fn - V_old[s]))
            V_new[s] = v_fn
        V_old = V_new
        if(delta < epsilon):
            break
    return np.array(V_old)

In [193]:
def policy_improvement(policy_eval_fn=policy_eval, discount_factor=0.8):      
    def one_step_lookahead(s, V_old):
        actions = np.zeros(len(GATES))
        for a in range(len(GATES)):
            v_fn = 0
            p_trans = transitions[a][s]
            p_next_states = np.nonzero(transitions[a][s])[0]
            for next_s in p_next_states:
                v_fn += p_trans[next_s] * (R(states[s], a) + discount_factor * V_old[next_s])
            actions[a] = v_fn
        return actions
    policy = np.ones([len(states), len(GATES)]) / len(GATES)
    actions_values = np.zeros(len(GATES))
    
    while True:
        value_fn = policy_eval_fn(policy)
        policy_stable = True
        for s in range(len(states)):
            actions_values = one_step_lookahead(s, value_fn)
            best_action = np.argmax(actions_values)
            chosen_action = np.argmax(policy[s])
            if(best_action != chosen_action):
                policy_stable = False
            policy[s] = np.eye(len(GATES))[best_action]
        
        if(policy_stable):
            return policy, value_fn

In [194]:
policy, v = policy_improvement(policy_eval)

In [197]:
np.savetxt('v.txt', v)

In [236]:
optimal_programs = []
for i in range(k):
    converged = False
    while not converged:
        s = random_state_in_reg((0, 0, k-1))
        prog = []
        counter = 0
        while counter < 300:
            action = np.argmax(policy[states.index(dm_to_bloch_reg(s))])
            next_s = apply_operator(s, GATES[action])
            prog.append(action)
            # next_s = random_state_in_reg(dm_to_bloch_reg(next_s))
            s = next_s
            counter += 1
            if (dm_to_bloch_reg(s) == goal_reg):
                print('converged')
                converged = True
                break
        
    optimal_programs.append(prog)
optimal_programs

.
.
.
.
.


KeyboardInterrupt: 