In [14]:
import gymnasium as gym
from gymnasium import spaces
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np
import random
import qutip as qt
from qutip import sigmax, sigmay, sigmaz
from IPython import display
from collections import deque
from typing import Optional

$V_1 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 & 2i \\
2i & 1
\end{pmatrix}, \quad
V_2 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 & 2 \\
-2 & 1
\end{pmatrix}, \quad
V_3 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 + 2i & 0 \\
0 & 1 - 2i
\end{pmatrix}$


In [15]:
mat1 = (1/np.sqrt(5)) * np.array([[1, 2j], [2j, 1]])
mat2 = (1/np.sqrt(5)) * np.array([[1, 2], [-2, 1]])
mat3 = (1/np.sqrt(5)) * np.array([[1 + 2j, 0], [0, 1 - 2j]])
dim = [[2],[2]]
efficient_set = []

v1 = qt.Qobj(mat1, dims = dim)
v2 = qt.Qobj(mat2, dims = dim)
v3 = qt.Qobj(mat3, dims = dim)

efficient_set = [v1, v2, v3]

In [16]:
h = qt.gates.hadamard_transform()
gateList = [h, sigmax(), sigmay(), sigmaz(), v1, v2, v3]

def shuffling(lst, max_length) :
    n = random.randint(1, max_length)
    matrix = qt.gates.qeye(2) #identity matrix
    for i in range(n) :
         mat = np.random.choice(lst)
         matrix *= mat
    return matrix



In [17]:
device = torch.device(
    "cuda" if torch.cuda.is_available() else
    "mps" if torch.backends.mps.is_available() else
    "cpu"
)

In [18]:
def state_bloch(state) :
    a = qt.basis(2, 0).dag() * state
    b = qt.basis(2, 1).dag() * state
    x = (2*a*b.conjugate()).real
    y = (2*a*b.conjugate()).imag
    z = abs(a)**2 - abs(b)**2
    return np.array([x, y, z])


In [19]:
class QuantumEnv(gym.Env) :
    def __init__(self, gateSet, target_gate = sigmax()) :
        self.state = qt.basis(2, 0)
        self.bloch = state_bloch(self.state)
       
        self.targetState = target_gate * self.state
        self.targetBloch = state_bloch(self.targetState)

        self.bloch_sphere = qt.Bloch()
        self.bloch_sphere.add_vectors(self.targetBloch)
        self.bloch_sphere.add_points(self.bloch)

        self.gateSet = gateSet
        self.episode = 0
        
        self.action_space = spaces.Discrete(len(gateSet))
        self.observation_space = spaces.Dict({
            "agent" : spaces.Box(low = -1.0, high = 1.0, shape = (3,), dtype = np.float32),
            "target" : spaces.Box(low = -1.0, high = 1.0, shape = (3,), dtype = np.float32)})


    def get_obs(self) :
        return {
            "agent" : self.bloch,
            "target" : self.targetBloch}
    
    def get_info(self) :
        return {
            "fidelity" : abs(self.state.dag() * self.targetState)**2
        }
    

    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) :
        super().reset(seed=seed)

        self.state = qt.basis(2, 0)
        self.bloch = state_bloch(self.state)
       
        self.targetState =  shuffling(gateList, 60) * self.state
        self.targetBloch = state_bloch(self.targetState)

        self.bloch_sphere = qt.Bloch()
        self.bloch_sphere.add_vectors(self.targetBloch)
        self.bloch_sphere.add_points(self.bloch)

        observation = self.get_obs()
        info = self.get_info()

        self.episode = 0

        return observation, info
        



    def step(self, action) :
        self.state = self.gateSet[action] * self.state
        self.bloch =  state_bloch(self.state)
        self.bloch_sphere.add_points(self.bloch)
        terminated = False
        truncated = False

        self.episode = self.episode + 1
        observation = self.get_obs()
        info = self.get_info()
        fidelity = info["fidelity"]
        if fidelity > 0.99 : #we aim for 99% fidelity
            terminated = True
        reward = 0 if terminated else -1
        
        if self.episode > 500 :
            truncated = True
    
        
        return observation, reward, terminated, truncated, info


    def render(self, mode ="human") :
         self.bloch_sphere.show()

gym.register(
    id = "gymnasium_env/QuantumEnv",
    entry_point = QuantumEnv,
)

<h1>Replay Memory<h1>

In [20]:
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):

    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

In [21]:
class DeepQNetwork(nn.Module) :

    def __init__(self, n_obs, n_actions ) :
        super(DeepQNetwork, self).__init__()
        self.layer1 = nn.Linear(n_obs, 128)
        self.layer2 = nn.Linear(128, 128)
        self.layer3 = nn.Linear(128, n_actions)

    def forward(self, state):
        state = F.relu(self.layer1(state))
        state = F.relu(self.layer2(state))
        return self.layer3(state)

In [25]:
env = gym.make("gymnasium_env/QuantumEnv",
                gateSet = efficient_set)

n_actions = env.action_space.n

obs, info = env.reset()

n_agent = len(obs["agent"])
#policy network :
policy = DeepQNetwork(n_agent + 1, n_actions).to(device)
#target network
target = DeepQNetwork(n_agent + 1, n_actions).to(device)
target.load_state_dict(policy.state_dict())


  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")


<All keys matched successfully>

$\epsilon$-Greedy Algorithm

In [None]:
BATCH_SIZE = 256
GAMMA = 0.95
start_epsilon = 0.85
end_epsilon= 0.05
decay = 1000
TAU = 0.005
LearningRate = 1e-4



def action_choice(state) :
    threshhold = end_epsilon + (start_epsilon - end_epsilon)* \
    np.exp(- env.episode/decay)  ###-1 *
    
    if np.random < threshhold :
        return torch.tensor(env.action_space.sample(), device=device, dtype=torch.long)
    else :
        with torch.no_grad() :
            index = policy(state).max(1).indices.view(1,1)
            return torch.tensor(env.action_space[index], device=device, dtype = torch.long)



In [None]:
optimizer = optim.AdamW(policy.parameters(), lr=LearningRate, amsgrad=True)
memory = ReplayMemory(10000)

def visualization() :
    