In [1]:
import gymnasium as gym
from gymnasium import spaces
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

import numpy as np
import random
import qutip as qt
from qutip import sigmax, sigmay, sigmaz
from IPython import display
from collections import deque
from typing import Optional

To test our DQN, we will use the set of single-qubit quantum gates below for the compilation. This set is universal, so products of these gates can approximate any $2 \times 2$ unitary matrix (single-qubit gate) with arbitrary precision. The goal is for our algorithm to find the optimal way to compile (that is, approximate) a single-qubit gate to a prescribed precision.

$V_1 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 & 2i \\
2i & 1
\end{pmatrix}, \quad
V_2 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 & 2 \\
-2 & 1
\end{pmatrix}, \quad
V_3 = \frac{1}{\sqrt{5}} \begin{pmatrix}
1 + 2i & 0 \\
0 & 1 - 2i
\end{pmatrix}$


In [2]:
# Qobj dimensions of a single-qubit operator.
dim = [[2], [2]]

# The three V gates share the same 1/sqrt(5) normalisation.
inv_sqrt5 = 1/np.sqrt(5)
mat1 = inv_sqrt5 * np.array([[1, 2j],
                             [2j, 1]])
mat2 = inv_sqrt5 * np.array([[1, 2],
                             [-2, 1]])
mat3 = inv_sqrt5 * np.array([[1 + 2j, 0],
                             [0, 1 - 2j]])

# Wrap every matrix in a QuTiP operator.
v1, v2, v3 = (qt.Qobj(mat, dims = dim) for mat in (mat1, mat2, mat3))

# Gate set handed to the environment as its action set.
efficient_set = [v1, v2, v3]

For the training process, the target gate will be a product of matrices randomly sampled from a set made up of the matrices above (which should simplify the training process) together with fundamental gates in quantum computing: the Pauli gates and the Hadamard gate. The following function builds such a product:

In [3]:
# Hadamard gate, built with QuTiP's gate helpers.
h = qt.gates.hadamard_transform()

# Pool of gates that random target gates are drawn from:
# Hadamard, the three Pauli matrices and the three V gates.
gateList = [
    h,
    sigmax(), sigmay(), sigmaz(),
    v1, v2, v3,
]

def shuffling(lst, max_length) :
    """Return the product of a random number of gates drawn from ``lst``.

    The number of factors is drawn uniformly from ``1..max_length`` and each
    factor is drawn uniformly (with replacement) from ``lst``. Factors are
    multiplied on the right of the running product, starting from the
    2x2 identity.

    Parameters
    ----------
    lst : sequence of qutip.Qobj
        Candidate single-qubit gates. Must be non-empty.
    max_length : int
        Largest number of factors in the product. Must be >= 1.

    Returns
    -------
    qutip.Qobj
        The product of the sampled gates.

    Raises
    ------
    ValueError
        If ``max_length`` is smaller than 1 (raised by ``random.randint``).
    IndexError
        If ``lst`` is empty (raised by ``random.choice``).
    """
    n = random.randint(1, max_length)
    matrix = qt.qeye(2) #identity matrix
    for _ in range(n) :
        # random.choice picks an element of the list as-is. np.random.choice
        # would first coerce the list of Qobj into a NumPy array, which is
        # not one-dimensional and is therefore rejected.
        matrix = matrix * random.choice(lst)
    return matrix



In [4]:
# Pick the best available backend: CUDA first, then Apple MPS, then the CPU.
if torch.cuda.is_available() :
    device_name = "cuda"
elif torch.backends.mps.is_available() :
    device_name = "mps"
else :
    device_name = "cpu"

device = torch.device(device_name)

We define a function that takes a quantum state and returns the coordinates of the corresponding vector on the Bloch sphere.

In [5]:
def state_bloch(state) :
    """Return the Bloch-sphere coordinates of a single-qubit pure state.

    For ``state = a|0> + b|1>`` the Bloch vector is

        x = 2 Re(conj(a) b),  y = 2 Im(conj(a) b),  z = |a|^2 - |b|^2.

    The coherence term must be ``conj(a) * b``: using ``a * conj(b)`` gives
    the same x but flips the sign of y (e.g. (|0> + i|1>)/sqrt(2) would land
    on y = -1 instead of y = +1).

    Parameters
    ----------
    state : qutip.Qobj
        Ket of a single qubit. Only its ``full()`` method is used, so any
        object exposing ``full()`` that returns the two amplitudes works.
        The state is not normalised here.

    Returns
    -------
    numpy.ndarray
        Array ``[x, y, z]`` of dtype float64.

    Raises
    ------
    ValueError
        If ``state`` does not hold exactly two amplitudes.
    """
    # Read the amplitudes from the dense representation so the result does
    # not depend on what type ``bra * ket`` evaluates to.
    a, b = np.asarray(state.full(), dtype=complex).ravel()
    coherence = 2 * np.conj(a) * b
    x = coherence.real
    y = coherence.imag
    z = abs(a)**2 - abs(b)**2
    return np.array([x, y, z], dtype=np.float64)


We now build our gym environment, which is the environment the agent will interact with.

In [6]:
class QuantumEnv(gym.Env) :
    """Gymnasium environment in which an agent steers a qubit towards a target state.

    The qubit starts in |0>. Each action applies one gate of ``gateSet`` to
    the current state. An episode terminates when the fidelity with the
    target state exceeds ``fidelity_threshold`` and is truncated once the
    step counter exceeds ``max_steps``. Every non-terminating step is
    rewarded with -1, the terminating step with 0.

    Observations are a dict with the Bloch vectors of the current state
    (``"agent"``) and of the target state (``"target"``).

    Parameters
    ----------
    gateSet : sequence of qutip.Qobj
        Gates the agent can apply; action ``i`` applies ``gateSet[i]``.
    target_gate : qutip.Qobj, optional
        Gate defining the target state until the first ``reset``; every
        ``reset`` replaces the target by a freshly sampled one.
    target_pool : sequence of qutip.Qobj, optional
        Gates that random targets are built from. ``None`` (default) uses
        the module-level ``gateList``.
    max_target_length : int, optional
        Largest number of factors in a random target gate (default 60).
    max_steps : int, optional
        The episode is truncated once more than ``max_steps`` gates have
        been applied (default 500).
    fidelity_threshold : float, optional
        Fidelity above which the episode terminates (default 0.99).
    """

    def __init__(self, gateSet, target_gate = sigmax(), target_pool = None,
                 max_target_length = 60, max_steps = 500,
                 fidelity_threshold = 0.99) :
        self.gateSet = gateSet
        self.target_pool = target_pool
        self.max_target_length = max_target_length
        self.max_steps = max_steps
        self.fidelity_threshold = fidelity_threshold

        self.action_space = spaces.Discrete(len(gateSet))
        self.observation_space = spaces.Dict({
            "agent" : spaces.Box(low = -1.0, high = 1.0, shape = (3,), dtype = np.float64),
            "target" : spaces.Box(low = -1.0, high = 1.0, shape = (3,), dtype = np.float64)})

        self._begin_episode(target_gate)


    def _begin_episode(self, target_gate) :
        """Put the qubit back in |0>, set the target and restart the bookkeeping."""
        self.state = qt.basis(2, 0)
        self.bloch = state_bloch(self.state)

        self.targetState = target_gate * self.state
        self.targetBloch = state_bloch(self.targetState)

        # Fresh sphere: the target is drawn as a vector, visited states as points.
        self.bloch_sphere = qt.Bloch()
        self.bloch_sphere.add_vectors(self.targetBloch)
        self.bloch_sphere.add_points(self.bloch)

        # Number of gates applied since the episode started.
        self.episode = 0


    def _sample_target_gate(self) :
        """Draw a random product of 1..max_target_length gates from the target pool.

        Sampling goes through ``self.np_random`` so that ``reset(seed=...)``
        makes the sequence of targets reproducible.
        """
        pool = gateList if self.target_pool is None else self.target_pool
        length = int(self.np_random.integers(1, self.max_target_length + 1))
        gate = qt.qeye(2)
        for _ in range(length) :
            gate = gate * pool[int(self.np_random.integers(len(pool)))]
        return gate


    def get_obs(self) :
        """Return the Bloch vectors of the current and target states."""
        return {
            "agent" : self.bloch,
            "target" : self.targetBloch}

    def get_info(self) :
        """Return the fidelity between the current state and the target state."""
        return {
            "fidelity" : abs(self.state.dag() * self.targetState)**2
        }


    def reset(self, seed: Optional[int] = None, options: Optional[dict] = None) :
        """Start a new episode with a freshly sampled target state.

        Parameters
        ----------
        seed : int, optional
            Seed forwarded to ``gym.Env.reset``; it seeds ``self.np_random``,
            which the target sampling uses.
        options : dict, optional
            Accepted for API compatibility; not used.

        Returns
        -------
        tuple
            ``(observation, info)`` as returned by ``get_obs`` / ``get_info``.
        """
        super().reset(seed=seed)
        self._begin_episode(self._sample_target_gate())
        return self.get_obs(), self.get_info()


    def step(self, action) :
        """Apply gate number ``action`` to the current state.

        Parameters
        ----------
        action : int-like
            Index into ``gateSet``. Anything ``int()`` accepts is allowed
            (Python int, NumPy integer, one-element tensor).

        Returns
        -------
        tuple
            ``(observation, reward, terminated, truncated, info)``.
        """
        self.state = self.gateSet[int(action)] * self.state
        self.bloch = state_bloch(self.state)
        self.bloch_sphere.add_points(self.bloch)
        self.episode = self.episode + 1

        observation = self.get_obs()
        info = self.get_info()

        # Success: the current state is close enough to the target.
        terminated = bool(info["fidelity"] > self.fidelity_threshold)
        reward = 0 if terminated else -1

        # Step budget exhausted.
        truncated = self.episode > self.max_steps

        return observation, reward, terminated, truncated, info


    def render(self, mode ="human") :
        """Show the Bloch sphere with the target vector and the visited states."""
        self.bloch_sphere.show()

# Register the environment so it can be built with gym.make("gymnasium_env/QuantumEnv", ...).
gym.register(id = "gymnasium_env/QuantumEnv", entry_point = QuantumEnv)

<h1>Replay Memory</h1>

We use an experience replay memory: the transitions performed by the agent are stored, and random samples are then drawn from this memory, giving a batch of uncorrelated transitions for the training process.

In [7]:
# One stored interaction with the environment.
Transition = namedtuple(
    'Transition',
    ['state', 'action', 'next_state', 'reward'],
)


class ReplayMemory:
    """Fixed-capacity buffer of transitions.

    Once ``capacity`` transitions are stored, appending a new one drops the
    oldest one.
    """

    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

    def push(self, *args):
        """Store a transition built from ``(state, action, next_state, reward)``."""
        transition = Transition(*args)
        self.memory.append(transition)

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        return random.sample(self.memory, batch_size)

Building the neural network :

In [8]:
class DeepQNetwork(nn.Module) :
    """Fully connected network mapping an observation to one value per action.

    Two hidden layers of 128 units with ReLU activations are followed by a
    linear output layer of size ``n_actions``.
    """

    def __init__(self, n_obs, n_actions ) :
        super().__init__()
        hidden_units = 128
        self.layer1 = nn.Linear(n_obs, hidden_units)
        self.layer2 = nn.Linear(hidden_units, hidden_units)
        self.layer3 = nn.Linear(hidden_units, n_actions)

    def forward(self, state):
        """Return the output-layer values for ``state`` (last dimension ``n_obs``)."""
        activations = state
        for hidden_layer in (self.layer1, self.layer2):
            activations = F.relu(hidden_layer(activations))
        return self.layer3(activations)

In [9]:
# Build the registered environment with the three V gates as the action set.
env = gym.make("gymnasium_env/QuantumEnv", gateSet = efficient_set)
n_actions = env.action_space.n

# One reset gives a sample observation to size the networks from.
obs, info = env.reset()
n_agent = len(obs["agent"])

# NOTE(review): the networks take n_agent + 1 inputs, while an observation
# holds two vectors of length n_agent ("agent" and "target") -- confirm which
# tensor is meant to be fed to the networks.
n_inputs = n_agent + 1

#policy network :
policy = DeepQNetwork(n_inputs, n_actions).to(device)
#target network, initialised with the weights of the policy network
target = DeepQNetwork(n_inputs, n_actions).to(device)
target.load_state_dict(policy.state_dict())


<All keys matched successfully>

$\epsilon$-Greedy Algorithm: We use this algorithm to decide which action to take, as a compromise between exploration and exploitation. With probability $\epsilon$ the algorithm explores, meaning it picks a random action to discover its potential reward; with probability $1 - \epsilon$ the algorithm exploits, meaning it picks the action expected to yield the highest reward given the currently available information.

In [10]:
# Training hyperparameters.
# NOTE(review): batch_size, gamma, tau and LearningRate are not used in the
# cells shown here; presumably replay batch size, discount factor, target
# update rate and optimiser learning rate -- confirm against the training loop.
batch_size = 256
gamma = 0.95
tau = 0.005
LearningRate = 1e-4

# Epsilon-greedy schedule read by action_choice: epsilon decays exponentially
# from start_epsilon towards end_epsilon with time constant `decay`.
start_epsilon = 0.85
end_epsilon = 0.05
decay = 1000



def action_choice(state) :
    """Choose an action for ``state`` with an epsilon-greedy rule.

    With probability epsilon a random action is sampled from the action
    space; otherwise the action with the largest output of the ``policy``
    network is taken. Epsilon decays exponentially from ``start_epsilon``
    towards ``end_epsilon`` with time constant ``decay``.

    Parameters
    ----------
    state : torch.Tensor
        Batched network input; ``policy(state)`` must have the actions along
        dimension 1.

    Returns
    -------
    torch.Tensor
        Tensor of shape ``(1, 1)`` and dtype ``torch.long`` holding the index
        of the chosen action (both branches return the same shape).
    """
    # Read the counter from the underlying environment: gym.make wraps the
    # environment and wrappers do not reliably forward custom attributes.
    # NOTE(review): this counter is reset to 0 by every env.reset(), so
    # epsilon restarts at start_epsilon each episode -- confirm whether a
    # global step count was intended.
    steps_done = env.unwrapped.episode
    threshhold = end_epsilon + (start_epsilon - end_epsilon) * np.exp(-steps_done / decay)

    # Explore: np.random.random() draws a float uniformly from [0, 1).
    if np.random.random() < threshhold :
        return torch.tensor([[env.action_space.sample()]], device=device, dtype=torch.long)

    # Exploit: the argmax over the action dimension already is the action index.
    with torch.no_grad() :
        return policy(state).max(1).indices.view(1, 1)



<h3>References:</h3>


- Aram W. Harrow, Benjamin Recht, Isaac L. Chuang, Efficient Discrete Approximations of Quantum Gates :

https://arxiv.org/pdf/quant-ph/0111031

- Lorenzo Moro, Matteo G. A. Paris, Marcello Restelli & Enrico Prati, Quantum compiling by deep reinforcement learning :

https://www.nature.com/articles/s42005-021-00684-3 