<a href="https://colab.research.google.com/github/Advanced-Research-Centre/QGym/blob/main/QGym_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Google Colab port of Kajetan Knoop's code

https://github.com/Advanced-Research-Centre/QGym/commit/787b428a4c82bde97aa120e7319da6d7c92d25a2

In [72]:
from qiskit import QuantumCircuit, Aer
from qiskit.quantum_info import Statevector
from qiskit.circuit.library import XGate, YGate, ZGate, HGate, SGate, TGate, IGate, CXGate, CYGate, CZGate
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from gymnasium.envs.registration import register
import qiskit.tools.jupyter
%matplotlib inline

In [85]:
class QGymEnv(gym.Env):
    """Gymnasium environment in which an agent builds a quantum circuit gate by gate.

    Every action is a pair ``(gate_index, qubit)`` that appends one gate from
    the available gate set to the circuit. After each step the circuit is
    simulated and scored as::

        reward = c0 * fidelity + c1 * depth

    where ``fidelity`` is the absolute overlap between the simulated
    statevector and the target state, and ``depth`` is the circuit depth.
    The episode terminates once the fidelity exceeds ``fidelity_threshold``.

    Args:
        size: Number of qubits in the circuit; must be at least 1.
        target_state: Sequence of ``2**size`` amplitudes describing the state
            to reach. When omitted, the target has amplitude 1 at index 0 and
            0 everywhere else.

    Raises:
        ValueError: If ``size`` is smaller than 1 or ``target_state`` does not
            contain exactly ``2**size`` amplitudes.
    """

    def __init__(self, size=1, target_state=None):
        if size < 1:
            raise ValueError(f"size must be at least 1, got {size}")

        self.size = size  # The size of the circuit
        self.circuit = QuantumCircuit(self.size)

        # `is None` rather than `== None`: comparing a NumPy array with `==`
        # is elementwise, so the `if` would raise for array-valued targets.
        if target_state is None:
            self.t_state = np.zeros(2**size)
            self.t_state[0] = 1.0
        else:
            self.t_state = np.asarray(target_state).reshape(-1)
            if self.t_state.size != 2**size:
                raise ValueError(
                    f"target_state must have {2**size} amplitudes for size={size}, "
                    f"got {self.t_state.size}"
                )

        self.c0 = 1       # coefficient to weigh the fidelity
        self.c1 = -0.01   # coefficient to weigh the depth
        self.fidelity_threshold = 0.90  # episode terminates above this fidelity

        single_qubit_gates = [XGate, YGate, ZGate, HGate, SGate, TGate, IGate]
        two_qubit_gates = [CXGate, CYGate, CZGate]
        # A two-qubit gate on a one-qubit circuit would be appended on the
        # qubit pair [0, 0], so those gates are only offered when size >= 2.
        self._available_gates = single_qubit_gates + (two_qubit_gates if size >= 2 else [])
        self.two_qubit_set = ["cx", "cy", "cz"]
        self.action_space = spaces.Tuple((spaces.Discrete(len(self._available_gates)), spaces.Discrete(self.size)))

        # NOTE(review): step() returns the fidelity as a scalar, which does not
        # match this (1, 1) box shape -- confirm which of the two is intended.
        self.observation_space = spaces.Box(0.0, 1.0, (1, 1), dtype=np.float64)

        # Looked up once here instead of on every fidelity evaluation.
        self._simulator = Aer.get_backend('statevector_simulator')

    def _get_depth(self):
        """Return the depth of the current circuit."""
        return self.circuit.depth()

    def _get_fidelity(self):
        """Simulate the circuit and return |<psi|target>| for its statevector psi."""
        result = np.array(self._simulator.run(self.circuit, shots=1).result().get_statevector().data)
        # np.vdot conjugates its first argument, giving the inner product <psi|target>.
        return np.abs(np.vdot(result, self.t_state))

    def reset(self, seed=None, options=None):
        """Start a new episode with an empty circuit.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Accepted for API compatibility; not used.

        Returns:
            A tuple ``(observation, info)`` where ``observation`` is the
            fidelity of the empty circuit and ``info`` is an empty dict.
        """
        super().reset(seed=seed)
        self.circuit = QuantumCircuit(self.size)
        # The empty circuit leaves the register in the basis state at index 0,
        # so its overlap with the target is just |target[0]|; no simulation needed.
        observation = np.abs(self.t_state[0])
        return observation, {}

    def step(self, action):
        """Append the gate selected by ``action`` and score the resulting circuit.

        Args:
            action: Pair ``(gate_index, qubit)``. ``gate_index`` selects a gate
                from the available gate set and ``qubit`` (taken modulo
                ``size``) selects the qubit it acts on. Two-qubit gates act on
                that qubit and the next one, wrapping around.

        Returns:
            A tuple ``(observation, reward, terminated, truncated, info)``.
            ``observation`` is the fidelity, ``truncated`` is always ``False``
            and ``info`` is always an empty dict.
        """
        gate = self._available_gates[action[0]]()
        qubit = action[1] % self.size
        if gate.name in self.two_qubit_set:
            qargs = [qubit, (qubit + 1) % self.size]
        else:
            qargs = [qubit]
        self.circuit.append(gate, qargs)

        d = self._get_depth()
        f = self._get_fidelity()
        reward = self.c0*f + self.c1*d
        terminated = bool(f > self.fidelity_threshold)
        observation = f   # unused for now
        info = {}         # unused for now
        truncated = False # unused for now
        return observation, reward, terminated, truncated, info

    def get_circuit(self):
        """Return the circuit built so far."""
        return self.circuit

    def render(self):
        """Draw the circuit with the matplotlib drawer and return the drawing.

        The drawing is returned so that a caller (for example a notebook cell
        ending in ``env.render()``) can display it.
        """
        return self.circuit.draw(output="mpl")

In [86]:
# Make the environment class available to gym.make under a namespaced id.
register(id='kk/QGym-v2', entry_point=QGymEnv)

# Two-qubit instance whose target has equal amplitude 1/sqrt(2) on the first
# and last basis states, i.e. the Bell state (|00> + |11>)/sqrt(2).
env = gym.make(
    'kk/QGym-v2',
    size=2,
    target_state=[1/np.sqrt(2), 0.0, 0.0, 1/np.sqrt(2)],
)

In [87]:
# Random-agent rollout: sample actions until the episode ends or the step
# budget is exhausted, printing the reward and the circuit after each step.
SEED = 0         # fixed seed so the sampled action sequence is reproducible
MAX_STEPS = 100  # upper bound on the number of sampled actions

env.reset()
# NOTE(review): presumably meant to bypass Gymnasium's passive step check on
# the wrapper -- confirm it still has an effect and is still needed.
env.checked_step = True
env.action_space.seed(SEED)

for step in range(MAX_STEPS):
    action = env.action_space.sample()

    observation, reward, terminated, truncated, info = env.step(action)
    print("Step:",(step+1),"\tAction:",action,"\tReward:",reward)
    # gym.make returns a wrapped env; `unwrapped` reaches the QGymEnv instance
    # without relying on wrappers forwarding custom attributes.
    print(env.unwrapped.get_circuit())
    # The circuit was already printed above, so just stop when the episode ends.
    if terminated or truncated:
        break

env.close()

Step: 1 	Action: (5, 1) 	Reward: 0.6971067811865475
          
q_0: ─────
     ┌───┐
q_1: ┤ T ├
     └───┘
Step: 2 	Action: (6, 1) 	Reward: 0.6871067811865474
               
q_0: ──────────
     ┌───┐┌───┐
q_1: ┤ T ├┤ I ├
     └───┘└───┘
Step: 3 	Action: (8, 1) 	Reward: 0.6771067811865474
               ┌───┐
q_0: ──────────┤ Y ├
     ┌───┐┌───┐└─┬─┘
q_1: ┤ T ├┤ I ├──■──
     └───┘└───┘     
Step: 4 	Action: (1, 0) 	Reward: -0.04
               ┌───┐┌───┐
q_0: ──────────┤ Y ├┤ Y ├
     ┌───┐┌───┐└─┬─┘└───┘
q_1: ┤ T ├┤ I ├──■───────
     └───┘└───┘          
Step: 5 	Action: (2, 0) 	Reward: -0.05
               ┌───┐┌───┐┌───┐
q_0: ──────────┤ Y ├┤ Y ├┤ Z ├
     ┌───┐┌───┐└─┬─┘└───┘└───┘
q_1: ┤ T ├┤ I ├──■────────────
     └───┘└───┘               
Step: 6 	Action: (3, 0) 	Reward: 0.44
               ┌───┐┌───┐┌───┐┌───┐
q_0: ──────────┤ Y ├┤ Y ├┤ Z ├┤ H ├
     ┌───┐┌───┐└─┬─┘└───┘└───┘└───┘
q_1: ┤ T ├┤ I ├──■─────────────────
     └───┘└───┘                    
Step: 7 	Action: (8, 0)