<a href="https://colab.research.google.com/github/Advanced-Research-Centre/QGym/blob/main/QGym_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Google Colab port of Kajetan Knoop's code

https://github.com/Advanced-Research-Centre/QGym/commit/787b428a4c82bde97aa120e7319da6d7c92d25a2

In [1]:
%%capture
!pip3 install gymnasium
!pip3 install qiskit
!pip3 install pylatexenc
!pip3 install matplotlib

In [2]:
from qiskit import QuantumCircuit, Aer
from qiskit.quantum_info import Statevector
from qiskit.circuit.library import XGate, YGate, ZGate, HGate, SGate, TGate, IGate
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from gymnasium.envs.registration import register
import qiskit.tools.jupyter
%matplotlib inline

In [17]:
class QGymEnv(gym.Env):
    """Gymnasium environment in which an agent builds a quantum circuit gate by gate.

    Every action appends one single-qubit gate to the circuit. After each step the
    circuit is evaluated and the reward is ``c0 * f + c1 * d`` where ``f`` is the
    probability of the last computational-basis state of the circuit's output
    (see ``_get_fidelity``) and ``d`` is the circuit depth. An episode terminates
    once ``f`` exceeds 0.95.

    Action: a pair ``(gate_index, qubit_index)``; ``gate_index`` selects an entry of
    ``_available_gates`` and ``qubit_index`` the qubit it is applied to.
    Observation: a ``(1, 1)`` float64 array holding ``f``, matching
    ``observation_space``.
    """

    def __init__(self, size=1):
        """Create an empty circuit on ``size`` qubits and declare the spaces.

        Args:
            size: number of qubits in the circuit.
        """
        self.size = size  # The size of the circuit
        self.circuit = QuantumCircuit(self.size)

        self.c0 = 1       # coefficient to weigh the fidelity
        self.c1 = -0.01   # coefficient to weigh the depth

        self._available_gates = [XGate, YGate, ZGate, HGate, SGate, TGate, IGate]
        self.action_space = spaces.Tuple((spaces.Discrete(len(self._available_gates)), spaces.Discrete(self.size)))

        self.observation_space = spaces.Box(0.0, 1.0, (1, 1), dtype=np.float64)

    def _get_depth(self):
        """Return the depth of the circuit built so far."""
        return self.circuit.depth()

    def _get_fidelity(self):
        """Return the probability of the last basis state of the circuit's output.

        The circuit is evolved from the all-zero state directly with ``Statevector``
        instead of submitting a simulator job on every step; the last entry of the
        probability vector is the one read by the reward.
        """
        state = Statevector.from_instruction(self.circuit)
        return float(state.probabilities()[-1])

    def _make_observation(self, fidelity):
        """Wrap ``fidelity`` in an array that lies inside ``observation_space``."""
        # Clip so that floating-point round-off (e.g. 1.0000000000000002) cannot
        # push the value outside the Box bounds.
        return np.array([[min(max(fidelity, 0.0), 1.0)]], dtype=np.float64)

    def reset(self, seed=None, options=None):
        """Discard the current circuit and start a new episode.

        Args:
            seed: optional seed forwarded to ``gym.Env.reset``.
            options: accepted for Gymnasium API compatibility; unused.

        Returns:
            ``(observation, info)`` for the empty circuit.
        """
        super().reset(seed=seed)
        self.circuit = QuantumCircuit(self.size)
        return self._make_observation(self._get_fidelity()), {}

    def step(self, action):
        """Append the gate selected by ``action`` and score the resulting circuit.

        Args:
            action: indexable pair ``(gate_index, qubit_index)``.

        Returns:
            ``(observation, reward, terminated, truncated, info)``.
        """
        gate = self._available_gates[int(action[0])]()
        # The modulo keeps the qubit index valid even for out-of-range input.
        self.circuit.append(gate, [int(action[1]) % self.size])

        d = self._get_depth()
        f = self._get_fidelity()

        reward = float(self.c0 * f + self.c1 * d)
        terminated = bool(f > 0.95)          # plain bool rather than numpy.bool_
        observation = self._make_observation(f)
        info = {}         # unused for now
        truncated = False # unused for now
        return observation, reward, terminated, truncated, info

    def get_circuit(self):
        """Return the circuit built so far (the live object, not a copy)."""
        return self.circuit

    def render(self):
        """Draw the circuit with matplotlib and return the drawing.

        Returning the result of ``draw`` lets a notebook display it as the cell
        output; previously it was discarded.
        """
        return self.circuit.draw(output="mpl")

In [18]:
# Publish the environment under a namespaced id, then instantiate it through
# the Gymnasium registry.
register(id='kk/QGym-v2', entry_point=QGymEnv)
env = gym.make('kk/QGym-v2')

In [22]:
# Random-policy rollout: sample up to 10 actions and stop early on termination.
env.reset()
# NOTE(review): presumably meant to skip the passive env checker's first-step
# validation; confirm this attribute actually reaches the checker wrapper.
env.checked_step = True

for step in range(10):
    action = env.action_space.sample()

    observation, reward, terminated, truncated, info = env.step(action)
    print("Step:",(step+1),"\tAction:",action,"\tReward:",reward)

    if terminated:
        # `env` is the wrapped environment returned by gym.make; custom methods
        # such as get_circuit live on the underlying QGymEnv, so go through
        # `unwrapped` rather than relying on wrapper attribute forwarding.
        print(env.unwrapped.get_circuit())
        break

env.close()

Step: 1 	Action: (2, 0) 	Reward: -0.01
Step: 2 	Action: (6, 0) 	Reward: -0.02
Step: 3 	Action: (6, 0) 	Reward: -0.03
Step: 4 	Action: (4, 0) 	Reward: -0.04
Step: 5 	Action: (5, 0) 	Reward: -0.05
Step: 6 	Action: (0, 0) 	Reward: 0.94
   ┌───┐┌───┐┌───┐┌───┐┌───┐┌───┐
q: ┤ Z ├┤ I ├┤ I ├┤ S ├┤ T ├┤ X ├
   └───┘└───┘└───┘└───┘└───┘└───┘
