<a href="https://colab.research.google.com/github/Advanced-Research-Centre/QGym/blob/main/QGym_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Google Colab port of Kajetan Knoop's code

https://github.com/Advanced-Research-Centre/QGym/commit/787b428a4c82bde97aa120e7319da6d7c92d25a2

In [None]:
%%capture
!pip3 install gymnasium
!pip3 install qiskit
!pip3 install qiskit-aer
!pip3 install pylatexenc
!pip3 install matplotlib
!pip3 install tensorflow
!pip3 install tf-agents

In [1]:
from qiskit import QuantumCircuit, Aer
from qiskit.quantum_info import Statevector
from qiskit.circuit.library import XGate, YGate, ZGate, HGate, SGate, TGate, IGate, CXGate, CZGate, CYGate, SwapGate, CHGate, CSGate, CSdgGate, CSwapGate, CCXGate
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from gymnasium.envs.registration import register
import qiskit.tools.jupyter
%matplotlib inline

In [2]:
class QGymEnv(gym.Env):
    def __init__(self, size=1):

        self.size = size  # The size of the circuit
        self.circuit = QuantumCircuit(self.size)

        self.c0 = 1       # coefficient to weigh the fidelity
        self.c1 = -0.01   # coefficient to weigh the depth

        self._available_gates = [XGate, YGate, ZGate, HGate, SGate, TGate, IGate, 
                                 CXGate, CZGate, CYGate, SwapGate, 
                                 CSwapGate, CCXGate]
        self.action_space = spaces.Tuple((spaces.Discrete(len(self._available_gates)), spaces.Discrete(self.size), spaces.Discrete(self.size), spaces.Discrete(self.size)))
        self.observation_space = spaces.Box(0.0, 1.0, (1, 1), dtype=np.float64)

    def _get_depth(self):
        return self.circuit.depth()

    def _get_fidelity(self):
        sim = Aer.get_backend('statevector_simulator')
        result = sim.run(self.circuit, shots=1).result().get_statevector()
        return Statevector(result).probabilities()[-1]

    def reset(self):
        self.circuit = QuantumCircuit(self.size)
        return

    def step(self, action):
        if(action[0] > 10 and self.size > 2):
            if(action[1]%self.size != action[2]%self.size and 
               action[2]%self.size != action[3]%self.size and 
               action[1]%self.size != action[3]%self.size):
                self.circuit.append(self._available_gates[action[0]](), [action[1]%self.size, action[2]%self.size, action[3]%self.size])
            else:
                print("Skipping")
        elif(action[0] > 6 and self.size > 1):
            if(action[1]%self.size != action[2]%self.size):
                self.circuit.append(self._available_gates[action[0]](), [action[1]%self.size, action[2]%self.size])
            else:
                print("Skipping")
        elif(action[0] <= 6):
            self.circuit.append(self._available_gates[action[0]](), [action[1]%self.size])
        else:
            print("Skipping")
        d = self._get_depth()
        f = self._get_fidelity()
        reward = self.c0*f + self.c1*d
        terminated = (f > 0.95)
        observation = f   # unused for now
        info = {}         # unused for now
        truncated = False # unused for now
        return observation, reward, terminated, truncated, info

    def get_circuit(self):
        return self.circuit

    def render(self):
        self.circuit.draw(output="mpl")
        return

In [3]:
register(
    id='kk/QGym-v2',
    entry_point=QGymEnv
)
env = gym.make('kk/QGym-v2')

In [4]:
env.reset()
env.checked_step = True

for step in range(10):
    action = env.action_space.sample()

    observation, reward, terminated, truncated, info = env.step(action)
    print("Step:",(step+1),"\tAction:",action,"\tReward:",reward)

    #if terminated:
    #  print(env.get_circuit())
    #  break

# Always print circuit
print(env.get_circuit())
env.close()

  logger.deprecation(
  logger.deprecation(
  logger.warn(


Step: 1 	Action: (0, 0, 0, 0) 	Reward: 0.99
Step: 2 	Action: (6, 0, 0, 0) 	Reward: 0.98
Step: 3 	Action: (3, 0, 0, 0) 	Reward: 0.4700000000000001
Skipping
Step: 4 	Action: (9, 0, 0, 0) 	Reward: 0.4700000000000001
Step: 5 	Action: (0, 0, 0, 0) 	Reward: 0.4599999999999999
Skipping
Step: 6 	Action: (8, 0, 0, 0) 	Reward: 0.4599999999999999
Skipping
Step: 7 	Action: (8, 0, 0, 0) 	Reward: 0.4599999999999999
Step: 8 	Action: (2, 0, 0, 0) 	Reward: 0.4499999999999999
Step: 9 	Action: (1, 0, 0, 0) 	Reward: 0.4400000000000001
Step: 10 	Action: (4, 0, 0, 0) 	Reward: 0.4300000000000001
   ┌───┐┌───┐┌───┐┌───┐┌───┐┌───┐┌───┐
q: ┤ X ├┤ I ├┤ H ├┤ X ├┤ Z ├┤ Y ├┤ S ├
   └───┘└───┘└───┘└───┘└───┘└───┘└───┘


  logger.warn(
  gym.logger.warn("Casting input x to numpy array.")
  logger.warn(f"{pre} is not within the observation space.")
  logger.warn(
