<a href="https://colab.research.google.com/github/Advanced-Research-Centre/QGym/blob/main/QGym_v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

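Google Colab port of Kajetan Knoop's QGym code: a minimal Gymnasium environment whose actions append gates to a Qiskit circuit. Original source commit: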

https://github.com/Advanced-Research-Centre/QGym/commit/6148a7623866d697d327f8a9a40023b281de1cdf

In [38]:
%%capture
!pip3 install gymnasium
!pip3 install qiskit

In [39]:
from qiskit import QuantumCircuit, Aer
from qiskit.quantum_info import Statevector
from qiskit.circuit.library import XGate, YGate, ZGate, HGate, SGate, TGate, IGate
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from gymnasium.envs.registration import register

In [40]:
class QiskitEnv(gym.Env):
    """Toy Gymnasium environment whose actions append gates to a Qiskit circuit.

    Every action appends one fixed gate (X, Y, Z or H) to qubit 0 of
    ``self.circuit``. The observation and the reward are the same number:
    entry 1 of the probability vector of the circuit's final statevector.

    This class does not implement the full Gymnasium return conventions:
    ``reset`` returns only the observation and ``step`` returns only
    ``(observation, reward)``; there is no termination signal or info dict.
    """

    def __init__(self, size=1):
        """Create an empty circuit and declare the action/observation spaces.

        Args:
            size: Number of qubits in the circuit. Gates are only ever
                appended to qubit 0, whatever the size.
        """
        self.size = size  # The size of the circuit
        self.circuit = QuantumCircuit(self.size)

        # Maps each discrete action index to the gate it appends.
        self._action_to_direction = {
            0: XGate(),
            1: YGate(),
            2: ZGate(),
            3: HGate(),
        }

        # One discrete action per available gate (4 gates).
        self.action_space = spaces.Discrete(len(self._action_to_direction))

        # NOTE(review): get_score() returns a scalar, not an array of shape
        # (1, 1) -- confirm whether the space or the observation should change.
        self.observation_space = spaces.Box(0.0, 1.0, (1, 1), dtype=np.float32)

    def get_score(self):
        """Simulate the current circuit and return probability entry 1.

        Returns:
            Element ``[1]`` of ``Statevector.probabilities()`` for the
            statevector produced by the ``statevector_simulator`` backend.
        """
        sim = Aer.get_backend('statevector_simulator')
        result = sim.run(self.circuit, shots=1).result().get_statevector()
        return Statevector(result).probabilities()[1]

    def reset(self, *, seed=None, options=None):
        """Replace the circuit with an empty one and return its score.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset`` so that
                ``env.reset(seed=...)`` works instead of raising ``TypeError``.
            options: Accepted for compatibility with the Gymnasium ``reset``
                signature; not used.

        Returns:
            The score of the freshly reset circuit (observation only, no
            info dict).
        """
        super().reset(seed=seed)
        self.circuit = QuantumCircuit(self.size)

        return self.get_score()

    def step(self, action):
        """Append the gate selected by ``action`` to qubit 0 and score the circuit.

        Args:
            action: Key into the action-to-gate mapping (0..3).

        Returns:
            ``(observation, reward)`` -- both are the same score value.
        """
        self.circuit.append(self._action_to_direction[action], [0])
        # Simulate once and reuse the value; observation and reward are identical.
        score = self.get_score()
        return score, score

    def render(self):
        """Return a matplotlib drawing of the current circuit."""
        return self.circuit.draw(output="mpl")

In [50]:
# Register the environment class under a namespaced id so gym.make can build it.
register(
    id='kk/QGym-v0',
    entry_point=QiskitEnv,
)

# QiskitEnv.step returns (observation, reward) rather than the 5-tuple that
# Gymnasium's passive environment checker requires, so the checker would raise
# on the first step. Build the env without that checker wrapper.
env = gym.make('kk/QGym-v0', disable_env_checker=True)


In [53]:
# Start from an empty circuit before stepping.
env.reset()
# NOTE(review): this sets `checked_step` on the object returned by gym.make;
# presumably intended to skip the env checker's validation of step() --
# confirm it actually has that effect.
env.checked_step = True

# Roll out ten steps with a uniformly random policy.
for step_index in range(10):
    action = env.action_space.sample()

    # This environment's step() yields only (observation, reward).
    observation, reward = env.step(action)

    print(action, observation, reward)

env.close()

2 0.0 0.0
1 1.0 1.0
2 1.0 1.0
3 0.5000000000000001 0.5000000000000001
0 0.4999999999999999 0.4999999999999999
2 0.4999999999999999 0.4999999999999999
3 3.749399456654644e-33 3.749399456654644e-33
2 3.749399456654644e-33 3.749399456654644e-33
1 1.0 1.0
3 0.5000000000000001 0.5000000000000001
