In [1]:
from qiskit_gym.envs import LinearFunctionGym, CliffordGym, PermutationGym
from qiskit_gym.rl import RLSynthesis, PPOConfig, AlphaZeroConfig, BasicPolicyConfig

In [2]:
from qiskit import QuantumCircuit
from qiskit.transpiler import CouplingMap
import numpy as np

# Permutation

### Set up the environment

In [3]:
# 3x3 grid connectivity; bidirectional=False keeps one directed edge per neighbouring pair.
cmap_3x3 = CouplingMap.from_grid(num_rows=3, num_columns=3, bidirectional=False)

# Permutation-synthesis environment derived from that coupling map.
env = PermutationGym.from_coupling_map(cmap_3x3)

# Synthesis driver: PPO algorithm config and basic policy config, both with defaults.
rls = RLSynthesis(
    env,
    PPOConfig(),
    BasicPolicyConfig(),
)

# Show the gates the agent is allowed to place.
rls.env.config["gateset"]

[('SWAP', (0, 1)),
 ('SWAP', (0, 3)),
 ('SWAP', (1, 2)),
 ('SWAP', (1, 4)),
 ('SWAP', (2, 5)),
 ('SWAP', (3, 4)),
 ('SWAP', (3, 6)),
 ('SWAP', (4, 5)),
 ('SWAP', (4, 7)),
 ('SWAP', (5, 8)),
 ('SWAP', (6, 7)),
 ('SWAP', (7, 8))]

### Train the model

In [4]:
# Short demo training run of 10 iterations; progress is written under tb_path.
rls.learn(
    num_iterations=10,
    tb_path="runs/perm_square_3x3/",
)

[32m2025-08-31 17:45:33.764[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.10000000149011612, 'ppo_10': 0.6600000262260437}, 'rewards': {'ppo_deterministic': -0.357421875, 'ppo_10': 0.4847656190395355}, 'difficulty': 1, 'success': 0.10000000149011612, 'reward': -0.357421875} | {'to_rust': 0.006806819, 'eval_ppo_deterministic': 0.004226724, 'eval_ppo_10': 0.010899603, 'collect': 0.015823055, 'data_to_torch': 0.364470922, 'train': 0.352989206, 'total': 0.755962711}[0m
[32m2025-08-31 17:45:33.773[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m160[0m - [1m(1/0) Improved, saved checkpoint![0m
[32m2025-08-31 17:45:33.862[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.05000000074505806, 'ppo_10': 0.5699999928474426}, 'rewards': {'ppo_deterministic': -0.4326171875, 'ppo_10': 0.3494140505

### Save (or load) config and model

In [5]:
# Uncomment the next line to write the model trained above to these paths
# before it is replaced by the load below.
#rls.save("models/perm_square_3x3.json", "models/perm_square_3x3.pt")

# Rebind `rls` to a model restored from the saved config (.json) and model (.pt) files.
rls = RLSynthesis.from_config_json(
    "models/perm_square_3x3.json",
    "models/perm_square_3x3.pt",
)

### Try it

In [6]:
# Draw the target permutation from a seeded generator so the same input is
# used on every Restart & Run All (seed 42 matches the random Clifford example).
# Only the input is pinned: the search below still samples (deterministic=False).
some_perm = np.random.default_rng(seed=42).permutation(9)

# Synthesize a circuit implementing the permutation with the loaded model.
qc_perm = rls.synth(
    some_perm,
    num_searches=1000,
    num_mcts_searches=0,
    deterministic=False,
)
qc_perm.draw(fold=-1)

In [7]:
# Input circuit on 9 qubits with a single SWAP between qubits 0 and 8.
qc_perm_input = QuantumCircuit(9)
qc_perm_input.swap(0, 8)

qc_perm_input.draw(fold=-1)

In [8]:
# Synthesis also accepts a circuit as the target; here the SWAP(0, 8) circuit.
qc_perm_output = rls.synth(
    qc_perm_input,
    num_searches=1000,
    num_mcts_searches=0,
    deterministic=False,
)
qc_perm_output.draw(fold=-1)

# Linear Function

In [9]:
# Five qubits on a line with edges usable in both directions.
# (Renamed from `cmap_6_line`: the map has 5 qubits, matching the
# "lf_5_line" run and model paths.)
cmap_5_line = CouplingMap.from_line(5, bidirectional=True)

# Linear-function synthesis environment on that line, using CX as the only basis gate.
env = LinearFunctionGym.from_coupling_map(cmap_5_line, basis_gates=["CX"])

rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())
rls.learn(num_iterations=10, tb_path="runs/lf_5_line_ppo/")  # This will track progress in Tensorboard

[32m2025-08-31 17:45:35.359[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.11999999731779099, 'ppo_10': 0.8199999928474426}, 'rewards': {'ppo_deterministic': -0.32734376192092896, 'ppo_10': 0.725390613079071}, 'difficulty': 1, 'success': 0.11999999731779099, 'reward': -0.32734376192092896} | {'to_rust': 0.004612737, 'eval_ppo_deterministic': 0.004384158, 'eval_ppo_10': 0.010670347, 'collect': 0.014801937, 'data_to_torch': 0.01303543, 'train': 0.031168687, 'total': 0.079136125}[0m
[32m2025-08-31 17:45:35.364[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m160[0m - [1m(1/0) Improved, saved checkpoint![0m
[32m2025-08-31 17:45:35.450[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.15000000596046448, 'ppo_10': 0.8299999833106995}, 'rewards': {'ppo_deterministic': -0.2822265625, 'ppo_10'

In [10]:
# Uncomment the next line to write the model trained above to these paths
# before it is replaced by the load below.
#rls.save("models/lf_5_line.json", "models/lf_5_line.pt")

# Rebind `rls` to a model restored from the saved config (.json) and model (.pt) files.
rls = RLSynthesis.from_config_json(
    "models/lf_5_line.json",
    "models/lf_5_line.pt",
)

In [11]:
# Input circuit on 5 qubits: one CX from qubit 0 (control) to qubit 4 (target),
# i.e. between the two ends of the line.
qc_lf_input = QuantumCircuit(5)
qc_lf_input.cx(0, 4)

qc_lf_input.draw(fold=-1)

In [12]:
# Re-synthesize the input circuit with the loaded linear-function model.
qc_lf_output = rls.synth(
    qc_lf_input,
    num_searches=1000,
    num_mcts_searches=0,
    deterministic=False,
)
qc_lf_output.draw(fold=-1)

In [13]:
from qiskit.circuit.library.generalized_gates import LinearFunction

# Compare the input and the synthesized circuit as linear functions.
lf_expected = LinearFunction(qc_lf_input)
lf_synthesized = LinearFunction(qc_lf_output)
lf_expected == lf_synthesized

np.True_

# Clifford

In [14]:
# Clifford synthesis with a custom gateset: two-qubit gates act on the pairs
# (0, 1) and (1, 2), while H and S may only be placed on qubit 0.
clifford_gateset = [
    # CX in both directions on each neighbouring pair
    ("CX", [0, 1]),
    ("CX", [1, 0]),
    ("CX", [1, 2]),
    ("CX", [2, 1]),
    # SWAP on each neighbouring pair
    ("SWAP", [0, 1]),
    ("SWAP", [1, 2]),
    # Single-qubit gates, qubit 0 only
    ("H", [0]),
    ("S", [0]),
]
env = CliffordGym(num_qubits=3, gateset=clifford_gateset)

rls = RLSynthesis(env, PPOConfig(), BasicPolicyConfig())
rls.learn(
    num_iterations=10,
    tb_path="runs/clifford_3q_custom/",
)

[32m2025-08-31 17:45:36.883[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/0) {'successes': {'ppo_deterministic': 0.1599999964237213, 'ppo_10': 0.7099999785423279}, 'rewards': {'ppo_deterministic': -0.2671875059604645, 'ppo_10': 0.5599609613418579}, 'difficulty': 1, 'success': 0.1599999964237213, 'reward': -0.2671875059604645} | {'to_rust': 0.005365313, 'eval_ppo_deterministic': 0.003425112, 'eval_ppo_10': 0.00944916, 'collect': 0.015008664, 'data_to_torch': 0.015463646, 'train': 0.030570878, 'total': 0.080090444}[0m
[32m2025-08-31 17:45:36.888[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m160[0m - [1m(1/0) Improved, saved checkpoint![0m
[32m2025-08-31 17:45:36.981[0m | [1mINFO    [0m | [36mtwisterl.rl.algorithm[0m:[36mlearn[0m:[36m152[0m - [1m(1/1) {'successes': {'ppo_deterministic': 0.07999999821186066, 'ppo_10': 0.7300000190734863}, 'rewards': {'ppo_deterministic': -0.38749998807907104, 'ppo

In [15]:
# Uncomment the next line to write the model trained above to these paths
# before it is replaced by the load below.
#rls.save("models/clifford_3q_custom.json", "models/clifford_3q_custom.pt")

# Rebind `rls` to a model restored from the saved config (.json) and model (.pt) files.
rls = RLSynthesis.from_config_json(
    "models/clifford_3q_custom.json",
    "models/clifford_3q_custom.pt",
)

In [16]:
from qiskit.quantum_info import random_clifford, Clifford

In [17]:
# Target: a single Hadamard on qubit 2 of a 3-qubit circuit.
qc_clifford_in = QuantumCircuit(3)
qc_clifford_in.h(2)

# Synthesize an implementation of that target with the loaded Clifford model.
qc_clifford_out = rls.synth(
    qc_clifford_in,
    num_searches=1000,
    num_mcts_searches=0,
    deterministic=False,
)
qc_clifford_out.draw(fold=-1)

In [18]:
# Random 3-qubit Clifford; the fixed seed keeps the target identical across runs.
some_clifford = random_clifford(3, seed=42)

# Synthesis takes the Clifford object directly as its target.
qc_rand_clifford_out = rls.synth(
    some_clifford,
    num_searches=1000,
    num_mcts_searches=0,
    deterministic=False,
)
qc_rand_clifford_out.draw(fold=-1)

In [19]:
# Equivalent up to phase: the last tableau column is left out of the
# comparison, so only the remaining columns have to match.
tableau_target = some_clifford.tableau[:, :-1]
tableau_synth = Clifford(qc_rand_clifford_out).tableau[:, :-1]
np.array_equal(tableau_target, tableau_synth)

True