In [9]:

# simple_qrl.py
import gymnasium as gym
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from qiskit import QuantumCircuit
from qiskit.circuit import ParameterVector
from qiskit_aer import AerSimulator
from qiskit.primitives import Sampler
from qiskit_machine_learning.neural_networks import SamplerQNN
from qiskit_machine_learning.connectors import TorchConnector

# --- Quantum Q-network ---
class QuantumQNetwork(nn.Module):
    def __init__(self, obs_size, n_actions):
        super().__init__()
        self.obs_size = obs_size
        self.n_actions = n_actions
        # prepare Qiskit parameters for encoding and variational
        self.x_params = ParameterVector('x', obs_size)
        self.w_params = ParameterVector('w', obs_size)

        # build circuit: RX for data, RZ for weights, measure Z
        qc = QuantumCircuit(obs_size)
        for i in range(obs_size):
            qc.rx(self.x_params[i], i)
            qc.rz(self.w_params[i], i)
        qc.measure_all()

        # create SamplerQNN and TorchConnector
        sampler = Sampler()
        qnn = SamplerQNN(
            circuit=qc,
            input_params=self.x_params,
            weight_params=self.w_params,
            sampler=sampler,
            output_shape=(obs_size,)
        )
        self.model = TorchConnector(qnn)

    def forward(self, state):
        # state: scalar int or tensor [1]
        if isinstance(state, int):
            s = state
        else:
            s = int(state.item())
        # encode as one-hot angles
        x = torch.zeros(self.obs_size)
        x[s] = np.pi
        x = x.unsqueeze(0)  # batch dim
        # weights: combine nn.Parameter vector
        if not hasattr(self, 'weights'):
            # initialize learnable weights once on first forward
            self.weights = nn.Parameter(torch.randn(self.obs_size) * 2 * np.pi)
            self.register_parameter('weights', self.weights)
        w = self.weights.unsqueeze(0)
        # forward through QNN
        q_vals = self.model(x, w)
        # map q_vals to action values: repeat or simple linear layer
        # here simply pad/truncate to n_actions
        if self.obs_size >= self.n_actions:
            return q_vals[:, :self.n_actions]
        else:
            # replicate last value
            pad = q_vals[:, -1].repeat(1, self.n_actions - self.obs_size)
            return torch.cat([q_vals, pad], dim=1)

# --- Training loop ---
env = gym.make('FrozenLake-v1', is_slippery=False)
obs_size = env.observation_space.n
n_actions = env.action_space.n
qnet = QuantumQNetwork(obs_size, n_actions)
opt = optim.Adam(qnet.parameters(), lr=0.01)
gamma = 0.99

episodes = 100
for episode in range(episodes):
    state, _ = env.reset()
    done = False
    while not done:
        # epsilon-greedy
        if np.random.rand() < 0.1:
            action = env.action_space.sample()
        else:
            with torch.no_grad():
                q_values = qnet(state)
            action = int(torch.argmax(q_values))

        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated

        q_current = qnet(state)[0, action]
        with torch.no_grad():
            q_next_vals = qnet(next_state)
            q_next = torch.max(q_next_vals)
            target = reward + (1 - done) * gamma * q_next
        loss = (q_current - target).pow(2)

        opt.zero_grad()
        loss.backward()
        opt.step()

        state = next_state

    if episode % 10 == 0:
        print(f"Episode {episode} completed")


  sampler = Sampler()
  qnn = SamplerQNN(
No interpret function given, output_shape will be automatically determined as 2^num_virtual_qubits.


TypeError: TorchConnector.forward() takes from 1 to 2 positional arguments but 3 were given

In [None]:
def qrl_circuit(x_params, s_params, w_params, obs_size):
    