# Quantum Circuit Optimization with Reinforcement Learning on qBraid


In [1]:
import tensorflow_quantum as tfq

ModuleNotFoundError: No module named 'tensorflow_quantum'

## What is quantum circuit optimization?

Quantum circuits can often have Gates which when applied successively can result in identities.

## Why is it so hard?
Circuit optimization is difficult due to the nature of the increasing complexity of large circuits, the ambiguity of the reward function, as well as the larger search space for the optimal circuit with the minimal gate count and circuit depth.

### What are some existing techniques??

#### T-OPT

#### ZX Calculus

#### 

In [2]:
%pip install seaborn

[0mCollecting seaborn
  Downloading seaborn-0.12.2-py3-none-any.whl (293 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m293.3/293.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[0mInstalling collected packages: seaborn
Successfully installed seaborn-0.12.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import seaborn as sns
import tensorflow as tf
import numpy as np

In [2]:
## Build random circuit dataset

# Define the quantum circuit
def create_quantum_circuit():
    qubits = cirq.GridQubit.rect(2, 1)  # Example with 2 qubits
    circuit = cirq.Circuit(
        cirq.X(qubits[0]),
        cirq.X(qubits[0]),
        cirq.Y(qubits[0]),
        cirq.CNOT(qubits[0], qubits[1]),
        cirq.measure(qubits[0], key='m0'),
        cirq.measure(qubits[1], key='m1')
    )
    return circuit


In [3]:
## Define gate set to for assembly instruction / reward function

# Define the reward function
def compute_reward(result):
    target_state = np.array([1 / np.sqrt(2), 0, 0, 1 / np.sqrt(2)])  # Example target states
    probabilities = np.abs(result.state_vector()) ** 2
    state = np.array([probabilities[0], probabilities[2], probabilities[1], probabilities[3]])
    fidelity = np.abs(np.dot(target_state, state)) ** 2
    return fidelity


In [5]:
## 

import random
import numpy as np
import tensorflow as tf
import cirq

class RLAgent:
    def __init__(self, gate_set):
        self.gate_set = gate_set
        self.model = self.build_model()

    def build_model(self):
        num_actions = len(self.gate_set)
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(16, activation='relu', input_shape=(num_actions,)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def update(self, states, rewards):
        self.model.fit(states, rewards, epochs=1, verbose=0)

    def get_action(self, state):
        action_values = self.model.predict(np.array([state]))[0]
        action_index = np.argmax(action_values)
        action = self.gate_set[action_index]
        return action

def create_quantum_circuit(gate_set, num_gates):
    qubits = cirq.LineQubit.range(num_gates)
    circuit = cirq.Circuit()
    for _ in range(num_gates):
        gate = random.choice(gate_set)
        target = random.choice(qubits)
        circuit.append(gate(target))
    return circuit

def compute_reward(result):
    # Define your reward computation logic here
    pass

class QuantumEnvironment:
    def __init__(self, gate_set, num_gates):
        self.gate_set = gate_set
        self.num_gates = num_gates

    def run(self, gates):
        circuit = create_quantum_circuit(self.gate_set, self.num_gates)
        circuit.append(gates)
        simulator = cirq.Simulator()
        result = simulator.run(circuit)
        reward = compute_reward(result)
        return reward

# Main training loop
def train_agent(agent, environment, num_episodes):
    rewards = []
    for episode in range(num_episodes):
        gates = []
        for _ in range(agent.num_gates):
            state = np.eye(len(agent.gate_set))[gates]
            action = agent.get_action(state)
            gates.append(action)
        reward = environment.run(gates)
        agent.update(np.eye(len(agent.gate_set))[gates], np.array([reward]))
        rewards.append(reward)
    return rewards

# Define the gate set for assembly instructions
gate_set = [
    cirq.X,
    cirq.Y,
    cirq.Z,
    cirq.H,
    cirq.CNOT,
]

# Create the agent and environment
num_gates = 10
agent = RLAgent(gate_set)
environment = QuantumEnvironment(gate_set, num_gates)

# Train the agent
num_episodes = 100
training_rewards = train_agent(agent, environment, num_episodes)

# Print the training rewards
for episode, reward in enumerate(training_rewards):
    print(f"Episode {episode+1}: Reward = {reward}")


2023-06-29 19:09:47.693268: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-06-29 19:09:47.693320: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2023-06-29 19:09:47.693351: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (jupyter-rickyyoung-40qbraid-2ecom): /proc/driver/nvidia/version does not exist
2023-06-29 19:09:47.693679: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


AttributeError: 'RLAgent' object has no attribute 'num_gates'

In [None]:
## The assembly instruction is the set of gates you are interested in using



# Define the quantum environment
class QuantumEnvironment:
    def __init__(self):
        self.circuit = create_quantum_circuit()

    def run(self, params):
        simulator = cirq.Simulator()
        resolved_circuit = cirq.resolve_parameters(self.circuit, params)
        result = simulator.simulate(resolved_circuit)
        reward = compute_reward(result)
        return reward

# Define the reinforcement learning agent
class RLAgent:
    def __init__(self):
        self.model = self.build_model()

    def build_model(self):
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(16, activation='relu', input_shape=(2,)),
            tf.keras.layers.Dense(16, activation='relu'),
            tf.keras.layers.Dense(1)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def update(self, states, rewards):
        self.model.fit(states, rewards, epochs=1, verbose=0)

    def get_action(self, state):
        action = self.model.predict(np.array([state]))
        return action

# Main training loop
def train_agent(agent, num_episodes):
    environment = QuantumEnvironment()
    rewards = []
    for episode in range(num_episodes):
        state = np.random.uniform(low=-np.pi, high=np.pi, size=(2,))
        action = agent.get_action(state)
        reward = environment.run(action)
        rewards.append(reward)
        agent.update(np.array([state]), np.array([reward]))
    return rewards

# Create the agent and train it
agent = RLAgent()
num_episodes = 100
training_rewards = train_agent(agent, num_episodes)

# Print the training rewards
for episode, reward in enumerate(training_rewards):
    print(f"Episode {episode+1}: Reward = {reward}")


In this code, we define a quantum circuit using Cirq and a reward function that computes the fidelity between the output state of the circuit and a target state. The QuantumEnvironment class represents the quantum environment in which the agent interacts. The RLAgent class represents the reinforcement learning agent, which uses a neural network model built with TensorFlow to approximate the optimal action given a state. The train_agent function is the main training loop, where the agent interacts with the environment and updates its model based on the observed rewards.

Please note that this is just a basic starter code, and you may need to modify and extend it depending on your specific requirements and the complexity of the quantum circuits you want to optimize.

Ref:

https://arxiv.org/pdf/2103.07585.pdf