In [2]:
!pip list | grep qiskit

qiskit                        0.46.1
qiskit-aer                    0.13.3
qiskit-ibm-runtime            0.20.0
qiskit-terra                  0.46.1


In [1]:
from qiskit_ibm_runtime import QiskitRuntimeService, Options, Sampler, Session, Estimator
from qiskit import QuantumCircuit

from qiskit_aer import AerSimulator


from qiskit import IBMQ, Aer

from qiskit_ibm_runtime.fake_provider import FakeManilaV2

from qiskit.transpiler.preset_passmanagers import generate_preset_pass_manager

from qiskit.quantum_info import SparsePauliOp

from torchquantum.measurement import expval_joint_analytical

from torchquantum.measurement import expval_joint_sampling

In [8]:
from dotenv import dotenv_values

config = dotenv_values(".env")

In [10]:
service = QiskitRuntimeService(channel="ibm_quantum", token=config["IBM_TOKEN"])

In [23]:
service.backends()

[<IBMBackend('simulator_extended_stabilizer')>,
 <IBMBackend('simulator_mps')>,
 <IBMBackend('simulator_statevector')>,
 <IBMBackend('simulator_stabilizer')>,
 <IBMBackend('ibm_brisbane')>,
 <IBMBackend('ibm_kyoto')>,
 <IBMBackend('ibm_osaka')>,
 <IBMBackend('ibmq_qasm_simulator')>]

In [4]:
#service = QiskitRuntimeService()


In [24]:
backend = service.backend("ibmq_qasm_simulator")

In [25]:
import math
import pdb
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import torchquantum as tq
import qiskit_aer


In [26]:
from torchquantum.plugin.qiskit import tq2qiskit

In [27]:
import torchquantum.functional as tqf
import argparse
import tqdm
import time

import torch
import torch.nn.functional as F
#import torchtext.legacy
#from torchtext.legacy import data, datasets, vocab

In [28]:
class MultiHeadAttentionBase(nn.Module):
    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 dropout: float = 0.1,
                 mask=None,
                 use_bias=False):
        super(MultiHeadAttentionBase, self).__init__()

        assert embed_dim % num_heads == 0, f"Embedding dimension ({embed_dim}) should be divisible by number of heads ({num_heads})"

        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.d_k = embed_dim // num_heads  # projection dimensions
        self.k_linear = None
        self.q_linear = None
        self.v_linear = None
        self.combine_heads = None
        self.dropout = nn.Dropout(dropout)
        self.attn_weights = None
    
    def separate_heads(self, x):
        '''
        split into N heads
        from (batch_size, seq_len, embed_dim)
        to   (batch_size, seq_len, num_heads, embed_dim)
        then transpose (1,2) to (batch_size, num_heads, seq_len, embed_dim)
        to make mat mult straightforward for each head
        '''
        batch_size = x.size(0)
        x = x.view(batch_size, -1, self.num_heads, self.d_k)
        return x.transpose(1, 2)

    def attention(self, query, key, value, mask=None, dropout=None):
        '''
        Attention(Q, K, V) = softmax(Q K^T / sqrt(d_k))V
        '''
        scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.d_k)
        # see also: https://tensorchiefs.github.io/dlday2018/tutorial/einsum.html
        #scores = torch.einsum('bijh, bkjh -> bikh', query, key) / math.sqrt(self.d_k)
        if mask is not None:
            mask = mask.unsqueeze(1)
            scores = scores.masked_fill(mask == 0, -1e9)
        scores = F.softmax(scores, dim=-1)
        if dropout is not None:
            scores = dropout(scores)
        attn = torch.matmul(scores, value)
        return attn, scores
    
    def downstream(self, query, key, value, batch_size, mask=None):
        Q = self.separate_heads(query)
        K = self.separate_heads(key)
        V = self.separate_heads(value)

        x, self.attn_weights = self.attention(Q, K, V, mask, dropout=self.dropout)

        concat = x.transpose(1, 2).contiguous().view(batch_size, -1, self.embed_dim)

        return concat
        # output = self.combine_heads(concat)
        # return output

   # def forward(self, x, mask=None):
    #    raise NotImplementedError("Base class does not execute forward function.")
        
        


In [29]:
class MultiHeadAttentionClassical(MultiHeadAttentionBase):
    
    def __init__(self, embed_dim: int,
                 num_heads: int,
                 dropout=0.1,
                 mask=None,
                 use_bias=False):
        super(MultiHeadAttentionClassical, self).__init__(embed_dim=embed_dim, num_heads=num_heads, dropout=dropout, mask=mask, use_bias=use_bias)

        self.k_linear = nn.Linear(embed_dim, embed_dim, bias=use_bias)
        self.q_linear = nn.Linear(embed_dim, embed_dim, bias=use_bias)
        self.v_linear = nn.Linear(embed_dim, embed_dim, bias=use_bias)
        self.combine_heads = nn.Linear(embed_dim, embed_dim, bias=use_bias)
    
    def forward(self, x, mask=None):
        batch_size, seq_len, embed_dim = x.size()
        assert embed_dim == self.embed_dim, f"Input embedding ({embed_dim}) does not match layer embedding size ({self.embed_dim})"

        K = self.k_linear(x)
        Q = self.q_linear(x)
        V = self.v_linear(x)

        x = self.downstream(Q, K, V, batch_size, mask)
        output = self.combine_heads(x)
        return output

In [30]:
class QLayer(tq.QuantumModule):
        def __init__(self, n_qbits, *args, **kwargs):
            super().__init__()    
            self.n_wires = n_qbits
            self.encoder = tq.GeneralEncoder(
                    [{'input_idx': [i], 'func': 'rx', 'wires': [i]} for i in range(self.n_wires)])
            #self.rx_list = [tq.RX(has_params=True, trainable=True) for _ in range(self.n_wires)]
            #self.ry_test = tq.RY(has_params=True, trainable=True)
            #self.measure = tq.MeasureAll(tq.PauliZ)
            if n_qbits >= 2:
                self.rx_0 = tq.RX(has_params=True, trainable=True)
                self.rx_1 = tq.RX(has_params=True, trainable=True)
            if n_qbits >= 4:
                self.rx_2 = tq.RX(has_params=True, trainable=True)
                self.rx_3 = tq.RX(has_params=True, trainable=True)

        def ansatz_gate_forward(self, q_device):
            if self.n_wires >= 2:
                self.rx_0(q_device, wires=0)
                self.rx_1(q_device, wires=1)
            if self.n_wires >= 4:
                self.rx_2(q_device, wires=2)
                self.rx_3(q_device, wires=3)

        @tq.static_support
        def forward(self, q_device, x):
            self.encoder(q_device, x)
            #for k in range(self.n_wires):
                 #self.rx_list[k](q_device, wires=k)
            #self.ry_test(q_device, wires=0)
            self.ansatz_gate_forward(q_device)

            for k in range(self.n_wires):
                if k==self.n_wires-1:
                    tqf.cnot(q_device, wires=[k, 0], static=self.static_mode, parent_graph=self.graph) 
                else:
                    tqf.cnot(q_device, wires=[k, k+1], static=self.static_mode, parent_graph=self.graph)
            q_device = q_device.bfloat16()
            #output = tq.MeasureAll(tqf.PauliZ)(q_device) 

In [31]:
q_layer = QLayer(4)

In [32]:
x_1 = torch.tensor(np.random.rand(1, 4), dtype=torch.float32)
x_2 = torch.tensor(np.random.rand(1, 4), dtype=torch.float32)
x_3 = torch.tensor(np.random.rand(1, 4), dtype=torch.float32)
q_dev = tq.QuantumDevice(n_wires=4, device="cpu", bsz=x_1.shape[0])

In [33]:
circuit_1 = tq2qiskit(q_device=q_dev, m=q_layer, x=x_1)
circuit_2 = tq2qiskit(q_device=q_dev, m=q_layer, x=x_2)
circuit_3 = tq2qiskit(q_device=q_dev, m=q_layer, x=x_3)

In [34]:
circuit_3.draw()

In [None]:
"""real_hardware = service.backend("ibm_brisbane")
aer = AerSimulator.from_backend(real_hardware)"""

In [35]:
options = Options(optimization_level=1, execution={"shots":4000})

In [53]:
from qiskit import QuantumCircuit
 
qc = QuantumCircuit(2, 2)
qc.h(0)
#qc.h(1)
#qc.h(0)
#qc.h(1)
#qc.x(0)
#qc.x(1)
#qc.cnot(0,1)
qc.draw()

In [54]:
pm = generate_preset_pass_manager(backend=backend, optimization_level=1)
circ_test = pm.run(qc)

In [55]:
observables = ["ZI", "IZ"]


with Session(service=service, backend=backend) as session:

    estimator = Estimator(session=session, options=options)
    job = estimator.run(circuits=[circ_test, circ_test], observables=observables)
    print(f"Estimator results: {job.result()}")

  job = estimator.run(circuits=[circ_test, circ_test], observables=observables)


Estimator results: EstimatorResult(values=array([ 1.  , -0.02]), metadata=[{'variance': 0.0, 'shots': 4000}, {'variance': 0.9996, 'shots': 4000}])


In [36]:
pm = generate_preset_pass_manager(backend=backend, optimization_level=1)
circ_pc_1 = pm.run(circuit_1)
circ_pc_2 = pm.run(circuit_2)
circ_pc_3 = pm.run(circuit_3)

In [56]:
#observables = ["ZZZZ", "ZZZZ", "ZZZZ"]

#H1 = SparsePauliOp.from_list([("ZIII", 1), ("IZII", 2), ("XIII", 3)])

#observables = [H1, H1, H1]

observables = ["ZIII", "ZIII", "ZIII"]


with Session(service=service, backend=backend) as session:

    estimator = Estimator(session=session, options=options)
    job = estimator.run(circuits=[circ_pc_1, circ_pc_1, circ_pc_1], observables=observables)
    print(f"Estimator results: {job.result()}")

  job = estimator.run(circuits=[circ_pc_1, circ_pc_1, circ_pc_1], observables=observables)


Estimator results: EstimatorResult(values=array([-0.22  , -0.239 , -0.2145]), metadata=[{'variance': 0.9516, 'shots': 4000}, {'variance': 0.942879, 'shots': 4000}, {'variance': 0.95398975, 'shots': 4000}])


In [57]:
np.squeeze([job.result().values for i in range(3)])

array([[-0.22  , -0.239 , -0.2145],
       [-0.22  , -0.239 , -0.2145],
       [-0.22  , -0.239 , -0.2145]])

In [47]:
np.squeeze([job.result().values for i in range(3)])

array([[ 0.8046875 ,  0.        , -0.06640625],
       [ 0.8046875 ,  0.        , -0.06640625],
       [ 0.8046875 ,  0.        , -0.06640625]])

In [None]:
with Session(service=service, backend=backend) as session:
       # Submit a request to the Estimator primitive within the session.
    estimator = Estimator(session=session, options=options)
    job = estimator.run(circuits=[circ_pc], observables=["ZZZZ"])
    print(f"Estimator results: {job.result()}")

In [312]:
class MultiHeadAttentionQuantum(MultiHeadAttentionBase):
    
            
    def __init__(self,
                 embed_dim: int,
                 num_heads: int,
                 dropout=0.1,
                 mask=None,
                 use_bias=False,
                 n_qubits: int = 4,
                 n_qlayers: int = 1,
                 q_device="default.qubit",
                 session: Session = None):
        super(MultiHeadAttentionQuantum, self).__init__(embed_dim, num_heads, dropout=dropout, mask=mask, use_bias=use_bias)
        
        # todo: add intermediate layer to "dress" quantum circuit
        assert n_qubits == embed_dim, "Number of qubits ({n_qubits}) does not match embedding dim ({embed_dim})"
        self.n_qubits = n_qubits
        #self.n_qlayers = n_qlayers
        self.k_layer = QLayer(n_qubits)
        self.q_layer = QLayer(n_qubits)
        self.v_layer = QLayer(n_qubits)
        #self.measure = tq.MeasureAll(tq.PauliZ)
        self.q_device = q_device
        self.session = session

    def forward(self, x, mask=None):
        batch_size, seq_len, embed_dim = x.size()
        assert embed_dim == self.embed_dim, f"Input embedding ({embed_dim}) does not match layer embedding size ({self.embed_dim})"

        q_dev = tq.QuantumDevice(n_wires=self.n_qubits, device=self.q_device, bsz=x.shape[0])

        options = Options(optimization_level=1, execution={})

        observable = SparsePauliOp("Z" * self.n_qubits)
        estimator = Estimator(session=self.session, options=options)
        
        K = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.k_layer, x=x[:, t, :].clone())],
                                     observables=[observable]).result().values for t in range(seq_len)]
        
        Q = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.q_layer, x=x[:, t, :].clone())],
                                     observables=[observable]).result().values for t in range(seq_len)]
        
        V = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.v_layer, x=x[:, t, :].clone())],
                                     observables=[observable]).result().values for t in range(seq_len)]
        
        print(K)
        
        #K = [self.q_layer(x[:, t, :].clone(),q_dev) for t in range(seq_len)]
        #Q = [self.q_layer(x[:, t, :].clone(),q_dev) for t in range(seq_len)]
        #V = [self.q_layer(x[:, t, :].clone(),q_dev) for t in range(seq_len)]

        K = torch.Tensor(pad_sequence(K))
        Q = torch.Tensor(pad_sequence(Q))
        V = torch.Tensor(pad_sequence(V))
        x = self.downstream(Q, K, V, batch_size, mask)
        #output = [self.q_layer(x[:, t, :],q_dev) for t in range(seq_len)]
        #output = torch.Tensor(pad_sequence(output)).clone()
        return x


In [313]:
EMBED_DIM = 4

BATCH_SIZE = 1

SEQ_LEN = 3


In [314]:
classical_module = MultiHeadAttentionClassical(embed_dim=EMBED_DIM, num_heads=4, dropout=0.0)
#quantum_module = MultiHeadAttentionQuantum(embed_dim=EMBED_DIM, num_heads=4, dropout=0.0, n_qubits=EMBED_DIM, q_device="cuda", session=session)

In [315]:
test_input = torch.tensor(np.random.rand(BATCH_SIZE, SEQ_LEN, EMBED_DIM), dtype=torch.float32)

In [316]:
output = classical_module(test_input)

In [317]:
output.shape

torch.Size([1, 3, 4])

In [318]:
backend = service.backend("ibmq_qasm_simulator")

In [319]:
with Session(service=service, backend=backend) as session:
    quantum_module = MultiHeadAttentionQuantum(embed_dim=EMBED_DIM, num_heads=4, dropout=0.0, n_qubits=EMBED_DIM, q_device="cuda", session=session)
    output_q = quantum_module(test_input)

  K = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.k_layer, x=x[:, t, :].clone())],
  Q = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.q_layer, x=x[:, t, :].clone())],
  V = [estimator.run(circuits=[tq2qiskit(q_device=q_dev, m=self.v_layer, x=x[:, t, :].clone())],


[array([-0.61328125]), array([-0.95117188]), array([-0.52539062])]


TypeError: expected Tensor as element 0 in argument 0, but got numpy.ndarray

In [74]:
output_q.shape

torch.Size([4, 20, 8])

In [76]:
tq2qiskit(q_device=tq.QuantumDevice(n_wires=EMBED_DIM, bsz=BATCH_SIZE),m=q_layer)

TypeError: QLayer.forward() missing 1 required positional argument: 'q_device'