In [1]:
!pip install qiskit
!pip install pylatexenc
!pip install qiskit.ignis
!pip install qiskit_machine_learning
!pip install transformers

Collecting qiskit
  Downloading qiskit-0.43.2.tar.gz (9.1 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting qiskit-terra==0.24.1 (from qiskit)
  Downloading qiskit_terra-0.24.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m50.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting qiskit-aer==0.12.1 (from qiskit)
  Downloading qiskit_aer-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m54.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting qiskit-ibmq-provider==0.20.2 (from qiskit)
  Downloading qiskit_ibmq_provider-0.20.2-py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from qiskit import *
# Qiskit module
from qiskit import QuantumCircuit
import qiskit.circuit.library as circuit_library
import qiskit.quantum_info as qi
#from qiskit import execute
from qiskit.utils import algorithm_globals
from qiskit.circuit.library import EfficientSU2
from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN
from qiskit_machine_learning.connectors import TorchConnector
from qiskit.primitives import BackendEstimator
from qiskit import Aer

import torch
from qiskit.circuit import ParameterVector

from torch import Tensor
import torch.nn as nn
import numpy as np
from qiskit.quantum_info import SparsePauliOp

from tqdm.notebook import tqdm

from transformers import AutoTokenizer

from torch.utils.data import DataLoader, Dataset
from tensorflow import compat
import tensorflow_datasets as tfds
import scipy


# Load Dataset

In [3]:
tokenizer = AutoTokenizer.from_pretrained("facebook/data2vec-text-base")
vocab_size = len(tokenizer)

# Loading IMDB Dataset from tensorflow datasets
imdb_data = tfds.load("imdb_reviews", shuffle_files=True)
imdb_train_data = tfds.as_dataframe(imdb_data['train']) #.take(64))
imdb_test_data = tfds.as_dataframe(imdb_data['test']) #.take(64))

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

Downloading and preparing dataset 80.23 MiB (download: 80.23 MiB, generated: Unknown size, total: 80.23 MiB) to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0...


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Generating splits...:   0%|          | 0/3 [00:00<?, ? splits/s]

Generating train examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteOL6WH0/imdb_reviews-train.tfrecord…

Generating test examples...:   0%|          | 0/25000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteOL6WH0/imdb_reviews-test.tfrecord*…

Generating unsupervised examples...:   0%|          | 0/50000 [00:00<?, ? examples/s]

Shuffling /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0.incompleteOL6WH0/imdb_reviews-unsupervised.t…

Dataset imdb_reviews downloaded and prepared to /root/tensorflow_datasets/imdb_reviews/plain_text/1.0.0. Subsequent calls will reuse this data.


In [4]:
Selected_train_data=imdb_train_data.loc[:999]
Selected_test_data=imdb_test_data.loc[:99]

In [5]:
class D2Tset(Dataset):
  def __init__(self, data_df, tokenizer=None, max_seq_length=None, padding=True, transform=None, target_transform=None):
    self.df = data_df
    self.tokenizer = tokenizer
    self.max_seq_length = max_seq_length
    self.transform = transform
    self.target_transform = target_transform
    self.pad = padding

    if self.max_seq_length is not None:
      self.truncation = True
    else:
      self.truncation = False

  def __len__(self):
    return self.df.shape[0]

  def __getitem__(self, idx):
    data = self.df.iloc[idx]
    label = data['label']
    feature = self._txt2vec(data['text'])
    return feature.input_ids.squeeze(0), label

  def _txt2vec(self, txt):
    if isinstance(txt, bytes):
        txt = compat.as_str_any(txt)
    tokenized = self.tokenizer(txt, return_tensors='pt', truncation=self.truncation, padding=self.pad, max_length=self.max_seq_length)
    return tokenized

In [6]:
# Using dataloader for the imdb data
imdb_trainset = D2Tset(data_df=Selected_train_data, tokenizer=tokenizer, max_seq_length=128, padding='max_length', transform=None, target_transform=None)
imdb_trainloader = DataLoader(imdb_trainset, shuffle=True, batch_size=100)

imdb_testset = D2Tset(data_df=Selected_test_data, tokenizer=tokenizer, max_seq_length=128, padding='max_length', transform=None, target_transform=None)
imdb_testloader = DataLoader(imdb_testset, shuffle=False, batch_size=100)
imdb_feature, imdb_label = next(iter(imdb_trainloader))
print(imdb_feature.shape, imdb_label.shape)

torch.Size([100, 128]) torch.Size([100])


# Model

In [7]:
import random
class QRNN_qiskit(torch.nn.Module):
    def __init__(self,S,q_n_h,q_n_d,Denc,D):
        """
        # input: input data
        # weight: trainable parameter
        # q_n_h: # of of qubits for encodong input
        # q_n_d: # of of qubits for memerizing history
        # d: embedding dimension which is equal to n(Denc+2)
        # Denc: the # number of layers for encoding
        # D: the # of layers of variational layers
        # type "K": key, "Q": Query, "V": value
        """
        super().__init__()
        self.seq_num=S
        self.num_q=q_n_h+q_n_d
        self.q_n_d=q_n_d
        self.q_n_h=q_n_h
        self.init_params=torch.nn.Parameter((np.pi/4) * (2 * torch.randn(self.num_q*(D+2)*self.seq_num) - 1))
        self.params_input=ParameterVector('IN',q_n_h*(Denc+2)*self.seq_num)
        self.params_trained_vars=ParameterVector('Vars',self.num_q*(D+2)*self.seq_num)
        self.Denc=Denc
        self.D=D
        self.d=q_n_h*(Denc+2)
        self.params_num_per_lyr=self.num_q*(D+2)
        self.bit_string_Z=SparsePauliOp.from_list([('I'*(self.num_q-1)+'Z', 1)])

        Rqnn=EstimatorQNN(circuit=self.QRNN_cir(),observables=[self.bit_string_Z], input_params=self.params_input, weight_params=self.params_trained_vars)

        self.QRNN_models=TorchConnector(Rqnn, initial_weights=self.init_params)


    def QRNN_cir(self):

        qc=QuantumCircuit(self.num_q)
        for sq in range(self.seq_num):
            start=sq*self.d
            end=(sq+1)*self.d
            self.Feature_map(qc,self.params_input[start:end])
            start=sq*self.params_num_per_lyr
            end=(sq+1)*self.params_num_per_lyr
            self.ansatz(qc,self.params_trained_vars[start:end])
            qc.reset(range(self.q_n_d,self.num_q))

        return qc

    def Feature_map(self,qc,params):
        indx=0
        for j in range(self.q_n_h):
            qc.rx(params[indx],j+self.q_n_d)
            qc.ry(params[indx+1],j+self.q_n_d)
            indx+=2
        for i in range(self.Denc):
            for j in range(self.num_q):
                qc.cx(j,(j+1)%self.num_q)

            for j in range(self.q_n_h):
                #qc.rx(params[indx],j)
                qc.ry(params[indx],j+self.q_n_d)
                indx+=1


    def ansatz(self,qc,params):
        indx=0
        for j in range(self.num_q):
            qc.rx(params[indx],j)
            qc.ry(params[indx+1],j)
            indx+=2
        for i in range(self.D):
            for j in range(self.num_q):
                qc.cx(j,(j+1)%self.num_q)

            for j in range(self.num_q):
                #qc.rx(params[indx],j)
                qc.ry(params[indx],j)
                indx+=1

    def forward(self,input):

        x=self.QRNN_models(input)
        return torch.sigmoid(x)



# Train Real Data

In [8]:
def binary_accuracy(preds, y):
    """
    Returns accuracy per batch, i.e. if you get 8/10 right, this returns 0.8, NOT 8
    """

    #round predictions to the closest integer
    rounded_preds = (torch.round(torch.sign(preds-0.5))+1)//2
    correct = (rounded_preds == y).float() #convert into float for division
    acc = correct.sum() / len(correct)
    return acc

In [9]:
model = QRNN_qiskit(8,4,1,2,1)

  self._weights.data = torch.tensor(initial_weights, dtype=torch.float)


In [10]:
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(trainable_params)

240


In [11]:
train_loss = []
optimizer = torch.optim.Adam(lr=0.03, params=model.parameters())
criterion = nn.BCELoss()#torch.nn.CrossEntropyLoss() #nn.BCELoss()
pbar = tqdm(total=len(imdb_trainloader), leave=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.train()
model.to(device)
n_epochs=1
for epoch in range(n_epochs):
  for batch, (feature, label) in enumerate(imdb_trainloader):
    feature, label = feature.to(device), label.to(device)
    feature=feature/50000
    #print(feature)
    optimizer.zero_grad()
    predictions = model(feature.squeeze())
    loss = criterion(predictions.squeeze(), label.float())
    print(loss)
    loss.backward()
    optimizer.step()
    pbar.update()
    pbar.desc = f"Epoch: {epoch} | Batch: {batch} | Loss {loss}"
    train_loss.append(loss.cpu().detach().numpy())

  0%|          | 0/10 [00:00<?, ?it/s]

tensor(0.7575, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7247, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7793, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7468, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7562, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7848, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6955, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7141, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.7888, grad_fn=<BinaryCrossEntropyBackward0>)
tensor(0.6971, grad_fn=<BinaryCrossEntropyBackward0>)


In [13]:
acc=0
pbar = tqdm(total=len(imdb_trainloader), leave=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)
for batch, (feature, label) in enumerate(imdb_trainloader):
  preds = []
  labels = []
  feature, label = feature.to(device), label.to(device)
  with torch.no_grad():
    feature=feature/50000
    predictions = model(feature.squeeze())
    preds.append(predictions.cpu().numpy())
    labels.append(label.cpu().numpy())
  preds = np.array([1 if pred >=0.5 else 0 for pred in preds[0]])
  labels = np.array(labels)
  acc+= (preds == labels).sum() / len(preds)

  pbar.update()
  pbar.desc = f"Batch: {batch}"
pbar.refresh()
pbar.close()
acc/10

  0%|          | 0/10 [00:00<?, ?it/s]

0.507

In [15]:
acc=0
preds = []
labels = []
pbar = tqdm(total=len(imdb_testloader), leave=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.eval()
model.to(device)
for batch, (feature, label) in enumerate(imdb_testloader):
  feature, label = feature.to(device), label.to(device)
  with torch.no_grad():
    feature=feature/50000
    predictions = model(feature.squeeze())
    preds.append(predictions.cpu().numpy())
    labels.append(label.cpu().numpy())
  pbar.update()
  pbar.desc = f"Batch: {batch}"
pbar.refresh()
pbar.close()
preds = np.array([1 if pred >=0.5 else 0 for pred in preds[0]])
labels = np.array(labels)
acc = (preds == labels).sum() / len(preds)
print(acc)

  0%|          | 0/1 [00:00<?, ?it/s]

0.52
