In [None]:
# Install the third-party packages this notebook needs into the current runtime.
# NOTE(review): none of these installs is version-pinned, so a fresh run may
# resolve different releases than the ones recorded in the captured output
# (which shows qiskit_machine_learning 0.6.1 and transformers 4.30.2).
# NOTE(review): torch, tensorflow, tensorflow_datasets and tqdm are imported
# later but not installed here - presumably they are preinstalled in the
# runtime; confirm before running elsewhere.
!pip install qiskit
!pip install pylatexenc
!pip install qiskit.ignis
!pip install qiskit_machine_learning
!pip install transformers

  Building wheel for fastdtw (setup.py) ... [?25l[?25hdone
  Created wheel for fastdtw: filename=fastdtw-0.3.4-cp310-cp310-linux_x86_64.whl size=517928 sha256=70ae52416846be756e198ac804af0adbbbd2dc09769d5adccbe94f060a8e150f
  Stored in directory: /root/.cache/pip/wheels/73/c8/f7/c25448dab74c3acf4848bc25d513c736bb93910277e1528ef4
Successfully built fastdtw
Installing collected packages: fastdtw, qiskit_machine_learning
Successfully installed fastdtw-0.3.4 qiskit_machine_learning-0.6.1
Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m60.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m29.3 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformer

In [None]:
from qiskit import *
# Qiskit module
from qiskit import QuantumCircuit
import qiskit.circuit.library as circuit_library
import qiskit.quantum_info as qi
#from qiskit import execute
from qiskit.utils import algorithm_globals
from qiskit.circuit.library import EfficientSU2
from qiskit_machine_learning.neural_networks import SamplerQNN, EstimatorQNN
from qiskit_machine_learning.connectors import TorchConnector
import torch
from qiskit.circuit import ParameterVector, Parameter
from qiskit.circuit.parametervector import ParameterVectorElement
from torch import Tensor
import torch.nn as nn
import numpy as np
from qiskit.quantum_info import SparsePauliOp
from tqdm.notebook import tqdm

from transformers import AutoTokenizer

from torch.utils.data import DataLoader, Dataset
from tensorflow import compat
import tensorflow_datasets as tfds

In [None]:
# Pre-trained tokenizer; its length is used as the vocabulary size
tokenizer = AutoTokenizer.from_pretrained("facebook/data2vec-text-base")
vocab_size = len(tokenizer)

# IMDB reviews from tensorflow datasets, with each split converted to a DataFrame
# (train first, then test). Append .take(64) to a split for a quick smoke run.
imdb_data = tfds.load("imdb_reviews", shuffle_files=True)
imdb_train_data, imdb_test_data = (
    tfds.as_dataframe(imdb_data[split_name]) for split_name in ("train", "test")
)

In [None]:
class D2Tset(Dataset):
  """Map-style dataset that tokenizes the 'text' column of a DataFrame on access.

  Each item is a pair ``(input_ids, label)`` where ``input_ids`` is the
  tokenizer's ``input_ids`` tensor with its leading dimension squeezed away and
  ``label`` is the row's 'label' value.
  """

  def __init__(self, data_df, tokenizer=None, max_seq_length=None, padding=True, transform=None, target_transform=None):
    """
    Parameters:
      data_df (DataFrame): Rows are indexed with ``.iloc`` and must provide
        'text' and 'label' columns.
      tokenizer (callable): Called as ``tokenizer(text, return_tensors='pt', ...)``
        and expected to return an object exposing ``input_ids``.
      max_seq_length (int | None): Forwarded as ``max_length``; truncation is
        enabled only when this is not None.
      padding: Forwarded unchanged as the tokenizer's ``padding`` argument.
      transform (callable | None): Applied to the token-id tensor when given.
      target_transform (callable | None): Applied to the label when given.
    """
    self.df = data_df
    self.tokenizer = tokenizer
    self.max_seq_length = max_seq_length
    self.transform = transform
    self.target_transform = target_transform
    self.pad = padding
    # Truncating only makes sense when there is a length to truncate to.
    self.truncation = self.max_seq_length is not None

  def __len__(self):
    """Number of rows in the wrapped DataFrame."""
    return self.df.shape[0]

  def __getitem__(self, idx):
    """Return ``(token_ids, label)`` for row ``idx``, with optional transforms applied."""
    row = self.df.iloc[idx]
    label = row['label']
    feature = self._txt2vec(row['text']).input_ids.squeeze(0)
    # Both hooks are optional; they were previously stored but never invoked.
    if self.transform is not None:
      feature = self.transform(feature)
    if self.target_transform is not None:
      label = self.target_transform(label)
    return feature, label

  def _txt2vec(self, txt):
    """Tokenize one text (bytes are first converted to str) and return the tokenizer output."""
    if self.tokenizer is None:
      # Same exception type as calling None, but with an actionable message.
      raise TypeError("D2Tset requires a tokenizer to encode text, but tokenizer is None")
    if isinstance(txt, bytes):
      txt = compat.as_str_any(txt)
    return self.tokenizer(
      txt,
      return_tensors='pt',
      truncation=self.truncation,
      padding=self.pad,
      max_length=self.max_seq_length,
    )

In [None]:
# Wrap both IMDB splits in tokenizing datasets and single-sample dataloaders
dataset_options = dict(
    tokenizer=tokenizer,
    max_seq_length=128,
    padding='max_length',
    transform=None,
    target_transform=None,
)

imdb_trainset = D2Tset(data_df=imdb_train_data, **dataset_options)
imdb_trainloader = DataLoader(imdb_trainset, batch_size=1, shuffle=True)

imdb_testset = D2Tset(data_df=imdb_test_data, **dataset_options)
imdb_testloader = DataLoader(imdb_testset, batch_size=1, shuffle=False)

# Draw one training batch to sanity-check the tensor shapes
imdb_feature, imdb_label = next(iter(imdb_trainloader))
print(imdb_feature.shape, imdb_label.shape)

In [None]:
class qrnn(nn.Module):
  """Parameterized quantum circuit exposed as a PyTorch binary classifier.

  The circuit has two quantum registers of ``n_qubs`` qubits each (``regD`` and
  ``regH``) and one classical register (``regY``). A single scalar input angle
  ``θ`` is RY-encoded on every ``regD`` qubit, followed by a trainable ansatz of
  RX-RZ-RX rotations on all qubits and a closed ring of RZZ gates. The circuit
  is evaluated with an ``EstimatorQNN`` wrapped in a ``TorchConnector``.

  NOTE(review): ``forward`` evaluates the circuit independently for each of the
  ``n_qrbs`` steps and returns only the last step's output, so no state is
  carried from one step to the next.
  """

  def __init__(self, n_qrbs, n_qubs, vocab_size, staggered: bool=False):
    """
    Parameters:
      n_qrbs (int): Number of sequence positions consumed by ``forward``.
      n_qubs (int): Qubits per quantum register (the circuit uses ``2 * n_qubs``).
      vocab_size (int): Not used by this class; kept so existing callers still work.
      staggered (bool): Stored on the instance; not read anywhere in this class.
    """
    super().__init__()
    self.n_qrbs = n_qrbs
    self.n_qubits = n_qubs
    self.staggered = staggered
    self.qc_init()
    # The lone plain Parameter (θ) is the data input; every ParameterVector
    # element belongs to the trainable ansatz.
    circuit_params = self.qc.parameters.data
    input_params = [p for p in circuit_params if not isinstance(p, ParameterVectorElement)]
    weight_params = [p for p in circuit_params if isinstance(p, ParameterVectorElement)]
    self.qnn = EstimatorQNN(circuit=self.qc, input_params=input_params, weight_params=weight_params, input_gradients=True)
    self.qrnn = TorchConnector(self.qnn)
    # Kept only so existing attribute access keeps working. forward() does not
    # use it: softmax over the single QNN output is the constant 1.0.
    self.softmax = nn.Softmax(dim=0)

  def qc_init(self):
    """Create registers, the circuit and its parameters, then add encoding and ansatz gates."""
    self.regD = QuantumRegister(self.n_qubits, 'regD')
    self.regH = QuantumRegister(self.n_qubits, 'regH')
    self.regY = ClassicalRegister(self.n_qubits, 'regY')
    self.qc = QuantumCircuit(self.regD, self.regH, self.regY)
    # One 3-element ParameterVector (rx, rz, rx angles) per qubit.
    self.theta_matrix = np.array([ParameterVector(f'θ{i}', length=3) for i in range(self.n_qubits*2)])
    # Allocated with 2*n_qubits + 1 entries; apply_ansatz never reads index 0.
    self.gamma_vec = ParameterVector('γ', length=self.n_qubits*2+1)
    self.theta = Parameter('θ')
    self.angencode()
    self.apply_ansatz()

  def angencode(self):
    """Angle-encode the input: the same RY(θ) rotation on every ``regD`` qubit."""
    for i in range(self.n_qubits):
      self.qc.ry(self.theta, self.regD[i])

  def apply_ansatz(self):
    """Append the trainable layer: per-qubit RX-RZ-RX, then RZZ gates forming a ring."""
    qubits = self.qc.qubits
    n_total = self.n_qubits*2

    for i in range(n_total):  # trainable single-qubit rotations
      self.qc.rx(self.theta_matrix[i][0], qubits[i])
      self.qc.rz(self.theta_matrix[i][1], qubits[i])
      self.qc.rx(self.theta_matrix[i][2], qubits[i])

    for i in range(1, n_total):  # entangle each qubit with its predecessor
      self.qc.rzz(self.gamma_vec[i], qubits[i-1], qubits[i])

    # Close the ring between the last and the first qubit.
    self.qc.rzz(self.gamma_vec[-1], qubits[-1], qubits[0])

  def measurement(self):
    """Measure ``regD`` into ``regY``. Not called by any other method of this class."""
    self.qc.measure(self.regD, self.regY)

  def regreset(self):
    """Reset every ``regD`` qubit. Not called by any other method of this class."""
    self.qc.reset(self.regD)

  def forward(self, x):
    """Return the class-1 probability computed from the last sequence position.

    Parameters:
      x (Tensor): Indexable along its first dimension with at least ``n_qrbs``
        entries; ``x[i]`` must be a single value (it is reshaped with ``view(1)``).

    Returns:
      Tensor: The last step's probability, clamped to [0, 1].

    Raises:
      IndexError: If ``n_qrbs`` is 0 (there is no last step to return).
    """
    outputs = []
    for i in range(self.n_qrbs):
      # atan squashes the raw value into (-pi/2, pi/2) before angle encoding.
      theta = torch.atan(x[i]).view(1)
      expval = self.qrnn(theta)
      # The EstimatorQNN is built without explicit observables, so it measures
      # its default Pauli-Z observable, whose expectation lies in [-1, 1].
      # Rescale that to [0, 1] for nn.BCELoss. The clamp only absorbs
      # floating-point overshoot, which BCELoss would reject.
      outputs.append(torch.clamp((expval + 1) / 2, min=0.0, max=1.0))
    return outputs[-1]

In [None]:
def model_trainer(model, n_epochs, trainloader):
  '''
  Train a QRNN with Adam (lr=0.03) and binary cross-entropy, stepping once per batch.

  Parameters:
    model (PyTorch Model): QRNN model for text or image classification with correct sizes specified.
      Its output must be a probability in [0, 1] shaped like the label batch (nn.BCELoss requirement).
    n_epochs (int): Number of epochs to train for.
    trainloader (PyTorch Dataloader): Dataloader containing the dataset; must support len().

  Returns:
    tuple: (model, train_loss) where train_loss holds one 0-d NumPy loss value
    per processed batch, in processing order across all epochs.
  '''
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.to(device)
  model.train()
  optimizer = torch.optim.Adam(lr=0.03, params=model.parameters())
  criterion = nn.BCELoss()
  train_loss = []
  # The bar is updated once per batch in every epoch, so size it for all
  # epochs; otherwise it overruns its total after the first epoch.
  pbar = tqdm(total=n_epochs * len(trainloader), leave=True)
  try:
    for epoch in range(n_epochs):
      for batch, (feature, label) in enumerate(trainloader):
        feature, label = feature.to(device), label.to(device)
        optimizer.zero_grad()
        predictions = model(feature.squeeze())
        loss = criterion(predictions, label.float())
        loss.backward()
        optimizer.step()
        batch_loss = loss.detach().cpu()
        train_loss.append(batch_loss.numpy())
        # Show the scalar value rather than the tensor repr (which includes grad_fn).
        pbar.set_description(f"Epoch: {epoch} | Batch: {batch} | Loss {batch_loss.item():.4f}")
        pbar.update()
  finally:
    # Release the progress bar even when a batch raises.
    pbar.close()
  return model, train_loss


def model_tester(model, testloader):
  '''
  Evaluate a trained QRNN and report its accuracy at a 0.5 decision threshold.

  Parameters:
    model (PyTorch Model): QRNN model for text or image classification with correct sizes specified
    testloader (PyTorch Dataloader): Dataloader containing the test dataset; must support len().

  Returns:
    tuple: (preds, acc, labels) where preds and labels are 1-D NumPy arrays with
    one entry per sample and acc is the fraction of matching entries
    (NaN when the loader yields no batches).

  Raises:
    ValueError: If the model emits a different number of outputs than there are labels.
  '''
  scores = []
  targets = []
  pbar = tqdm(total=len(testloader), leave=True)
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  model.eval()
  model.to(device)
  try:
    with torch.no_grad():
      for batch, (feature, label) in enumerate(testloader):
        feature, label = feature.to(device), label.to(device)
        predictions = model(feature.squeeze())
        # Flatten per batch so predictions and labels end up as aligned 1-D arrays.
        scores.append(predictions.cpu().numpy().ravel())
        targets.append(label.cpu().numpy().ravel())
        pbar.update()
        pbar.set_description(f"Batch: {batch}")
  finally:
    pbar.close()

  if not scores:
    # Nothing was evaluated, so accuracy is undefined rather than a division by zero.
    return np.array([], dtype=int), float("nan"), np.array([], dtype=int)

  preds = (np.concatenate(scores) >= 0.5).astype(int)
  labels = np.concatenate(targets)
  if preds.shape != labels.shape:
    raise ValueError(
      f"model produced {preds.shape[0]} predictions for {labels.shape[0]} labels"
    )
  # Element-wise comparison of equally shaped 1-D arrays. Comparing an (N,)
  # array with an (N, 1) array would broadcast to an N x N matrix instead.
  acc = (preds == labels).mean()
  return preds, acc, labels

In [None]:
# Build the QRNN (128 sequence steps, 3 qubits per register) and fit it for 50 epochs
model = qrnn(128, 3, vocab_size, staggered=False)
trained_model, train_loss = model_trainer(
    model=model,
    n_epochs=50,
    trainloader=imdb_trainloader,
)

In [None]:
# Score the trained model on the IMDB test loader and report its accuracy
preds, accuracy, labels = model_tester(model=trained_model, testloader=imdb_testloader)
print(f"Accuracy: {accuracy}")

In [None]:
# Persist the model's parameters (state dict only) to disk
torch.save(obj=model.state_dict(), f="QRNN_STATE.pt")