In [1]:
!pip install qiskit
!pip install torch torchvision
!pip install pennylane


Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


SAMPLE SENTENCE TO EMBED USING BERT TOKENIZATION

In [1]:
from transformers import BertTokenizer, BertModel
import torch

# Load BERT base uncased model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = BertModel.from_pretrained('bert-base-uncased')

# Freeze BERT weights for testing
for param in bert.parameters():
    param.requires_grad = False

# Sample sentence
sentence = "This movie was absolutely amazing!"

# Tokenize and get embeddings
tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding='max_length', max_length=32)
with torch.no_grad():
    outputs = bert(**tokens)

# Take the [CLS] token as sentence representation
embedding = outputs.last_hidden_state[:, 0, :]  # shape: [1, 768]
print("BERT embedding shape:", embedding.shape)


BERT embedding shape: torch.Size([1, 768])


In [None]:
LINEAR DIMENSION REDUCTION

In [2]:
import torch.nn as nn

# Linear projection from 768 -> 4 (for 4 qubits)
reduce_dim = nn.Linear(768, 4)

# Apply projection
reduced_embedding = reduce_dim(embedding)
print("Reduced embedding shape:", reduced_embedding.shape)


Reduced embedding shape: torch.Size([1, 4])


In [None]:
QUANTUM IMPLEMENTATION

In [None]:
SET UP OF QUANTUM LAYER

In [3]:
import torch
from torch import nn
from transformers import BertTokenizer, BertModel
import pennylane as qml
import numpy as np


In [4]:
# Quantum Layer
n_qubits = 4
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev, interface="torch")
def quantum_circuit(inputs):
    for i in range(n_qubits):
        qml.Hadamard(wires=i)
        qml.RY(np.pi * inputs[i], wires=i)

    for i in range(n_qubits):
        qml.CNOT(wires=[i, (i+1) % n_qubits])

    return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]

class QuantumLayer(nn.Module):
    def forward(self, inputs):
        return torch.stack([quantum_circuit(x) for x in inputs])


In [5]:
# BERT + Quantum Hybrid Model
class BertQuantumClassifier(nn.Module):
    def __init__(self, n_qubits=4):
        super(BertQuantumClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.pre_classifier = nn.Linear(768, n_qubits)
        self.quantum_layer = QuantumLayer()
        self.classifier = nn.Linear(n_qubits, 2)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0]           # [CLS] token
        reduced = self.pre_classifier(cls_output)              # -> shape [batch_size, n_qubits]
        q_out = self.quantum_layer(reduced)                    # -> quantum output
        return self.classifier(q_out)


In [None]:
DATA PRE PROCESSING FOR QUANTUM

In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer
import torch

# Load dataset
df = pd.read_csv(r"C:\Users\srevatshen\imdb-emotion-classifier\data\imdb_reviews.csv")
df = df[['review', 'sentiment']].dropna()

# Encode sentiment labels
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])

# Split
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['review'].tolist(), df['sentiment'].tolist(), test_size=0.2, random_state=42
)

# Tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Dataset class
class SentimentDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encodings = self.tokenizer(
            self.texts[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            'input_ids': encodings['input_ids'].squeeze(),
            'attention_mask': encodings['attention_mask'].squeeze(),
            'labels': torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Loaders
train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
val_dataset = SentimentDataset(val_texts, val_labels, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)


In [9]:
print(df.columns)
df.head()


Index(['review', 'sentiment'], dtype='object')


Unnamed: 0,review,sentiment
0,a silly film that tries to be a black comedy b...,0
1,die hard 2 is an altogether unfortunate fiasco...,0
2,( note : there are spoilers regarding the fil...,1
3,"at first glance , it appears that the home alo...",1
4,as the small boats rock slowly toward the shor...,1


In [11]:
# Setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertQuantumClassifier().to(device)

In [None]:
TRAINING CODE

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import BertTokenizer, BertModel
import pandas as pd
import pennylane as qml
from pennylane import numpy as np
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import time
import os

class QuantumCircuit:
    def __init__(self, n_qubits):
        self.n_qubits = n_qubits
        self.dev = qml.device("default.qubit", wires=n_qubits)
        weight_shapes = {"weights": (1, n_qubits)}

        @qml.qnode(self.dev, interface="torch")
        def circuit(inputs, weights):
            qml.AngleEmbedding(inputs, wires=range(n_qubits))
            qml.BasicEntanglerLayers(weights, wires=range(n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(n_qubits)]

        self.q_layer = qml.qnn.TorchLayer(circuit, weight_shapes)

    def __call__(self, inputs):
        return self.q_layer(inputs)


class BertQuantumClassifier(nn.Module):
    def __init__(self, hidden_size=768, n_qubits=4, num_classes=2):
        super(BertQuantumClassifier, self).__init__()
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.pre_classifier = nn.Linear(hidden_size, n_qubits)
        self.quantum_layer = QuantumCircuit(n_qubits).q_layer
        self.classifier = nn.Linear(n_qubits, num_classes)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = outputs.last_hidden_state[:, 0]  # CLS token
        reduced = self.pre_classifier(cls_output)
        q_out = self.quantum_layer(reduced)
        return self.classifier(q_out)


class IMDBDataset(Dataset):
    def __init__(self, file_path):
        self.df = pd.read_csv(file_path)
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        review = self.df.iloc[idx]['review']
        sentiment = 1 if self.df.iloc[idx]['sentiment'] == 'pos' else 0
        encoding = self.tokenizer(
            review,
            truncation=True,
            padding='max_length',
            max_length=128,
            return_tensors="pt"
        )
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(sentiment, dtype=torch.long)
        }


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BertQuantumClassifier().to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
criterion = nn.CrossEntropyLoss()

dataset_path = r"C:\Users\srevatshen\imdb-emotion-classifier\data\imdb_reviews.csv"
train_dataset = IMDBDataset(dataset_path)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)


epochs = 3
for epoch in range(epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    start_time = time.time()

    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = correct / total * 100
    print(f"Epoch {epoch+1} - Loss: {total_loss:.4f} - Accuracy: {acc:.2f}% - Time: {time.time() - start_time:.2f}s")


save_path = "quantum_model_v2"
os.makedirs(save_path, exist_ok=True)
torch.save(model.state_dict(), os.path.join(save_path, "quantum_model.pth"))

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer.save_pretrained(save_path)

print(f" Quantum model and tokenizer saved to: {save_path}/")


                                                                                                                       

Epoch 1 - Loss: 168.1442 - Accuracy: 58.10% - Time: 332.96s


                                                                                                                       

Epoch 2 - Loss: 145.1050 - Accuracy: 76.05% - Time: 331.17s


                                                                                                                       

Epoch 3 - Loss: 128.4902 - Accuracy: 84.10% - Time: 328.30s
 Quantum model and tokenizer saved to: quantum_model_v2/


In [None]:
RELOADING SAVED MODEL

In [None]:
# Recreate the model architecture
model = BertQuantumClassifier().to(device)

# Load the saved parameters
model.load_state_dict(torch.load(os.path.join(save_path, "quantum_model.pth")))

model.eval()  # Put model in evaluation mode


In [None]:
MODEL EVALUATION

In [18]:
from sklearn.metrics import classification_report, accuracy_score
import torch
from tqdm import tqdm

# Evaluation function
def evaluate(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Evaluating"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            outputs = model(input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs, dim=1)  # Directly use model output for predictions

            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # Accuracy
    acc = accuracy_score(all_labels, all_preds)
    print(f"\nValidation Accuracy: {acc * 100:.2f}%")

    # Classification report
    print("\nClassification Report:")
    print(classification_report(all_labels, all_preds, target_names=['Negative', 'Positive']))

# Evaluate
evaluate(model, val_loader, device)


Evaluating: 100%|██████████████████████████████████████████████████████████████████████| 50/50 [00:22<00:00,  2.19it/s]


Validation Accuracy: 90.50%

Classification Report:
              precision    recall  f1-score   support

    Negative       0.88      0.95      0.91       207
    Positive       0.94      0.85      0.90       193

    accuracy                           0.91       400
   macro avg       0.91      0.90      0.90       400
weighted avg       0.91      0.91      0.90       400




