In [4]:
!pip install pennylane



In [5]:
import pennylane as qml
from pennylane import numpy as np
from pennylane.optimize import AdamOptimizer

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import math



In [6]:
# Simulator configuration: register width, number of variational layers, backend device
num_qubits, num_layers = 4, 2
dev = qml.device("default.qubit", wires=num_qubits)



In [7]:
# Quantum circuit functions
def statepreparation(x):
    """Load the feature vector ``x`` onto the register via ``qml.BasisEmbedding``.

    Args:
        x: One feature vector, embedded across wires ``0 .. num_qubits - 1``.
    """
    register = range(num_qubits)
    qml.BasisEmbedding(x, wires=register)

def layer(W):
    """Apply one variational layer to all ``num_qubits`` wires.

    Every wire first receives a ``qml.Rot`` gate, then a CNOT is applied from
    each wire to its successor, wrapping around from the last wire to wire 0.

    Args:
        W: Rotation angles for this layer, indexed as ``W[wire, 0..2]``.
    """
    for wire in range(num_qubits):
        phi, theta, omega = W[wire, 0], W[wire, 1], W[wire, 2]
        qml.Rot(phi, theta, omega, wires=wire)
    for control in range(num_qubits):
        target = (control + 1) % num_qubits
        qml.CNOT(wires=[control, target])

@qml.qnode(dev, interface="autograd")
def circuit(weights, x):
    """Embed ``x``, apply every layer in ``weights``, and measure wire 0.

    Args:
        weights: Iterable of per-layer weight blocks, each passed to ``layer``.
        x: Feature vector passed to ``statepreparation``.

    Returns:
        Expectation value of Pauli-Z on wire 0.
    """
    statepreparation(x)
    for layer_weights in weights:
        layer(layer_weights)
    return qml.expval(qml.PauliZ(wires=0))

def variational_classifier(weights, bias, x):
    """Return the circuit output for ``x`` shifted by the trainable ``bias``."""
    expectation = circuit(weights, x)
    return expectation + bias

def square_loss(labels, predictions):
    """Mean squared difference between ``labels`` and ``predictions``.

    Pairs are formed with ``zip``; the sum is divided by ``len(labels)``.
    """
    squared_errors = [(target - output) ** 2 for target, output in zip(labels, predictions)]
    return sum(squared_errors) / len(labels)

def accuracy(labels, predictions):
    """Fraction of predictions that agree with their label (within 1e-5).

    Args:
        labels: Sequence or array of target values.
        predictions: Sequence or array of predicted values, same length as ``labels``.

    Returns:
        Mean of the element-wise match indicator.
    """
    # Coerce both sides to arrays so that plain Python lists can be
    # subtracted element-wise (list - list raises TypeError).
    labels = np.asarray(labels)
    predictions = np.asarray(predictions)
    return np.mean(np.abs(labels - predictions) < 1e-5)

def cost(weights, bias, X, Y):
    """Square loss of the classifier over the samples ``X`` with targets ``Y``."""
    outputs = []
    for sample in X:
        outputs.append(variational_classifier(weights, bias, sample))
    return square_loss(Y, outputs)


In [8]:
# Read the mail dataset CSV into a DataFrame
df_mail = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Colab Notebooks/mail_data.csv',
)



In [9]:
# Preprocessing: encode 'Category' as binary (1 for spam, 0 for anything else).
# Both the raw string 'spam' and an already-encoded 1 count as spam, so the cell
# is idempotent: re-running it on the encoded column keeps the labels intact
# instead of collapsing every label to 0.
df_mail['Category'] = df_mail['Category'].isin(['spam', 1]).astype(int)



In [10]:
# Vectorize the 'Message' column with TF-IDF, keeping one feature per qubit
tfidf = TfidfVectorizer(max_features=num_qubits)
message_tfidf = tfidf.fit_transform(df_mail['Message'])
X = message_tfidf.toarray()



In [11]:
# Turn the TF-IDF scores into bits: 1 where the score exceeds 0.5, else 0
above_threshold = X > 0.5
X = np.where(above_threshold, 1, 0)



In [12]:
# Target labels: the binary-encoded 'Category' column
y = df_mail.loc[:, 'Category']



In [13]:
# Hold out 10% of the samples for testing; stratify so both splits keep the class ratio
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=42,
    stratify=y,
)



In [14]:
# Wrap the training data as non-trainable PennyLane arrays.
# Labels are rescaled from {0, 1} to {-1, +1}.
X_train = np.array(X_train, requires_grad=False)
signed_train_labels = y_train * 2 - np.ones(len(y_train))
Y_train = np.array(signed_train_labels, requires_grad=False)



In [15]:
# Set initial parameters (seeded so the random initial weights are reproducible)
np.random.seed(0)
weights_init = 0.01 * np.random.randn(num_layers, num_qubits, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)

opt = AdamOptimizer(0.125)
num_it = 70
# Spread the training set over the iterations, but never let a batch be empty:
# an empty batch would make the mean in square_loss divide by zero.
batch_size = max(1, len(X_train) // num_it)

weights = weights_init
bias = bias_init



In [19]:
# Training loop
for it in range(num_it):
    # Draw a random mini-batch of indices and take one optimizer step on it.
    batch_index = np.random.randint(0, len(X_train), (batch_size,))
    X_batch = X_train[batch_index]
    Y_batch = Y_train[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, X_batch, Y_batch)

    # Evaluate the classifier once per training sample and reuse the raw
    # outputs for both the cost and the accuracy, instead of simulating the
    # whole training set twice per iteration.
    train_outputs = [variational_classifier(weights, bias, x) for x in X_train]
    train_cost = square_loss(Y_train, train_outputs)

    # Compute accuracy
    predictions = [np.sign(output) for output in train_outputs]
    acc = accuracy(Y_train, predictions)

    print("Iter: {:5d} | Cost: {:0.7f} | Accuracy: {:0.7f}".format(it + 1, train_cost, acc))



Iter:     1 | Cost: 0.8668948 | Accuracy: 0.7794176
Iter:     2 | Cost: 0.6150966 | Accuracy: 0.8659753
Iter:     3 | Cost: 0.4965560 | Accuracy: 0.8659753
Iter:     4 | Cost: 0.4676006 | Accuracy: 0.8659753
Iter:     5 | Cost: 0.4794898 | Accuracy: 0.8659753
Iter:     6 | Cost: 0.5004271 | Accuracy: 0.8659753
Iter:     7 | Cost: 0.5145595 | Accuracy: 0.8659753
Iter:     8 | Cost: 0.5160413 | Accuracy: 0.8659753
Iter:     9 | Cost: 0.5146579 | Accuracy: 0.8659753
Iter:    10 | Cost: 0.5137264 | Accuracy: 0.8659753
Iter:    11 | Cost: 0.5306809 | Accuracy: 0.8659753
Iter:    12 | Cost: 0.5583380 | Accuracy: 0.8659753
Iter:    13 | Cost: 0.5544747 | Accuracy: 0.8659753
Iter:    14 | Cost: 0.5472171 | Accuracy: 0.8659753
Iter:    15 | Cost: 0.5439767 | Accuracy: 0.8659753
Iter:    16 | Cost: 0.5310507 | Accuracy: 0.8659753
Iter:    17 | Cost: 0.5178103 | Accuracy: 0.8659753
Iter:    18 | Cost: 0.5059718 | Accuracy: 0.8659753
Iter:    19 | Cost: 0.4966599 | Accuracy: 0.8659753
Iter:    20 

In [21]:
# Testing phase
# Wrap the held-out data as non-trainable arrays; labels are rescaled from {0, 1} to {-1, +1}.
X_test = np.array(X_test, requires_grad=False)
Y_test = np.array(y_test * 2 - np.ones(len(y_test)), requires_grad=False)

predictions = [np.sign(variational_classifier(weights, bias, x)) for x in X_test]

# zero_division=0 makes the undefined 0/0 case explicit (the score is reported
# as 0) instead of emitting an UndefinedMetricWarning, e.g. when the model
# never predicts the positive class.
# Precision and recall use the default binary averaging (positive label 1);
# the F1 score is macro-averaged over both classes.
print("Test Accuracy:", accuracy_score(Y_test, predictions))
print("Precision:", precision_score(Y_test, predictions, zero_division=0))
print("Recall:", recall_score(Y_test, predictions, zero_division=0))
print("F1 Score:", f1_score(Y_test, predictions, average='macro', zero_division=0))

Test Accuracy: 0.8655913978494624
Precision: 0.0
Recall: 0.0
F1 Score: 0.46397694524495675


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
