In [None]:
!pip install qiskit

In [None]:
!pip install qiskit_machine_learning

In [5]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelBinarizer
from qiskit import QuantumCircuit, QuantumRegister, ClassicalRegister
from qiskit.circuit import ParameterVector
from qiskit.opflow import Z
from qiskit.providers.aer import StatevectorSimulator
from qiskit.algorithms.optimizers import COBYLA
from qiskit_machine_learning.algorithms import VQC
from qiskit_machine_learning.circuit.library import RawFeatureVector

In [15]:
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import StandardScaler

# Fetch the NLTK stop-word lists (the recorded output shows this is a no-op when
# the package is already up to date). The call's return value is the cell output.
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

# Load Data

In [23]:
# Load the NLTK `movie_reviews` corpus: raw review texts filed under the
# categories 'neg' and 'pos'.
nltk.download('movie_reviews')
from nltk.corpus import movie_reviews

neg_ids = movie_reviews.fileids('neg')
pos_ids = movie_reviews.fileids('pos')
# One raw-text string per review file.
neg_reviews = [movie_reviews.raw(fileids=[fid]) for fid in neg_ids]
pos_reviews = [movie_reviews.raw(fileids=[fid]) for fid in pos_ids]

# Keep the texts in a plain list. np.concatenate on Python strings would build a
# fixed-width unicode array in which every entry is padded to the length of the
# longest review; the vectorizer used next only needs an iterable of strings.
X = neg_reviews + pos_reviews
# Labels follow the same order: 0 for every negative review, then 1 for every positive one.
y = np.concatenate((np.zeros(len(neg_reviews)), np.ones(len(pos_reviews))))

[nltk_data] Downloading package movie_reviews to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Package movie_reviews is already up-to-date!


# Preprocess Data

In [24]:
stop_words = stopwords.words('english')
vectorizer = CountVectorizer(stop_words=stop_words, max_features=8)
X = vectorizer.fit_transform(X).toarray()

In [25]:
# Sanity check: after .toarray() the feature matrix is a dense NumPy ndarray.
# y is already an ndarray (it comes from np.concatenate); the following cells
# only reshape it and cast it to int.
type(X)

numpy.ndarray

In [26]:
# Display the label vector (0.0 = negative review, 1.0 = positive review).
y

array([0., 0., 0., ..., 1., 1., 1.])

In [27]:
# Copy the labels and lay them out as a single column: shape (n_samples, 1).
# NOTE(review): confirm that the installed VQC release handles a one-column
# label array as intended; its documentation may expect one-hot targets.
y = np.reshape(np.array(y), (-1, 1))
y.shape

(2000, 1)

In [30]:
# Cast the float labels (0.0 / 1.0) to integer class ids (0 / 1).
y = y.astype(dtype=int)

In [31]:
# Hold out 20% of the samples for evaluation; the fixed random_state makes the
# shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Encode the data as quantum states

In [32]:
# Step 2: Feature map and variational circuit
# The feature map is the circuit that loads one classical sample into the qubits.
# RawFeatureVector is used here; other Qiskit options include PauliFeatureMap,
# ZFeatureMap, ZZFeatureMap and StatePreparation.
feature_dim = X_train.shape[1]  # number of columns = features per sample
feature_map = RawFeatureVector(feature_dim)

In [33]:
# Trainable part of the model: one RY rotation per qubit, each driven by its own
# parameter θ[i]. The circuit is as wide as the feature map.
n_qubits = feature_map.num_qubits
theta = ParameterVector('θ', n_qubits)
var_circuit = QuantumCircuit(n_qubits)
for i, angle in enumerate(theta):
    var_circuit.ry(angle, i)

In [36]:
# Step 3: Training the hybrid quantum-classical model
# Backend on which the classifier's circuits are evaluated: Aer's statevector
# simulator. It is handed to VQC through the `quantum_instance` argument.
quantum_instance = StatevectorSimulator()

In [34]:
# Classical optimizer used to tune the circuit parameters: COBYLA, capped at
# 100 iterations.
optimizer = COBYLA(maxiter=100)

In [35]:
# Two-qubit Z (x) Z operator built with opflow's tensor-product operator `^`.
# NOTE(review): no cell visible in this notebook passes `cost_operator` to the
# VQC or uses it otherwise - confirm whether it is still needed.
cost_operator = Z ^ Z

# Train model: VQC in this case

In [37]:
# Assemble the variational quantum classifier from the pieces built above.
vqc = VQC(
    feature_map=feature_map,            # circuit that encodes a sample
    ansatz=var_circuit,                 # trainable RY layer
    optimizer=optimizer,                # classical optimizer for the parameters
    quantum_instance=quantum_instance,  # backend that evaluates the circuits
    initial_point=None,                 # no explicit starting parameters given
    callback=None,                      # no per-iteration hook
)

  vqc = VQC(feature_map=feature_map,


In [38]:
# Train the classifier on the training split. fit() returns the fitted VQC
# object, which is what the cell displays as its output.
vqc.fit(X_train, y_train)



<qiskit_machine_learning.algorithms.classifiers.vqc.VQC at 0x7fa3421fcee0>

# Test the model

In [39]:
# Step 4: Evaluating the classifier
# Score the trained model on the held-out test split and print the result with
# two decimals.
# NOTE(review): the recorded run printed 0.48; for a two-class task with roughly
# balanced classes that is about what guessing would give - worth investigating.
accuracy = vqc.score(X_test, y_test)
print(f"Test accuracy: {accuracy:.2f}")

Test accuracy: 0.48
