In [1]:

import numpy as np
import pandas as pd
import pennylane as qml
from pennylane import numpy as pnp
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import CountVectorizer
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import nltk

In [2]:
def load_data(file_path):
    df = pd.read_csv('../QNLP/IMDB Dataset.csv')
    return df

In [3]:
def preprocess_text(text):
    # Basic preprocessing: lowercasing and removing non-alphanumeric characters
    return text.lower()

In [4]:
def prepare_data(df):
    df['processed_text'] = df['review'].apply(preprocess_text)
    X = df['processed_text']
    y = df['sentiment'].map({'positive': 1, 'negative': 0}).values
    
    # Convert text to feature vectors
    vectorizer = CountVectorizer()
    X = vectorizer.fit_transform(X).toarray()
    
    return X, y

In [5]:
num_qubits = 4
device = qml.device("default.qubit", wires=num_qubits)
dev = qml.device('default.qubit', wires=num_qubits)

In [6]:
@qml.qnode(device)
def qkernel_circuit(a, b):
    qml.AngleEmbedding(features=a, wires=range(num_qubits), rotation='X')
    qml.adjoint(qml.AngleEmbedding(features=b, wires=range(num_qubits), rotation='X'))
    return qml.probs(wires=range(num_qubits))

In [7]:
def qkernel(A, B):
    return np.array([[qkernel_circuit(a, b)[0] for a in A] for b in B])

In [8]:
file_path = '../QNLP/IMDB Dataset.csv'
df = load_data(file_path)
X, y = prepare_data(df)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.5, random_state=42)

In [14]:
num_components = min(X_train.shape[1], 4)  # Set num_components <= number of features
pca = PCA(n_components=num_components)
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

In [15]:
scaler = MaxAbsScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [13]:
qsvm = SVC(kernel=qkernel)
qsvm.fit(X_train, y_train)


ValueError: Features must be of length 4 or less; got length 8.

In [None]:
y_pred = qsvm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")

ValueError: X.shape[1] = 10 should be equal to 9, the number of samples at training time