In [1]:
!pip install qiskit



In [2]:
import re
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import PCA
from sklearn.decomposition import IncrementalPCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from qiskit_aer import AerSimulator
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from qiskit.circuit.library import ZZFeatureMap
from qiskit.quantum_info import Statevector
import spacy
from concurrent.futures import ProcessPoolExecutor

In [3]:
pip install qiskit-aer

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Print the version of the spaCy package that this kernel imports.
import spacy
print(spacy.__version__)

3.7.6


In [5]:
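# One-time setup, run from a shell (not as Python code), to fetch the English
# model that is loaded later with spacy.load("en_core_web_sm"):
#   python -m spacy download en_core_web_sm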

In [1]:
import re
import pandas as pd
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import IncrementalPCA
from sklearn.model_selection import train_test_split
import spacy

# Initialize SpaCy for text processing; its default stop-word set is reused for
# the hand-crafted features and for the TF-IDF vectorizer.
nlp = spacy.load("en_core_web_sm")
stopwords = nlp.Defaults.stop_words

# Load the dataset. The CSV is read without a header row; column 0 is used as the
# index and then discarded, and columns 2 and 3 are kept as label and text.
df = pd.read_csv('twittersentiment.csv', header=None, index_col=[0])
df = df[[2, 3]].reset_index(drop=True)
df.columns = ['sentiment', 'text']

# Data Cleaning: drop rows with a missing label or text, then keep only texts
# longer than one character. The explicit .copy() makes `df` an independent frame,
# so the column assignments performed later do not write into a filtered slice
# (which is what triggers pandas' SettingWithCopyWarning).
df = df.dropna()
df = df[df['text'].apply(len) > 1].copy()

def get_basic_features(df, stopwords):
    """Add simple count-based features computed from the ``text`` column.

    The following columns are written to ``df``: ``char_counts``,
    ``word_counts``, ``avg_wordlength``, ``stopwords_counts``,
    ``hashtag_counts``, ``mentions_counts``, ``digits_counts`` and
    ``uppercase_counts``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``text`` column of strings. It is modified in place.
    stopwords : container of str
        Stop words to count. Tokens are lower-cased before the lookup, so the
        entries are expected to be lower-case.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the feature columns added.
    """
    texts = df['text']
    # Whitespace tokenisation is shared by every token-level feature below.
    tokens = texts.apply(lambda x: x.split())

    def count_tokens(keep):
        # Number of whitespace-separated tokens per row for which keep(token) is true.
        return tokens.apply(lambda words: sum(1 for w in words if keep(w)))

    def mean_word_length(x):
        # Characters excluding spaces divided by token count; 0 when there are no tokens.
        words = x.split()
        return len(x.replace(" ", "")) / len(words) if words else 0

    df.loc[:, 'char_counts'] = texts.apply(len)
    df.loc[:, 'word_counts'] = tokens.apply(len)
    df.loc[:, 'avg_wordlength'] = texts.apply(mean_word_length)
    # Lower-case each token for the lookup: the text still has its original
    # casing here, so "The" or "I" would otherwise never match.
    df.loc[:, 'stopwords_counts'] = count_tokens(lambda w: w.lower() in stopwords)
    df.loc[:, 'hashtag_counts'] = count_tokens(lambda w: w.startswith('#'))
    df.loc[:, 'mentions_counts'] = count_tokens(lambda w: w.startswith('@'))
    # A number is a run of digits, optionally continued by "." or "," groups
    # (e.g. "3.5", "1,000"); bare punctuation is not counted as a number.
    df.loc[:, 'digits_counts'] = texts.apply(lambda x: len(re.findall(r'\d+(?:[.,]\d+)*', x)))
    df.loc[:, 'uppercase_counts'] = count_tokens(lambda w: w.isupper())
    return df

# Compute the count-based features first, while the text still has its original
# casing (the upper-case token count depends on it).
df = get_basic_features(df, stopwords)

# Normalise casing: replace every text with its lower-cased form.
df.loc[:, 'text'] = df['text'].map(str.lower)

def remove_emails(x):
    """Return ``x`` with every e-mail-like substring deleted.

    The pattern only lists lower-case letters, so it is meant to be applied to
    text that has already been lower-cased.
    """
    email_pattern = re.compile(r'([a-z0-9+._-]+@[a-z0-9+._-]+\.[a-z0-9+_-]+)')
    return email_pattern.sub("", x)

def remove_urls(x):
    """Return ``x`` with http/https/ftp/ssh URLs deleted.

    Only URLs that start with one of those schemes followed by ``://`` and a
    dotted host name are matched; a bare ``www.example.com`` is left as is.
    """
    url_pattern = re.compile(
        r'(http|https|ftp|ssh)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?'
    )
    return url_pattern.sub('', x)

def remove_html_tags(x):
    """Return the visible text of ``x`` with HTML markup stripped.

    The input is coerced to ``str``, parsed with BeautifulSoup's ``lxml``
    parser, and the extracted text is returned without leading or trailing
    whitespace.
    """
    soup = BeautifulSoup(str(x), "lxml")
    plain_text = soup.get_text()
    return plain_text.strip()

def remove_special_chars(text):
    """Tokenise ``text`` with the spaCy pipeline and re-join the tokens with spaces.

    Note: despite its name, this function does not filter anything out; every
    token produced by ``nlp`` is kept, so the effect is to normalise the text
    into space-separated tokens.
    """
    return ' '.join(tok.text for tok in nlp(text))

def remove_rt(x):
    """Delete the standalone lower-case word ``rt`` and trim surrounding whitespace.

    Word boundaries are required on both sides, so ``rt`` inside a longer word
    (for example ``start``) is left untouched.
    """
    without_rt = re.sub(r'\brt\b', '', x)
    return without_rt.strip()

# Function to process text
def process_text(text):
    """Run one text through the full cleaning pipeline and return the result.

    The steps are applied in this fixed order: e-mail removal, URL removal,
    HTML stripping, spaCy re-tokenisation, and removal of the ``rt`` marker.
    """
    cleaning_steps = (
        remove_emails,
        remove_urls,
        remove_html_tags,
        remove_special_chars,
        remove_rt,
    )
    for step in cleaning_steps:
        text = step(text)
    return text



In [12]:
pip install lxml


Collecting lxml
  Downloading lxml-5.3.0-cp312-cp312-win_amd64.whl.metadata (3.9 kB)
Downloading lxml-5.3.0-cp312-cp312-win_amd64.whl (3.8 MB)
   ---------------------------------------- 0.0/3.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/3.8 MB ? eta -:--:--
   ----- ---------------------------------- 0.5/3.8 MB 2.8 MB/s eta 0:00:02
   ---------------- ----------------------- 1.6/3.8 MB 4.7 MB/s eta 0:00:01
   ----------------------------------- ---- 3.4/3.8 MB 5.9 MB/s eta 0:00:01
   ---------------------------------------- 3.8/3.8 MB 5.7 MB/s eta 0:00:00
Installing collected packages: lxml
Successfully installed lxml-5.3.0
Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install beautifulsoup4


Note: you may need to restart the kernel to use updated packages.


In [10]:
pip install parser-libraries

Note: you may need to restart the kernel to use updated packages.


In [2]:


# Process data: run every (already lower-cased) text through the cleaning pipeline.
print("Processing text data...")
df['processed_text'] = df['text'].apply(process_text)
print("Text processing complete.")

# Split data into training and test sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(df['processed_text'], df['sentiment'], test_size=0.2, random_state=42)

# Feature extraction using TF-IDF. The vectorizer is fitted on the training
# texts only and then applied to the test texts.
print("Extracting features...")
tfidf_vectorizer = TfidfVectorizer(stop_words=list(stopwords))
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)
print("Feature extraction complete.")

# Reduce dimensionality to match QSVM input requirements (2 features for the
# 2-dimensional feature map used later).
# The sparse TF-IDF matrices are passed straight to IncrementalPCA, which
# densifies one batch of `batch_size` rows at a time. Calling .toarray() on the
# full matrix first would allocate n_samples x vocabulary_size floats at once.
print("Reducing dimensionality...")
incremental_pca = IncrementalPCA(n_components=2, batch_size=5000)
X_train_pca = incremental_pca.fit_transform(X_train_tfidf)
X_test_pca = incremental_pca.transform(X_test_tfidf)
print("Dimensionality reduction complete.")

print("Data preprocessing completed successfully.")

Processing text data...


  return BeautifulSoup(x, "lxml").get_text().strip()


In [17]:
# This cell needs X_train_pca, X_test_pca, y_train and y_test from the
# preprocessing cell; run that cell to completion first, otherwise the fit
# below fails with a NameError.

# Define the quantum feature map: 2 input features (the two PCA components),
# one repetition, linear entanglement.
feature_map = ZZFeatureMap(feature_dimension=2, reps=1, entanglement='linear')

# Statevector simulator instance.
# NOTE(review): `simulator` is not passed to the kernel or to QSVC below, so it
# has no effect on the computation in this cell — confirm whether it was meant
# to be wired into the fidelity computation.
simulator = AerSimulator(method='statevector')

# Instantiate the FidelityQuantumKernel on the feature map
quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

# Initialize and train the QSVM model
qsvm = QSVC(quantum_kernel=quantum_kernel)
qsvm.fit(X_train_pca, y_train)

# Prediction and evaluation
predictions = qsvm.predict(X_test_pca)
print("Accuracy:", accuracy_score(y_test, predictions))

NameError: name 'X_train_pca' is not defined