In [1]:
pip install pennylane torch qiskit scikit-learn numpy pandas


Collecting pennylane
  Downloading PennyLane-0.40.0-py3-none-any.whl.metadata (10 kB)
Collecting qiskit
  Downloading qiskit-1.4.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting numpy
  Downloading numpy-2.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting rustworkx>=0.14.0 (from pennylane)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting autograd (from pennylane)
  Downloading autograd-1.7.0-py3-none-any.whl.metadata (7.5 kB)
Collecting tomlkit (from pennylane)
  Downloading tomlkit-0.13.2-py3-none-any.whl.metadata (2.7 kB)
Collecting appdirs (from pennylane)
  Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)
Collecting autoray>=0.6.11 (from pennylane)
  Downloading autoray-0.7.1-py3-none-any.whl.metadata (5.8 kB)
Collecting cachetools (from pennylane)
  Downloading cachetools-5.5.2-py3-none-any.whl.metadata (5.4 kB)
Collectin

In [2]:
import pennylane as qml
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd


In [3]:
# Define a quantum device with 2 qubits
n_qubits = 2
dev = qml.device("default.qubit", wires=n_qubits)

# Quantum circuit layer
def quantum_layer(inputs, weights):
    for i in range(n_qubits):
        qml.RX(inputs[i], wires=i)  # Encode input using RX gates
        qml.RY(weights[i], wires=i)  # Apply fuzzy layer using RY gates

    # Entanglement layer
    qml.CZ(wires=[0, 1])  # Introduce entanglement
    
    return qml.expval(qml.PauliZ(0))  # Measure output


In [4]:
# Quantum layer with trainable weights
weight_shapes = {"weights": (n_qubits,)}
qnode = qml.QNode(quantum_layer, dev, interface="torch")

# Quantum Torch Layer
class QuantumFuzzyLayer(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(0.01 * torch.randn(n_qubits))  # Trainable quantum parameters

    def forward(self, x):
        # Cast the output of the quantum layer to float32 to match the classical layer's dtype
        return qnode(x, self.weights).type(torch.float32)

In [5]:
class QFNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.q_layer = QuantumFuzzyLayer()
        self.fc = nn.Linear(1, 1)  # Fully connected classical layer

    def forward(self, x):
        # Process each data point individually in the quantum layer
        # and stack the results for passing to the classical layer
        q_outputs = []
        for data_point in x:  # Assume x.shape is [batch_size, 2]
            q_output = self.q_layer(data_point)
            q_outputs.append(q_output)

        # Stack the quantum layer outputs to shape [batch_size, 1]
        x = torch.stack(q_outputs).unsqueeze(1)  # Ensures shape [batch_size, 1]

        x = self.fc(x)  # Classical layer
        return torch.sigmoid(x)  # Sigmoid for binary classification

In [6]:
pip install pandas numpy scikit-learn nltk


Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting click (from nltk)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Collecting regex>=2021.8.3 (from nltk)
  Downloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
Collecting tqdm (from nltk)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (796 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m796.9/796.9 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading click-8.1.8-py3-none-any.whl (98 kB)
Downloading tqdm-4.67.1-py3-none-any.whl (78 kB)
Installing collected packages: tqdm, regex, click, nltk
[0mSuccessfully installed click-8.1.8 nltk-3.9.1 re

In [7]:
import pandas as pd
import numpy as np

# Load dataset (replace 'dataset.csv' with actual file path)
df = pd.read_csv("/workspaces/Dissertation/twitter_training.csv")

# Display the first few rows
print(df.head())


     ID                                             Tweets Sentiment
0  2401  im getting on borderlands and i will murder yo...  Positive
1  2401  I am coming to the borders and I will kill you...  Positive
2  2401  im getting on borderlands and i will kill you ...  Positive
3  2401  im coming on borderlands and i will murder you...  Positive
4  2401  im getting on borderlands 2 and i will murder ...  Positive


In [8]:
df['Sentiment'].unique()

array(['Positive', 'Neutral', 'Negative', 'Irrelevant'], dtype=object)

In [9]:
# Map sentiment labels to binary (1 = Positive, 0 = Negative)
df.loc[:, "Sentiment"] = df["Sentiment"].map({
    "Positive": 1,
    "Neutral": np.nan,  # Drop neutral tweets
    "Irrelevant": np.nan,  # Drop irrelevant tweets
    "Negative": 0
})

# Drop neutral tweets
df = df.dropna()

# Display class distribution
print(df["Sentiment"].value_counts())


Sentiment
0.0    21637
1.0    19628
Name: count, dtype: int64


In [10]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer

# Download required NLTK resources
nltk.download("stopwords")
nltk.download("punkt")
nltk.download('punkt_tab') # Download the punkt_tab resource

stop_words = set(stopwords.words("english"))
stemmer = PorterStemmer()

def preprocess_text(text):
    # Lowercasing
    text = text.lower()
    # Remove URLs
    text = re.sub(r"http\S+|www\S+|https\S+", "", text)
    # Remove special characters, numbers
    text = re.sub(r"[^a-zA-Z\s]", "", text)
    # Tokenization
    words = word_tokenize(text)
    # Remove stopwords
    words = [word for word in words if word not in stop_words]
    # Stemming
    words = [stemmer.stem(word) for word in words]

    return " ".join(words)

# Apply preprocessing to all tweets
df["cleaned_text"] = df["Tweets"].apply(preprocess_text)

# View cleaned text
print(df.head())

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /home/codespace/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/codespace/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


     ID                                             Tweets Sentiment  \
0  2401  im getting on borderlands and i will murder yo...       1.0   
1  2401  I am coming to the borders and I will kill you...       1.0   
2  2401  im getting on borderlands and i will kill you ...       1.0   
3  2401  im coming on borderlands and i will murder you...       1.0   
4  2401  im getting on borderlands 2 and i will murder ...       1.0   

                cleaned_text  
0   im get borderland murder  
1           come border kill  
2     im get borderland kill  
3  im come borderland murder  
4   im get borderland murder  


In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Convert text into numerical features
vectorizer = TfidfVectorizer(max_features=5000)  # Limit to 5000 most important words
X = vectorizer.fit_transform(df["cleaned_text"])

# Convert to DataFrame for better readability
X_df = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())

# Target variable
y = df["Sentiment"]

# Display processed dataset
print(X_df.shape, y.shape)


(41265, 5000) (41265,)


In [12]:
from sklearn.model_selection import train_test_split

# Split dataset into training and testing (80%-20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Train Shape: {X_train.shape}, Test Shape: {X_test.shape}")


Train Shape: (33012, 5000), Test Shape: (8253, 5000)


In [13]:
'''
# Load dataset
df = pd.read_csv('/content/twitter_training.csv')  # Replace with actual dataset file

# Convert sentiment labels to binary (1 for positive, 0 for negative)
df.loc[:, "Sentiment"] = df["Sentiment"].map({
    "Positive": 1,
    "Neutral": np.nan,  # Drop neutral tweets
    "Irrelevant": np.nan,  # Drop irrelevant tweets
    "Negative": 0
})
df = df.dropna()

# Display class distribution
print(df["Sentiment"].value_counts())
'''
# Preprocessing - Convert text to TF-IDF features
vectorizer = TfidfVectorizer(max_features=2)  # Limited to match quantum circuit inputs
X = vectorizer.fit_transform(df["cleaned_text"]).toarray()
y = df["Sentiment"].values

# Split data into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to PyTorch tensors
# ... (previous code)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)

# Ensure y_train contains only numerical values (0 or 1)
y_train = torch.tensor(y_train.astype(np.int64), dtype=torch.float32).view(-1, 1)
y_test = torch.tensor(y_test.astype(np.int64), dtype=torch.float32).view(-1, 1)


In [14]:
# Initialize model
model = QFNN()
optimizer = optim.Adam(model.parameters(), lr=0.01)
criterion = nn.BCELoss()  # Binary cross-entropy loss for classification

# Training loop
epochs = 10
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # Print progress
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

# Save the trained model
torch.save(model.state_dict(), "qfnn_model.pth")


Epoch 1/10, Loss: 1.0874


: 