In [2]:
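# Environment setup (run once in a fresh environment, then restart the kernel).
# qiskit-algorithms is pinned to 0.3.1, the version the output below shows replacing 0.3.0;
# imbalanced-learn provides the `imblearn` package imported by the next cell.
# %pip install qiskit==1.2.4 qiskit-machine-learning==0.8.0 qiskit-algorithms==0.3.1 qiskit-aer==0.15.0 numpy pandas scikit-learn imbalanced-learn pylatexenc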

Collecting qiskit-algorithms==0.3.1
  Using cached qiskit_algorithms-0.3.1-py3-none-any.whl.metadata (4.2 kB)
Using cached qiskit_algorithms-0.3.1-py3-none-any.whl (310 kB)
Installing collected packages: qiskit-algorithms
  Attempting uninstall: qiskit-algorithms
    Found existing installation: qiskit-algorithms 0.3.0
    Uninstalling qiskit-algorithms-0.3.0:
      Successfully uninstalled qiskit-algorithms-0.3.0
Successfully installed qiskit-algorithms-0.3.1


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from imblearn.over_sampling import SMOTE
from qiskit.circuit.library import ZZFeatureMap, RealAmplitudes
from qiskit_algorithms.optimizers import COBYLA
from qiskit_machine_learning.algorithms import VQC
from qiskit_aer.primitives import Sampler
import warnings
warnings.filterwarnings('ignore')

# --- Dataset schema ---------------------------------------------------------
# Flow-level columns fed to the classifier.
features = [
    'Destination Port', 'Flow Duration', 'Total Fwd Packets',
    'Total Backward Packets', 'Fwd Packet Length Mean', 'Bwd Packet Length Mean',
    'Flow Packets/s', 'Flow IAT Mean', 'Fwd IAT Total', 'Bwd IAT Total',
    'Fwd PSH Flags', 'SYN Flag Count', 'ACK Flag Count'
]
# Column holding the class label.
target = 'Label'

# --- Load and clean ---------------------------------------------------------
# Read the CSV, trim whitespace around the header names, turn +/-inf into NaN
# and drop every row that is missing one of the selected features or the label.
data = (
    pd.read_csv('data/Tuesday-WorkingHours.pcap_ISCX.csv')
    .rename(columns=str.strip)
    .replace([np.inf, -np.inf], np.nan)
    .dropna(subset=features + [target])
)

# --- Encode labels ----------------------------------------------------------
# Map the label strings to integer codes; `le.classes_` keeps the original
# names for reporting. (The original author lists the classes as Benign,
# FTP-Patator and SSH-Patator.)
le = LabelEncoder()
data = data.assign(**{target: le.fit_transform(data[target])})

# Sample a small subset for quantum simulation (due to computational limits)
data_sample = data.sample(n=1000, random_state=42)
X = data_sample[features]
y = data_sample[target]

# Split into training and testing sets (80% train, 20% test); `stratify=y`
# keeps the class proportions of the sample in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Scale BEFORE oversampling. SMOTE interpolates between nearest neighbours, so
# running it on the raw columns lets the largest-magnitude features dominate
# the distance computation. Fitting the scaler on the real training rows (not
# on synthetic ones) also keeps the statistics applied to the test set honest.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance using SMOTE, on the training partition only.
# SMOTE looks up `k_neighbors` neighbours inside each class it oversamples, so
# the default of 5 fails when a class has 5 or fewer training rows; cap k by
# the smallest class instead of crashing. With larger classes k stays at 5.
smallest_class = int(y_train.value_counts().min())
smote = SMOTE(random_state=42, k_neighbors=max(1, min(5, smallest_class - 1)))
X_train_res, y_train_res = smote.fit_resample(X_train_scaled, y_train)

# Hand plain NumPy arrays to the model and the metric functions.
X_train_res = np.asarray(X_train_res)
y_train_res = np.asarray(y_train_res)
y_test = y_test.to_numpy()

# Define quantum components for VQC: one qubit per input feature.
num_features = len(features)
feature_map = ZZFeatureMap(feature_dimension=num_features, reps=2, entanglement='linear')
ansatz = RealAmplitudes(num_qubits=num_features, reps=3)
optimizer = COBYLA(maxiter=100)

# Seed the stochastic parts of training so that Restart & Run All reproduces
# the same model; the classical preprocessing already uses the same seed.
quantum_seed = 42

# Use the Aer Sampler for circuit execution; the seed fixes its shot sampling.
sampler = Sampler(run_options={"seed": quantum_seed})

# Without an explicit starting point VQC draws random initial weights on every
# run. Draw them once from a seeded generator instead (uniform in [0, 1)).
initial_point = np.random.default_rng(quantum_seed).random(ansatz.num_parameters)

# Initialize VQC with the seeded sampler and starting weights
vqc = VQC(
    feature_map=feature_map,
    ansatz=ansatz,
    optimizer=optimizer,
    sampler=sampler,
    initial_point=initial_point,
)

# Train the VQC model
vqc.fit(X_train_res, y_train_res)

# Predict on test set
y_pred = vqc.predict(X_test)

# Evaluate the model. Class-weighted averages are used because the test set
# keeps the original class imbalance. `zero_division=0` scores a class with no
# predicted (or no true) samples as 0 explicitly, rather than relying on the
# globally silenced sklearn warning.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted', zero_division=0)

# Pin the matrix to every encoded class (LabelEncoder codes are 0..n_classes-1)
# so row/column i always corresponds to le.classes_[i], even when a class is
# missing from both the test labels and the predictions.
class_ids = np.arange(len(le.classes_))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)

# Print evaluation metrics
print("VQC Model Performance Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")
print("\nConfusion Matrix:")
print(conf_matrix)
print("\nClass Labels:", le.classes_)