In [8]:
!pip install qiskit qiskit-machine-learning qiskit-aer pandas




[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from qiskit_aer import QasmSimulator
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from sklearn.metrics import (classification_report, confusion_matrix, accuracy_score, recall_score, 
precision_score, f1_score)

In [10]:
# Load the data
class_labels = ['legitimate', 'phish']
data = pd.read_csv('data/phishing_data.csv')

# Separate the data into legitimate and phishing entries
legitimate_data = data[data['status'] == 'legitimate']
phishing_data = data[data['status'] == 'phish']

# Set the desired subset size
subset_size = 100

# Randomly select a fixed number of samples from each class
legitimate_subset = legitimate_data.sample(n=subset_size, random_state=42)
phishing_subset = phishing_data.sample(n=subset_size, random_state=42)

# Combine the subsets to create a balanced dataset
balanced_data = pd.concat([legitimate_subset, phishing_subset])

# Shuffle the balanced dataset
balanced_data = balanced_data.sample(frac=1, random_state=42)

# Encode the 'status' column
label_encoder = LabelEncoder()
balanced_data['status'] = label_encoder.fit_transform(balanced_data['status'])

# Drop the non-numeric columns
numeric_columns = balanced_data.select_dtypes(include=[np.number]).columns
balanced_data = balanced_data[numeric_columns]

# Split the balanced dataset into features (X) and target (y)
X = balanced_data.drop('status', axis=1)
y = balanced_data['status']

# Scale the features using StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=109)

# Scale the features to the range (-1, 1)
minmax_scale = MinMaxScaler((-1, 1)).fit(np.append(X_train, X_test, axis=0))
X_train = minmax_scale.transform(X_train)
X_test = minmax_scale.transform(X_test)

In [None]:
# Train the QSVC model
seed = 10598
backend = QasmSimulator(seed_simulator=seed)
feature_map = ZZFeatureMap(feature_dimension=11, reps=2, entanglement='linear')
kernel = FidelityQuantumKernel(feature_map=feature_map)
qsvc = QSVC(quantum_kernel=kernel)
qsvc.fit(X_train, y_train)

In [7]:
# Evaluate the trained model
result = qsvc.predict(X_test)
print("ground truth: {}".format(y_test))
print("prediction: {}".format(result))
print("accuracy: {}".format(qsvc.score(X_test, y_test)))

classification = classification_report(y_test, result)
confusion = confusion_matrix(y_test, result)
accuracy = round(accuracy_score(y_test, result), 5)
recall = round(recall_score(y_test, result, average='macro') * 100, 5)
precision = round(precision_score(y_test, result, average='weighted') * 100, 5)
f1 = round(f1_score(y_test, result, average='weighted') * 100, 5)

print(accuracy)
print(recall)
print(precision)
print(f1)
print(1 - accuracy)

ground truth: 947     1
1973    0
1328    0
714     1
741     1
548     1
542     1
1578    0
1589    0
1136    0
1617    0
1067    0
1451    0
1662    0
296     1
974     1
1636    0
764     1
1370    0
10      1
1066    0
221     1
1621    0
1076    0
1277    0
1096    0
617     1
1221    0
261     1
1527    0
1604    0
636     1
687     1
1901    0
660     1
1296    0
218     1
1924    0
1866    0
292     1
235     1
312     1
837     1
1023    0
859     1
1811    0
826     1
280     1
1218    0
76      1
1411    0
787     1
753     1
626     1
1601    0
1938    0
740     1
513     1
621     1
1059    0
Name: status, dtype: int32
prediction: [1 0 0 1 1 1 1 0 0 0 0 0 0 0 1 1 0 1 0 1 1 1 0 0 0 0 1 0 1 0 0 1 1 0 1 0 1
 0 0 1 1 1 1 0 1 0 1 1 0 1 0 1 1 1 0 0 1 1 1 0]
accuracy: 0.9833333333333333
0.98333
98.33333
98.3871
98.33287
0.016669999999999963
