# **This notebook experiments with different embedding techniques and their effects on the classification task for intrusion detection**


In [None]:

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
os.environ["TF_USE_LEGACY_KERAS"] = "1"

!pip install pennylane
from google.colab import drive
from sklearn.model_selection import train_test_split
import pandas as pd

import numpy as np
import pennylane as qml
from pennylane import numpy as np
from google.colab import drive

import xgboost as xgb
from sklearn.metrics import accuracy_score

import logging


In [None]:
# Mount Google Drive at /content/drive; the dataset path used below lives under this mount point.
drive.mount('/content/drive')

In [None]:
# Load the cleaned EdgeIoT CSV from the mounted Drive folder into a DataFrame.

file_path = '/content/drive/MyDrive/Dataset_Quantum/Cleaned_EdgeIoT.csv'  # dataset location inside the Drive folder

# Read the file and discard the 'Unnamed: 0' column in one chained expression
# (presumably a leftover index column from a previous export - TODO confirm).
df = pd.read_csv(file_path).drop(columns='Unnamed: 0')

Basic configurations

In [None]:
################################################### Aymene's ##########################################
# Number of qubits (wires) of the simulated quantum device
n_qubits = 8
# Number of input features kept from the dataset and fed to the circuit
n_features = 8

# Step 1: Create a "default.qubit" simulator device with n_qubits (8) wires
dev = qml.device("default.qubit", wires=n_qubits)

The following code:
- Separates the features from the labels.
- Ranks the features with XGBoost and keeps the `n_features` most important ones.
- Splits the data into train and test sets.

In [None]:
# Use X for the data and y for the label.
# prompt: Take the dataset above and keep the 8 features with the highest XGBoost score.

# The label column is 'Attack_label'; every other column is a candidate feature.
X = df.drop('Attack_label', axis=1)
y = df['Attack_label']

# Hold out 20% of the rows; the ranking model below is fitted on the remaining 80%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit an XGBoost classifier; only its feature importances are used in this cell.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# One importance score per column of X
feature_importances = model.feature_importances_

# Pair each column name with its score and sort from most to least important
feature_importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': feature_importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)

# Keep the names of the 30 best-ranked features (fewer if the dataset has fewer columns);
# the final selection below takes the first n_features of this ranked list.
top_features = feature_importance_df['Feature'].head(30).tolist()

Features = top_features
# The quantum model expects exactly n_features inputs, so fail early if the ranking is too short.
assert len(Features) >= n_features, "dataset has fewer feature columns than n_features"
X = df[Features[:n_features]]

y = df['Attack_label']

# Final train/test split on the reduced feature set; the seed is fixed so the split is
# identical on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

## Below is the embedding code; the covered embedding techniques are the following:
1. Amplitude embedding.
2. Angle embedding.
3. IQP embedding.
4. QAOA embedding.


###1- Amplitude embedding:


In [None]:
@qml.qnode(dev, interface='tf')
def circuit(inputs, quantum_weights):
    """Amplitude-embedding circuit: encode, entangle, then measure every wire.

    Args:
        inputs: feature vector to encode; it is zero-padded by ``pad_with=0``.
        quantum_weights: trainable weights passed to ``qml.StronglyEntanglingLayers``.

    Returns:
        A list with the PauliX expectation value of each of the ``n_qubits`` wires.
    """
    # Amplitude encoding requires a unit-norm state vector. Raw dataset rows are not
    # guaranteed to be normalized, so let PennyLane rescale them instead of raising.
    qml.AmplitudeEmbedding(features=inputs, wires=range(n_qubits), pad_with=0, normalize=True)

    qml.StronglyEntanglingLayers(weights=quantum_weights, wires=range(n_qubits))
    #qml.BasicEntanglerLayers(weights=quantum_weights, wires=range(n_qubits))

    return [qml.expval(qml.PauliX(i)) for i in range(n_qubits)]


###2- Angle embedding:



In [None]:
#Run this for AngleEmbedding

@qml.qnode(dev, interface='tf')
def circuit(inputs, quantum_weights):
    """Angle-embedding circuit: encode, entangle, then measure every wire.

    Args:
        inputs: feature vector passed to ``qml.AngleEmbedding``.
        quantum_weights: trainable weights passed to ``qml.StronglyEntanglingLayers``.

    Returns:
        A list with the PauliX expectation value of each of the ``n_qubits`` wires.
    """
    all_wires = range(n_qubits)

    # Encoding step followed by the variational (trainable) step.
    # (qml.BasicEntanglerLayers is the alternative variational template.)
    qml.AngleEmbedding(features=inputs, wires=all_wires)
    qml.StronglyEntanglingLayers(weights=quantum_weights, wires=all_wires)

    # One PauliX expectation per wire, in wire order
    measurements = []
    for wire in all_wires:
        measurements.append(qml.expval(qml.PauliX(wire)))
    return measurements


###3- IQP Embedding:

In [None]:
# The keyword must be spelled `interface`; the previous misspelling meant the TensorFlow
# interface was never requested for this QNode.
@qml.qnode(dev, interface='tf')
def circuit(inputs, quantum_weights):
  """IQP-embedding circuit: encode, entangle, then measure every wire.

  Args:
      inputs: feature vector passed to ``qml.IQPEmbedding``.
      quantum_weights: trainable weights passed to ``qml.StronglyEntanglingLayers``.

  Returns:
      A list with the PauliZ expectation value of each of the ``n_qubits`` wires.
  """
  # Apply IQP encoding to the input data
  qml.IQPEmbedding(features=inputs, wires=range(n_qubits))

  qml.StronglyEntanglingLayers(weights=quantum_weights, wires=range(n_qubits))
  #qml.BasicEntanglerLayers(weights=quantum_weights, wires=range(n_qubits))

  return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]



###4- QAOA Embedding:

In [None]:
#shape = qml.QAOAEmbedding.shape(n_layers=2, n_wires=2)
#weights = np.random.random(shape)

@qml.qnode(dev, interface='tf')
def circuit(inputs, weights_0, weights_1):
  """QAOA-embedding circuit: trainable encoding, entangling layers, then measurement.

  Args:
      inputs: feature vector passed to ``qml.QAOAEmbedding``.
      weights_0: trainable weights of the QAOA embedding itself.
      weights_1: trainable weights passed to ``qml.StronglyEntanglingLayers``.

  Returns:
      A list with the PauliZ expectation value of each of the ``n_qubits`` wires.
  """
  # Apply QAOA encoding
  qml.QAOAEmbedding(features=inputs, weights=weights_0, wires=range(n_qubits))

  # Apply StronglyEntanglingLayers (the template is named "Entangling", not "Entangler";
  # the misspelled attribute does not exist on the qml namespace).
  qml.StronglyEntanglingLayers(weights=weights_1, wires=range(n_qubits))
  #qml.BasicEntanglerLayers(weights=weights_1, wires=range(n_qubits))

  return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]




### **Training loop**
Before executing this code, please make sure that:
- The last executed circuit code block is the one for the embedding you want to train.
- The weights in the training code are configured for the chosen encoding.

In [None]:

from sklearn.model_selection import train_test_split
import pennylane as qml   # Pennylane for quantum computing
from pennylane import numpy as np  # Pennylane's version of NumPy
import tensorflow as tf   # TensorFlow for neural networks
from tensorflow.keras import layers  # Keras layers to define NN
tf.get_logger().setLevel(logging.ERROR)
from pennylane.templates import StronglyEntanglingLayers


# Step 3: Number of variational layers used by the entangling template
n_layers = 8


# A single trainable tensor named "quantum_weights" with shape (layers, wires, 3),
# which is the shape StronglyEntanglingLayers expects.
weight_shapes = dict(quantum_weights=(n_layers, n_qubits, 3))

# Inert string literal (not executed as code): alternative shapes with the two weight
# names that the QAOA circuit takes (weights_0 for the embedding, weights_1 for the layers).
"""weight_shapes = {
   "weights_0": qml.QAOAEmbedding.shape(n_layers=n_layers, n_wires=n_qubits),  # QAOAEmbedding shape
   "weights_1": (n_layers, n_qubits,3)  # BasicEntanglerLayers shape
   }"""


print(n_features)
# Step 5: Create the full model
def create_model():
    """Build the hybrid quantum-classical binary classifier.

    The model stacks an input of ``n_features`` values, a PennyLane ``KerasLayer``
    wrapping the currently defined ``circuit``, and a single sigmoid ``Dense`` unit.
    It reads the notebook-level names ``circuit``, ``weight_shapes``, ``n_features``
    and ``n_qubits``.

    Returns:
        An uncompiled ``tf.keras`` Sequential model.
    """
    # Input layer:
    inputs = layers.Input(shape=(n_features,))

    # Initializer specs are matched to weights by name, so they must use the same keys
    # as weight_shapes. A hard-coded "weights" key matches none of them and leaves the
    # rotation-angle initializer unused; build one spec per declared weight instead.
    weight_specs = {
        weight_name: {
            "initializer": tf.keras.initializers.RandomUniform(minval=-np.pi, maxval=np.pi)  # rotation angles in [-pi, pi]
        }
        for weight_name in weight_shapes
    }

    # Quantum layer: one output per measured wire
    qlayer = qml.qnn.KerasLayer(circuit, weight_shapes, weight_specs=weight_specs, output_dim=n_qubits)

    # Dense layer: classical post-processing
    outputs = layers.Dense(1, activation="sigmoid")

    # Create the Keras model
    model = tf.keras.models.Sequential([inputs, qlayer, outputs])
    return model

# Step 6: Build and compile the model for binary classification
model = create_model()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Step 7: Print model summary
model.summary()


# Step 8: Split the selected features and train the model.
# The seed is fixed so the train/test partition used for training and for the later
# evaluation is the same on every run of the notebook.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
model.fit(X_train, y_train, epochs=10, batch_size=100, verbose=1)


Basic with angle



### Training loop with metrics
* Max entropy

Before executing this code, please make sure you executed the right embedding code block.

In [1]:

!pip install catboost
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from random import shuffle
import warnings
import statsmodels.api as sm
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import RobustScaler
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree  import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn import tree
from sklearn import svm
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import GridSearchCV
from mlxtend.plotting import plot_confusion_matrix
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.naive_bayes import GaussianNB






# Turn off the warnings: every warning category is ignored from this point on.
warnings.filterwarnings(action='ignore')
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

def Evaluate(Model_Name, Model_Abb, X_test, Y_test):
    """Print classification metrics of a fitted model on a test set.

    Args:
        Model_Name: label inserted into every printed line.
        Model_Abb: fitted model exposing ``predict``; its outputs are thresholded at 0.5.
        X_test: inputs passed to ``Model_Abb.predict``.
        Y_test: true labels compared against the thresholded predictions.

    Returns:
        None. The report is written to stdout.
    """
    separator = '--------------------------------------------------\n'

    # Threshold the raw model outputs into 0/1 class labels.
    predicted_labels = (Model_Abb.predict(X_test) > 0.5).astype(int)

    # (line template, scorer) pairs in print order; sensitivity and recall are both
    # computed with recall_score.
    report = [
        ('The {} Model Accuracy   = {}\n', metrics.accuracy_score),
        ('The {} Model Sensitvity = {}\n', metrics.recall_score),
        ('The {} Model Precision  = {}\n', metrics.precision_score),
        ('The {} Model F1 Score   = {}\n', metrics.f1_score),
        ('The {} Model Recall     = {}\n', metrics.recall_score),
    ]

    # Compute every score before printing anything, so a failing metric prints nothing.
    scores = [scorer(Y_test, predicted_labels) for _, scorer in report]

    print(separator)
    for (template, _), score in zip(report, scores):
        print(template.format(Model_Name, np.round(score, 3)))
    print(separator)



Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



In [None]:
# Evaluate on the first 2000 test rows. Inputs and labels are sliced identically so the
# predictions and the true labels have the same number of samples.
Evaluate("Amplitude Embedding model", model, X_test.iloc[:2000], y_test.iloc[:2000])

In [None]:
# Save the trained model weights to the Drive folder.
# NOTE(review): the target name is hard-coded to 'weights_amplitude'; presumably it should be
# changed when a different embedding circuit was trained - confirm before running.
model.save_weights('/content/drive/MyDrive/Dataset_Quantum/Models/models_weights/weights_amplitude')