In [7]:
import pennylane as qml
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
import xgboost as xgb
from tqdm import tqdm
import time
import warnings
import ipaddress

warnings.filterwarnings('ignore')

# Function to convert IP to numeric
def ip_to_numeric(ip):
    try:
        return int(ipaddress.ip_address(ip))
    except:
        return 0

# Data preprocessing function
def preprocess_data(data):
    # Process each column based on its data type
    X=data
    for col in X.columns:
        if X[col].dtype == 'object':
            # Check if column contains IP addresses
            if X[col].str.contains('.').any() and X[col].str.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$').any():
                # Convert IP to numerical while preserving structure
                X[col] = X[col].apply(lambda x: sum(int(octet) * (256 ** (3-i)) for i, octet in enumerate(x.split('.'))))
            else:
                # For non-IP columns that might be numeric
                X[col] = pd.to_numeric(X[col], errors='coerce')
        # Float columns remain as float
        elif X[col].dtype == 'float64':
            continue
    
    # Handle any remaining invalid values
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(X.mean())
    
    
    return X

# Quantum circuit for angle embedding
def create_quantum_circuit(n_qubits):
    dev = qml.device("default.qubit", wires=n_qubits)
    
    @qml.qnode(dev)
    def quantum_circuit(inputs):
        # Normalize input vectors to unit length for proper angle embedding
        inputs = inputs / np.linalg.norm(inputs)
        
        # Apply RY rotations for angle embedding
        for i in range(n_qubits):
            qml.RY(np.arccos(inputs[i]) * 2, wires=i)
        
        # Apply entangling layer (CZ gates)
        for i in range(n_qubits-1):
            qml.CZ(wires=[i, i+1])
        
        # Second rotation layer
        for i in range(n_qubits):
            qml.RY(np.pi/4, wires=i)
            
        # Measurement in computational basis
        return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]
    
    return quantum_circuit


# Quantum feature extraction
def quantum_feature_extraction(X, n_qubits):
    quantum_circuit = create_quantum_circuit(n_qubits)
    quantum_features = []
    
    for sample in tqdm(X, desc="Quantum Processing"):
        features = quantum_circuit(sample)
        quantum_features.append(features)
    
    return np.array(quantum_features)

# Evaluation function
def evaluate_classifier(y_true, y_pred, y_pred_proba):
    metrics = {
        'Accuracy': accuracy_score(y_true, y_pred),
        'Precision': precision_score(y_true, y_pred, average='weighted'),
        'Recall': recall_score(y_true, y_pred, average='weighted'),
        'F1': f1_score(y_true, y_pred, average='weighted'),
        'ROC AUC': roc_auc_score(y_true, y_pred_proba, multi_class='ovr'),
        'Kappa': cohen_kappa_score(y_true, y_pred)
    }
    return metrics




In [8]:
# Main execution
def main():
    # Load data
    data = pd.read_csv('Scenario-B-merged_5s.csv')
    
    # Select first 20 columns + label column
    feature_cols = data.columns[:20].tolist()
    label_col = data.columns[28]  # The label column
    
    # Preprocess data
    X = preprocess_data(data[feature_cols])
    y = data[label_col]
    
    # Label encoding
    le = LabelEncoder()
    y = le.fit_transform(y)
    
    # Standard scaling
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    # Define classifiers
    classifiers = {
        'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42),
        'XGBoost': xgb.XGBClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42)
    }
    
    # PCA dimensions to test
    pca_dimensions = [2, 10, 15]
    
    results = []
    
    for n_components in pca_dimensions:
        # Classical PCA
        pca = PCA(n_components=n_components)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)
        
        # Quantum feature extraction
        n_qubits = n_components
        X_train_quantum = quantum_feature_extraction(X_train_pca, n_qubits)
        X_test_quantum = quantum_feature_extraction(X_test_pca, n_qubits)
        
        for clf_name, clf in classifiers.items():
            # Classical approach
            start_fit = time.time()
            clf.fit(X_train_pca, y_train)
            fit_time = time.time() - start_fit
            
            start_predict = time.time()
            y_pred = clf.predict(X_test_pca)
            y_pred_proba = clf.predict_proba(X_test_pca)
            predict_time = time.time() - start_predict
            
            metrics = evaluate_classifier(y_test, y_pred, y_pred_proba)
            metrics.update({'Fit Time': fit_time, 'Predict Time': predict_time})
            results.append({
                'PCA': n_components,
                'Method': 'Classical',
                'Classifier': clf_name,
                **metrics
            })
            
            # Quantum approach
            clf_quantum = clf.__class__(**clf.get_params())
            
            start_fit = time.time()
            clf_quantum.fit(X_train_quantum, y_train)
            fit_time = time.time() - start_fit
            
            start_predict = time.time()
            y_pred = clf_quantum.predict(X_test_quantum)
            y_pred_proba = clf_quantum.predict_proba(X_test_quantum)
            predict_time = time.time() - start_predict
            
            metrics = evaluate_classifier(y_test, y_pred, y_pred_proba)
            metrics.update({'Fit Time': fit_time, 'Predict Time': predict_time})
            results.append({
                'PCA': n_components,
                'Method': 'Quantum',
                'Classifier': clf_name,
                **metrics
            })
    
    # Create results DataFrame
    results_df = pd.DataFrame(results)
    print("\nResults:")
    print(results_df.to_string())
    
    # Save results
    results_df.to_csv('classification_results.csv', index=False)

if __name__ == "__main__":
    main()

Quantum Processing: 100%|███████████████████████████████████████████████████████| 11606/11606 [01:04<00:00, 179.09it/s]
Quantum Processing: 100%|█████████████████████████████████████████████████████████| 2902/2902 [00:10<00:00, 265.75it/s]
Quantum Processing: 100%|████████████████████████████████████████████████████████| 11606/11606 [04:49<00:00, 40.03it/s]
Quantum Processing: 100%|██████████████████████████████████████████████████████████| 2902/2902 [01:00<00:00, 47.81it/s]
Quantum Processing: 100%|████████████████████████████████████████████████████████| 11606/11606 [22:04<00:00,  8.76it/s]
Quantum Processing: 100%|██████████████████████████████████████████████████████████| 2902/2902 [05:58<00:00,  8.10it/s]



Results:
    PCA     Method     Classifier  Accuracy  Precision    Recall        F1   ROC AUC     Kappa   Fit Time  Predict Time
0     2  Classical    Extra Trees  0.807719   0.805342  0.807719  0.804920  0.937715  0.763912   1.197822      0.246316
1     2    Quantum    Extra Trees  0.501378   0.504992  0.501378  0.503049  0.715841  0.390900   1.989539      0.275388
2     2  Classical        XGBoost  0.811854   0.808290  0.811854  0.805165  0.959092  0.768035  19.564713      0.062393
3     2    Quantum        XGBoost  0.614059   0.593482  0.614059  0.577536  0.862886  0.509544   1.346191      0.068851
4     2  Classical  Random Forest  0.805307   0.805060  0.805307  0.803661  0.946784  0.761031  15.093781      0.123917
5     2    Quantum  Random Forest  0.500345   0.502771  0.500345  0.501465  0.780882  0.389376   3.125525      0.193874
6    10  Classical    Extra Trees  0.928670   0.930073  0.928670  0.928417  0.992028  0.912598   1.367862      0.364647
7    10    Quantum    Extra Tr

In [6]:
data=pd.read_csv('Scenario-B-merged_5s.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14508 entries, 0 to 14507
Data columns (total 29 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Source IP          14508 non-null  object 
 1    Source Port       14508 non-null  int64  
 2    Destination IP    14508 non-null  object 
 3    Destination Port  14508 non-null  int64  
 4    Protocol          14508 non-null  int64  
 5    Flow Duration     14508 non-null  int64  
 6    Flow Bytes/s      14508 non-null  float64
 7    Flow Packets/s    14508 non-null  float64
 8    Flow IAT Mean     14508 non-null  float64
 9    Flow IAT Std      14508 non-null  float64
 10   Flow IAT Max      14508 non-null  int64  
 11   Flow IAT Min      14508 non-null  int64  
 12  Fwd IAT Mean       14508 non-null  float64
 13   Fwd IAT Std       14508 non-null  float64
 14   Fwd IAT Max       14508 non-null  int64  
 15   Fwd IAT Min       14508 non-null  int64  
 16  Bwd IAT Mean       145