In [1]:
import pennylane as qml
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score
import time
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

def preprocess_data(filepath, n_features=20):
    df = pd.read_csv(filepath)
    
    feature_cols = df.columns[:n_features]
    label_col = df.columns[-1]
    
    X = df[feature_cols].copy()
    y = df[label_col].copy()
    
    for col in X.columns:
        if X[col].dtype == 'object':
            if X[col].str.contains('.').any() and X[col].str.match(r'^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$').any():
                X[col] = X[col].apply(lambda x: sum(int(octet) * (256 ** (3-i)) for i, octet in enumerate(x.split('.'))))
            else:
                X[col] = pd.to_numeric(X[col], errors='coerce')
    
    X = X.replace([np.inf, -np.inf], np.nan)
    X = X.fillna(X.mean())
    
    # MinMax scaling for angle embedding (values between 0 and 2π)
    scaler = MinMaxScaler(feature_range=(0, 2*np.pi))
    X = scaler.fit_transform(X)
    
    le = LabelEncoder()
    y = le.fit_transform(y)
    
    return X, y, le.classes_

def create_angle_embedding_circuit(n_qubits):
    dev = qml.device("default.qubit", wires=n_qubits)
    
    @qml.qnode(dev)
    def angle_embedding_circuit(features):
        # Angle embedding
        qml.AngleEmbedding(features, wires=range(n_qubits))
        
        # Add entangling layers
        for i in range(n_qubits-1):
            qml.CRZ(0.5, wires=[i, i+1])
        
        # Add rotation layers
        for i in range(n_qubits):
            qml.Rot(features[i % len(features)], 0.5, 0.3, wires=i)
        
        return [qml.expval(qml.PauliZ(i)) for i in range(n_qubits)]
    
    return angle_embedding_circuit

def quantum_feature_extraction(X, n_qubits):
    circuit = create_angle_embedding_circuit(n_qubits)
    quantum_features = []
    batch_size = 1
    
    for i in tqdm(range(0, len(X), batch_size), desc="Quantum Processing"):
        batch = X[i:i + batch_size]
        batch_features = []
        
        for sample in batch:
            # Features are already scaled between 0 and 2π from preprocessing
            quantum_output = circuit(sample)
            batch_features.append(quantum_output)
            
        quantum_features.extend(batch_features)
    
    return np.array(quantum_features)

def evaluate_classifier(clf, X_train, X_test, y_train, y_test):
    start_fit = time.time()
    clf.fit(X_train, y_train)
    fit_time = time.time() - start_fit
    
    start_pred = time.time()
    y_pred = clf.predict(X_test)
    y_pred_proba = clf.predict_proba(X_test)
    pred_time = time.time() - start_pred
    
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted', zero_division=1),
        'Recall': recall_score(y_test, y_pred, average='weighted', zero_division=1),
        'F1': f1_score(y_test, y_pred, average='weighted', zero_division=1),
        'ROC AUC': roc_auc_score(y_test, y_pred_proba, multi_class='ovr'),
        'Kappa': cohen_kappa_score(y_test, y_pred),
        'Fit Time': fit_time,
        'Predict Time': pred_time
    }
    return metrics



In [2]:
def main():
    X, y, classes = preprocess_data("Scenario-B-merged_5s.csv")
    
    classifiers = {
        'Extra Trees': ExtraTreesClassifier(n_estimators=100, random_state=42, class_weight='balanced'),
        'XGBoost': XGBClassifier(random_state=42, scale_pos_weight=1),
        'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42, class_weight='balanced')
    }
    
    # For angle embedding, we use number of qubits equal to feature dimension
    pca_dimensions = [2, 10] #,15]
    results = []
    
    for n_components in pca_dimensions:
        pca = PCA(n_components=n_components)
        X_pca = pca.fit_transform(X)
        
        X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)
        
        for clf_name, clf in classifiers.items():
            metrics = evaluate_classifier(clf, X_train, X_test, y_train, y_test)
            metrics.update({
                'Method': 'Classical',
                'PCA_dim': n_components,
                'Classifier': clf_name
            })
            results.append(metrics)
        
        # Quantum evaluation with angle embedding
        n_qubits = n_components  # Each feature maps to one qubit in angle embedding
        X_quantum = quantum_feature_extraction(X_pca, n_qubits)
        X_train_q, X_test_q, y_train_q, y_test_q = train_test_split(X_quantum, y, test_size=0.2, random_state=42)
        
        for clf_name, clf in classifiers.items():
            metrics = evaluate_classifier(clf, X_train_q, X_test_q, y_train_q, y_test_q)
            metrics.update({
                'Method': 'Quantum-Angle',
                'PCA_dim': n_components,
                'Classifier': clf_name
            })
            results.append(metrics)
    
    results_df = pd.DataFrame(results)
    print(results_df.to_string())
    results_df.to_csv('quantum_angle_classical_comparison_results.csv', index=False)

if __name__ == "__main__":
    main()


Quantum Processing: 100%|███████████████████████████████████████████████████████| 14508/14508 [00:51<00:00, 281.09it/s]
Quantum Processing: 100%|████████████████████████████████████████████████████████| 14508/14508 [03:25<00:00, 70.67it/s]


    Accuracy  Precision    Recall        F1   ROC AUC     Kappa  Fit Time  Predict Time         Method  PCA_dim     Classifier
0   0.884562   0.884451  0.884562  0.883624  0.968406  0.858438  1.019667      0.182865      Classical        2    Extra Trees
1   0.875258   0.875515  0.875258  0.874296  0.978225  0.846969  1.180087      0.024838      Classical        2        XGBoost
2   0.881461   0.881447  0.881461  0.880233  0.970465  0.854624  1.783494      0.116982      Classical        2  Random Forest
3   0.828739   0.823408  0.828739  0.824236  0.942848  0.789657  2.965364      1.421738  Quantum-Angle        2    Extra Trees
4   0.823915   0.818457  0.823915  0.819021  0.958766  0.783532  9.536019      0.065754  Quantum-Angle        2        XGBoost
5   0.828394   0.822769  0.828394  0.824030  0.946275  0.789194  2.306261      0.115220  Quantum-Angle        2  Random Forest
6   0.962440   0.962662  0.962440  0.962111  0.997341  0.954030  0.873850      0.131773      Classical       10