In [1]:
!pip install pennylane pandas numpy scikit-learn matplotlib seaborn



In [2]:
import pennylane as qml
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, ClassifierMixin

In [3]:
df = pd.read_csv('C:/Users/SWATI SARASWATHI.S.J/Downloads/GDSC2-dataset.csv')

# Extract features and target
X = df[['MIN_CONC', 'MAX_CONC', 'LN_IC50', 'AUC', 'RMSE']].values
y = df['Z_SCORE'].values > 0  # Binary classification based on Z-score

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Set up a PennyLane device
n_qubits = X_train.shape[1]  # One qubit per feature
dev = qml.device("default.qubit", wires=n_qubits)

In [4]:
# Define a quantum circuit for data encoding
@qml.qnode(dev)
def quantum_circuit(x):
    # Encode features
    for i in range(n_qubits):
        qml.RY(x[i], wires=i)
    
    # Create entanglement
    for i in range(n_qubits-1):
        qml.CNOT(wires=[i, i+1])
    qml.CNOT(wires=[n_qubits-1, 0])  # Close the loop
    
    # Second encoding layer
    for i in range(n_qubits):
        qml.RY(x[i], wires=i)
        
    # Return the quantum state
    return qml.state()

In [5]:
# Define quantum kernel function
def quantum_kernel(x1, x2):
    state1 = quantum_circuit(x1)
    state2 = quantum_circuit(x2)
    inner_product = np.vdot(state1, state2)
    # Extract scalar value if inner_product is an array
    if hasattr(inner_product, 'shape') and inner_product.shape:
        inner_product = inner_product.item()  # Convert to scalar
    return np.float32(np.abs(inner_product)**2)

class OnTheFlyQuantumSVM(BaseEstimator, ClassifierMixin):
    """
    SVM classifier using on-the-fly quantum kernel computation
    instead of precomputing the full kernel matrix
    """
    def __init__(self, C=1.0, max_iter=50, tol=1e-3):
        self.C = C
        self.max_iter = max_iter
        self.tol = tol
        self.support_vectors_ = None
        self.dual_coef_ = None
        self.classes_ = None
        self.intercept_ = None
        self._X_train = None
        
    def _compute_kernel_row(self, x, X_support):
        """Compute kernel values between x and all support vectors"""
        kernel_row = np.zeros(len(X_support), dtype=np.float32)
        for i, sv in enumerate(X_support):
            kernel_row[i] = quantum_kernel(x, sv)
        return kernel_row
    
    def fit(self, X, y):
        """Fit the SVM model using custom kernel"""
        print("Fitting Quantum SVM with on-the-fly kernel calculation...")
        self.classes_ = np.unique(y)
        self._X_train = X
        
        # Step 1: Use LinearSVC to get an initial set of support vectors
        from sklearn.svm import LinearSVC
        print("Identifying potential support vectors...")
        linear_svc = LinearSVC(C=self.C)
        linear_svc.fit(X, y)
        
        # Step 2: Select a reasonable subset of samples likely to be support vectors
        print("Selecting subset of potential support vectors...")
        distances = np.abs(linear_svc.decision_function(X))
        max_subset_size = min(300, len(X))  # Limit subset size
        closest_idx = np.argsort(distances)[:max_subset_size]  # Take points closest to the margin
        X_subset = X[closest_idx]
        y_subset = y[closest_idx]
        
        # Step 3: Compute kernel matrix just for this subset
        print(f"Computing kernel matrix for subset of {len(X_subset)} samples...")
        K_subset = np.zeros((len(X_subset), len(X_subset)), dtype=np.float32)
        for i, x1 in enumerate(X_subset):
            for j, x2 in enumerate(X_subset):
                K_subset[i, j] = quantum_kernel(x1, x2)
        
        # Step 4: Train SVC on this subset with precomputed kernel
        print("Training SVM on subset with precomputed kernel...")
        subset_svm = SVC(C=self.C, kernel='precomputed', max_iter=self.max_iter, tol=self.tol)
        subset_svm.fit(K_subset, y_subset)
        
        # Step 5: Extract support vectors from the subset
        sv_indices_in_subset = subset_svm.support_
        self.support_vectors_ = X_subset[sv_indices_in_subset]
        self.dual_coef_ = subset_svm.dual_coef_
        self.intercept_ = subset_svm.intercept_
        
        print(f"Model trained with {len(self.support_vectors_)} support vectors")
        return self
    
    def decision_function(self, X):
        """Compute decision function values for samples in X"""
        if self.support_vectors_ is None:
            raise Exception("Model not fitted yet")
        
        print(f"Computing decision function for {len(X)} samples...")
        decision_values = np.zeros(len(X))
        
        intercept = self.intercept_.item() if hasattr(self.intercept_, 'item') else self.intercept_
        
        # For each test sample
        for i, x in enumerate(X):
            # Compute kernel values between x and all support vectors on-the-fly
            kernel_row = self._compute_kernel_row(x, self.support_vectors_)
            
            # Decision function value = sum(alpha_i * y_i * K(x_i, x)) + b
            decision_values[i] = np.sum(self.dual_coef_ * kernel_row) + intercept
            
        return decision_values
    
    def predict(self, X):
        """Predict class labels for samples in X"""
        decision = self.decision_function(X)
        if len(self.classes_) == 2:
            return np.where(decision > 0, self.classes_[1], self.classes_[0])
        else:
            raise NotImplementedError("Multi-class not implemented yet")

In [6]:
print("Training quantum SVM with on-the-fly kernel calculation...")
qsvm_on_the_fly = OnTheFlyQuantumSVM(C=1.0)
qsvm_on_the_fly.fit(X_train, y_train)
y_pred_quantum = qsvm_on_the_fly.predict(X_test)

# Evaluate quantum SVM
q_accuracy = accuracy_score(y_test, y_pred_quantum)
q_precision = precision_score(y_test, y_pred_quantum)
q_recall = recall_score(y_test, y_pred_quantum)
q_f1 = f1_score(y_test, y_pred_quantum)

print("\nQuantum SVM Results:")
print(f"Accuracy: {q_accuracy:.4f}")
print(f"Precision: {q_precision:.4f}")
print(f"Recall: {q_recall:.4f}")
print(f"F1 Score: {q_f1:.4f}")

Training quantum SVM with on-the-fly kernel calculation...
Fitting Quantum SVM with on-the-fly kernel calculation...
Identifying potential support vectors...
Selecting subset of potential support vectors...
Computing kernel matrix for subset of 300 samples...
Training SVM on subset with precomputed kernel...
Model trained with 100 support vectors
Computing decision function for 60509 samples...





Quantum SVM Results:
Accuracy: 0.4882
Precision: 0.4903
Recall: 0.4030
F1 Score: 0.4424


In [7]:
# Use precomputed kernel with SVC
print("Training quantum SVM...")
qsvm = SVC(kernel='precomputed')
qsvm.fit(K_train, y_train)
y_pred_quantum = qsvm.predict(K_test)

# Evaluate quantum SVM
q_accuracy = accuracy_score(y_test, y_pred_quantum)
q_precision = precision_score(y_test, y_pred_quantum)
q_recall = recall_score(y_test, y_pred_quantum)
q_f1 = f1_score(y_test, y_pred_quantum)

print("\nQuantum SVM Results:")
print(f"Accuracy: {q_accuracy:.4f}")
print(f"Precision: {q_precision:.4f}")
print(f"Recall: {q_recall:.4f}")
print(f"F1 Score: {q_f1:.4f}")

# Compare with classical SVM
print("\nTraining classical SVM...")
classical_svm = SVC(kernel='rbf')
classical_svm.fit(X_train, y_train)
y_pred_classical = classical_svm.predict(X_test)

# Calculate classical performance
c_accuracy = accuracy_score(y_test, y_pred_classical)
c_precision = precision_score(y_test, y_pred_classical)
c_recall = recall_score(y_test, y_pred_classical)
c_f1 = f1_score(y_test, y_pred_classical)

print("\nClassical SVM Results:")
print(f"Accuracy: {c_accuracy:.4f}")
print(f"Precision: {c_precision:.4f}")
print(f"Recall: {c_recall:.4f}")
print(f"F1 Score: {c_f1:.4f}")

# Plot confusion matrices
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Quantum SVM confusion matrix
q_cm = confusion_matrix(y_test, y_pred_quantum)
sns.heatmap(q_cm, annot=True, fmt='d', cmap='Blues', ax=ax1)
ax1.set_xlabel('Predicted Labels')
ax1.set_ylabel('True Labels')
ax1.set_title('Quantum SVM Confusion Matrix')

# Classical SVM confusion matrix
c_cm = confusion_matrix(y_test, y_pred_classical)
sns.heatmap(c_cm, annot=True, fmt='d', cmap='Blues', ax=ax2)
ax2.set_xlabel('Predicted Labels')
ax2.set_ylabel('True Labels')
ax2.set_title('Classical SVM Confusion Matrix')

plt.tight_layout()
plt.show()

Training quantum SVM...


NameError: name 'K_train' is not defined

In [None]:
# Compare performance metrics
metrics = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'F1 Score'],
    'Quantum SVM': [q_accuracy, q_precision, q_recall, q_f1],
    'Classical SVM': [c_accuracy, c_precision, c_recall, c_f1]
})

plt.figure(figsize=(10, 6))
metrics_melted = pd.melt(metrics, id_vars=['Metric'], var_name='Model', value_name='Score')
sns.barplot(x='Metric', y='Score', hue='Model', data=metrics_melted)
plt.ylim(0, 1)
plt.title('Performance Comparison: Quantum vs Classical SVM')
plt.show()

In [None]:
# Feature importance analysis
feature_names = ['MIN_CONC', 'MAX_CONC', 'LN_IC50', 'AUC', 'RMSE']

# Test each feature's contribution by computing kernel with only that feature
feature_importance = []
for i in range(len(feature_names)):
    # Create mask where only one feature is active
    mask = np.zeros(n_qubits, dtype=bool)
    mask[i] = True
    
    # Compute kernels with just this feature
    X_train_masked = X_train.copy()
    X_train_masked[:, ~mask] = 0
    X_test_masked = X_test.copy()
    X_test_masked[:, ~mask] = 0
    
    K_train_single = kernel_matrix(X_train_masked, X_train_masked)
    K_test_single = kernel_matrix(X_test_masked, X_train_masked)
    
    # Train and evaluate model with just this feature
    qsvm_single = SVC(kernel='precomputed')
    qsvm_single.fit(K_train_single, y_train)
    y_pred_single = qsvm_single.predict(K_test_single)
    
    # Calculate accuracy with just this feature
    acc_single = accuracy_score(y_test, y_pred_single)
    feature_importance.append(acc_single)

# Normalize feature importances
feature_importance = np.array(feature_importance)
feature_importance = feature_importance / np.sum(feature_importance)

# Plot feature importance
plt.figure(figsize=(10, 6))
sns.barplot(x=feature_names, y=feature_importance)
plt.title('Feature Importance in Quantum Kernel Classification')
plt.ylabel('Normalized Importance')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()