In [None]:
import numpy as np
import psutil  # For memory tracking
import pandas as pd
import joblib  # For model saving/loading
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC  # Classical SVM
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import time
import os

class HybridSVM:
    def __init__(self, data, pca_components, reps=1):
        print("Initializing Hybrid SVM (Quantum Kernel)...")
        self.data = data
        self.pca_components = pca_components
        self.reps = reps
        self.apply_pca()
        self.initialize_quantum_kernel()

    def apply_pca(self):
        """Reduce feature dimensions using PCA"""
        print("Applying PCA...")
        pca = PCA(n_components=self.pca_components)
        self.data.train_features = pca.fit_transform(self.data.train_features)
        self.data.test_features = pca.transform(self.data.test_features)
        print(f"PCA Variance Ratio: {sum(pca.explained_variance_ratio_):.4f}")

    def initialize_quantum_kernel(self):
        """Set up the quantum feature map and fidelity kernel"""
        print("Initializing quantum kernel...")
        feature_map = ZZFeatureMap(feature_dimension=self.pca_components, reps=self.reps, entanglement="linear")
        self.quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    @staticmethod
    def svm_hyperparameter_tuning(train_features, train_labels):
        """Tune SVM hyperparameters using GridSearchCV"""
        print("Starting hyperparameter tuning...")

        param_grid = {
            'C': [0.1, 1, 10, 100],
            'kernel': ['linear', 'rbf', 'poly', 'sigmoid'],
            'gamma': ['scale', 'auto', 0.01, 0.1, 1],
            'degree': [2, 3, 4]  # Only for 'poly'
        }

        svc = SVC()
        grid_search = GridSearchCV(svc, param_grid, cv=4, scoring='accuracy', verbose=1, n_jobs=-1)
        grid_search.fit(train_features, train_labels)

        print(f"Best Parameters: {grid_search.best_params_}")
        print(f"Best Cross-Validation Score: {grid_search.best_score_:.4f}")

        return grid_search.best_estimator_

    def fit(self):
        """Train Hybrid SVM with memory tracking and hyperparameter tuning"""
        print("Training Hybrid SVM...")

        quantum_train_features = self.quantum_kernel.evaluate(self.data.train_features, self.data.train_features)

        process = psutil.Process()
        mem_before = process.memory_info().rss / (1024 * 1024)

        start_time = time.time()
        self.svm = self.svm_hyperparameter_tuning(quantum_train_features, self.data.train_labels)
        end_time = time.time()

        mem_after = process.memory_info().rss / (1024 * 1024)
        elapsed = end_time - start_time

        if elapsed > 60:
            print(f"Hybrid SVM training complete. Time taken: {elapsed / 60:.2f} minutes.")
        else:
            print(f"Hybrid SVM training complete. Time taken: {elapsed:.2f} seconds.")

        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during training)")

    def evaluate(self):
        """Evaluate Hybrid SVM using accuracy metrics and memory tracking"""
        print("Evaluating Hybrid SVM...")

        quantum_test_features = self.quantum_kernel.evaluate(self.data.test_features, self.data.train_features)

        process = psutil.Process()
        mem_before = process.memory_info().rss / (1024 * 1024)

        start_time = time.time()
        predictions = self.svm.predict(quantum_test_features)
        end_time = time.time()

        mem_after = process.memory_info().rss / (1024 * 1024)
        elapsed = end_time - start_time

        if elapsed > 60:
            print(f"Hybrid SVM evaluation complete. Time taken: {elapsed / 60:.2f} minutes.")
        else:
            print(f"Hybrid SVM evaluation complete. Time taken: {elapsed:.2f} seconds.")

        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during evaluation)")

        acc = accuracy_score(self.data.test_labels, predictions)
        prec = precision_score(self.data.test_labels, predictions, average="weighted")
        rec = recall_score(self.data.test_labels, predictions, average="weighted")
        f1 = f1_score(self.data.test_labels, predictions, average="weighted")
        conf_matrix = confusion_matrix(self.data.test_labels, predictions)

        print(f"Hybrid SVM Accuracy: {acc:.4f}")
        print(f"Precision: {prec:.4f}")
        print(f"Recall: {rec:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print("Confusion Matrix:\n", conf_matrix)

        return {
            "accuracy": acc,
            "precision": prec,
            "recall": rec,
            "f1_score": f1,
            "confusion_matrix": conf_matrix
        }

    def save_model(self, filename="Saved_Models/hybrid_svm_small.pkl"):
        """Save the trained Hybrid SVM model"""
        if not hasattr(self, "svm") or self.svm is None:
            print("Error: No trained model found. Train the Hybrid SVM first.")
            return

        print("Saving Hybrid SVM model...")
        joblib.dump(self.svm, filename)
        print(f"Hybrid SVM model saved to {filename}")

class Data:
    def __init__(self, filename):
        print("Initializing dataset...")
        self.filename = filename
        self.load_data()
        self.prepare_datasets()

    def load_data(self):
        """Load dataset and apply necessary preprocessing"""
        print("Loading dataset...")
        self.df = pd.read_csv(self.filename)

    def prepare_datasets(self):
        """Split dataset into train/test and scale features"""
        print("Preparing datasets...")

        features = self.df.drop(columns=["HeartDisease"]).to_numpy()
        labels = self.df["HeartDisease"].to_numpy()

        scaler = MinMaxScaler(feature_range=(0, 2 * np.pi))
        features = scaler.fit_transform(features)

        train_features, test_features, train_labels, test_labels = train_test_split(
            features, labels, test_size=0.2, stratify=labels, random_state=42
        )

        unique, counts = np.unique(train_labels, return_counts=True)
        class_distribution = dict(zip(unique, counts))
        print(f"Class Distribution in Train Dataset: {class_distribution}")

        self.train_features, self.train_labels = train_features, train_labels
        self.test_features, self.test_labels = test_features, test_labels

        print(f"Final Train set size: {len(self.train_features)}, Test set size: {len(self.test_features)}")

dataset = Data('Heart Prediction Quantum Dataset.csv')
hybrid_svm = HybridSVM(dataset, pca_components=6, reps=2)
hybrid_svm.fit()
metrics = hybrid_svm.evaluate()
hybrid_svm.save_model()


Initializing dataset...
Loading dataset...
Preparing datasets...
Class Distribution in Train Dataset: {np.int64(0): np.int64(160), np.int64(1): np.int64(240)}
Final Train set size: 400, Test set size: 100
Initializing Hybrid SVM (Quantum Kernel)...
Applying PCA...
PCA Variance Ratio: 1.0000
Initializing quantum kernel...
Training Hybrid SVM...
Starting hyperparameter tuning...
Fitting 4 folds for each of 240 candidates, totalling 960 fits
Best Parameters: {'C': 1, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
Best Cross-Validation Score: 0.6200
Hybrid SVM training complete. Time taken: 14.72 seconds.
Memory Usage: -2.47 MB (during training)
Evaluating Hybrid SVM...
Hybrid SVM evaluation complete. Time taken: 0.00 seconds.
Memory Usage: 0.00 MB (during evaluation)
Hybrid SVM Accuracy: 0.6000
Precision: 0.5612
Recall: 0.6000
F1 Score: 0.4671
Confusion Matrix:
 [[ 1 39]
 [ 1 59]]
Saving Hybrid SVM model...
Hybrid SVM model saved to hybrid_svm_small.pkl
