In [None]:
import os
import time

import joblib  # For model saving/loading
import numpy as np
import pandas as pd
import psutil  # For memory tracking
from qiskit.circuit.library import ZZFeatureMap
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import FidelityQuantumKernel
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Optional cap on dataset size to prevent memory issues (currently disabled)
# MAX_SAMPLES = 2500

class QSVM:
    """Quantum-kernel SVM classifier with a PCA preprocessing step.

    On construction the train/test features held by ``data`` are projected
    onto ``pca_components`` principal components (in place, on the ``data``
    object itself) and a fidelity quantum kernel over a ``ZZFeatureMap`` is
    built. Call :meth:`fit` to train and :meth:`evaluate` to score.

    Attributes:
        data: Dataset object exposing ``train_features``, ``train_labels``,
            ``test_features`` and ``test_labels``.
        pca_components: Number of principal components kept; also used as the
            feature dimension of the feature map.
        reps: Number of repetitions of the feature-map circuit.
        pca: The fitted ``PCA`` instance (set by :meth:`apply_pca`).
        quantum_kernel: The ``FidelityQuantumKernel`` used by the classifier.
        qsvc: The trained ``QSVC``; only present after :meth:`fit` or a
            successful :meth:`load_model`.
    """

    def __init__(self, data, pca_components, reps=1):
        """Store the configuration, reduce the features and build the kernel.

        Args:
            data: Dataset object; its ``train_features`` / ``test_features``
                attributes are overwritten with their PCA projections.
            pca_components: Number of principal components to keep.
            reps: Number of feature-map repetitions (default 1).
        """
        print("Initializing QSVM...")
        self.data = data
        self.pca_components = pca_components
        self.reps = reps
        self.apply_pca()
        self.initialize_quantum_kernel()

    @staticmethod
    def _rss_mb(process):
        """Return the resident set size of *process* in megabytes."""
        return process.memory_info().rss / (1024 * 1024)

    def apply_pca(self):
        """Reduce feature dimensions using PCA.

        The PCA is fitted on the training features only and then applied to
        the test features. Both arrays on ``self.data`` are replaced by their
        projections, so calling this twice would project already-projected
        data.
        """
        print("Applying PCA...")
        pca = PCA(n_components=self.pca_components)
        self.data.train_features = pca.fit_transform(self.data.train_features)
        self.data.test_features = pca.transform(self.data.test_features)
        # Keep the fitted transformer so new samples can be projected the same
        # way later (e.g. before predicting with a reloaded model).
        self.pca = pca
        print(f"PCA Variance Ratio: {sum(pca.explained_variance_ratio_):.4f}")

    def initialize_quantum_kernel(self):
        """Set up the quantum feature map and fidelity kernel."""
        print("Initializing quantum kernel...")
        feature_map = ZZFeatureMap(feature_dimension=self.pca_components, reps=self.reps, entanglement="linear")
        self.quantum_kernel = FidelityQuantumKernel(feature_map=feature_map)

    def fit(self):
        """Train the QSVC on the training split and report time and memory.

        The reported memory figure is the change in the process's resident
        set size between the start and end of training, not a peak value.
        """
        print("Training QSVM...")

        # Track memory usage before training
        process = psutil.Process()
        mem_before = self._rss_mb(process)

        # perf_counter is monotonic, so the elapsed time cannot be skewed by
        # system clock adjustments during a long training run.
        start_time = time.perf_counter()
        qsvc = QSVC(quantum_kernel=self.quantum_kernel, C=1.0)
        qsvc.fit(self.data.train_features, self.data.train_labels)
        elapsed = time.perf_counter() - start_time
        # Publish the estimator only once training succeeded, so a failed fit
        # never leaves an unfitted model behind for save_model() to persist.
        self.qsvc = qsvc

        # Track memory usage after training
        mem_after = self._rss_mb(process)

        print(f"QSVM training complete. Time taken: {elapsed / 60:.2f} minutes.")
        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during training)")

    def evaluate(self):
        """Predict on the test split and compute classification metrics.

        Precision, recall and F1 are computed with ``average="weighted"``.

        Returns:
            dict: Keys ``"accuracy"``, ``"precision"``, ``"recall"``,
            ``"f1_score"`` (floats) and ``"confusion_matrix"`` (array).

        Raises:
            AttributeError: If no model has been trained or loaded yet
                (``self.qsvc`` does not exist).
        """
        print("Evaluating QSVM...")

        # Track memory usage before evaluation
        process = psutil.Process()
        mem_before = self._rss_mb(process)

        start_time = time.perf_counter()
        predictions = self.qsvc.predict(self.data.test_features)
        elapsed = time.perf_counter() - start_time

        # Track memory usage after evaluation
        mem_after = self._rss_mb(process)

        print(f"QSVM evaluation complete. Time taken: {elapsed / 60:.2f} minutes.")
        print(f"Memory Usage: {mem_after - mem_before:.2f} MB (during evaluation)")

        # Compute performance metrics
        true_labels = self.data.test_labels
        acc = accuracy_score(true_labels, predictions)
        prec = precision_score(true_labels, predictions, average="weighted")
        rec = recall_score(true_labels, predictions, average="weighted")
        f1 = f1_score(true_labels, predictions, average="weighted")
        conf_matrix = confusion_matrix(true_labels, predictions)

        # Display results
        print(f"QSVM Accuracy: {acc:.4f}")
        print(f"Precision: {prec:.4f}")
        print(f"Recall: {rec:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print("Confusion Matrix:\n", conf_matrix)

        return {
            "accuracy": acc,
            "precision": prec,
            "recall": rec,
            "f1_score": f1,
            "confusion_matrix": conf_matrix
        }

    def save_model(self, filename="qsvm_midmodel.pkl"):
        """Save the trained QSVC to *filename* with joblib.

        Only the classifier is written; the fitted PCA is not part of the
        file. If no model has been trained or loaded, an error message is
        printed and nothing is written.

        Args:
            filename: Destination path of the serialized model.
        """
        if getattr(self, "qsvc", None) is None:
            print("Error: No trained model found. Train the QSVM first.")
            return

        print("Saving QSVM model...")
        joblib.dump(self.qsvc, filename)
        print(f"QSVM model saved to {filename}")

    def load_model(self, filename="qsvm_midmodel.pkl"):
        """Load a previously saved QSVC from *filename*.

        If the file does not exist, an error message is printed and the
        current model (if any) is left untouched.

        Args:
            filename: Path of a model file written by :meth:`save_model`.
        """
        if not os.path.exists(filename):
            print(f"Error: Model file {filename} not found.")
            return

        print("Loading QSVM model...")
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files from a trusted source.
        self.qsvc = joblib.load(filename)
        print(f"QSVM model loaded from {filename}")

class Data:
    """Load a labelled CSV dataset and expose scaled train/test splits.

    Attributes:
        filename: Path of the CSV file.
        label_column: Name of the column holding the class labels.
        test_size: Fraction (or count) of samples held out for testing.
        random_state: Seed passed to the train/test split.
        df: The raw ``DataFrame`` read from ``filename``.
        scaler: ``MinMaxScaler`` fitted on the training features.
        train_features, train_labels: Training split (features scaled).
        test_features, test_labels: Test split (features scaled).
    """

    def __init__(self, filename, label_column="HAS ADHD", test_size=0.2, random_state=42):
        """Read the CSV and build the train/test splits.

        Args:
            filename: Path of the CSV file to load.
            label_column: Column containing the labels; every other column is
                treated as a feature.
            test_size: Forwarded to ``train_test_split``.
            random_state: Seed forwarded to ``train_test_split``.
        """
        print("Initializing dataset...")
        self.filename = filename
        self.label_column = label_column
        self.test_size = test_size
        self.random_state = random_state
        self.load_data()
        self.prepare_datasets()

    def load_data(self):
        """Read the CSV at ``self.filename`` into ``self.df``."""
        print("Loading dataset...")
        self.df = pd.read_csv(self.filename)

    def prepare_datasets(self):
        """Split the dataset into train/test and scale the features.

        The split is stratified on the labels. Feature scaling is fitted on
        the training split only and then applied to the test split, so no
        statistics of the held-out data leak into preprocessing.
        """
        print("Preparing datasets...")

        # Extract features and labels
        features = self.df.drop(columns=[self.label_column]).to_numpy()
        labels = self.df[self.label_column].to_numpy()

        # Stratified train-test split (done BEFORE scaling to avoid leakage)
        train_features, test_features, train_labels, test_labels = train_test_split(
            features,
            labels,
            test_size=self.test_size,
            stratify=labels,
            random_state=self.random_state,
        )

        # Scale features to the range (0, 2π) using training statistics only.
        # Test values beyond the training min/max may therefore fall slightly
        # outside that range.
        scaler = MinMaxScaler(feature_range=(0, 2 * np.pi))
        train_features = scaler.fit_transform(train_features)
        test_features = scaler.transform(test_features)
        self.scaler = scaler

        # Class distribution in train dataset
        unique, counts = np.unique(train_labels, return_counts=True)
        class_distribution = dict(zip(unique, counts))
        print(f"Class Distribution in Train Dataset: {class_distribution}")

        # Store processed features and labels
        self.train_features, self.train_labels = train_features, train_labels
        self.test_features, self.test_labels = test_features, test_labels

        print(f"Final Train set size: {len(self.train_features)}, Test set size: {len(self.test_features)}")
        


# Load dataset and train QSVM.
# Data() reads the CSV and builds the scaled train/test split; QSVM() then
# applies PCA (6 components) and builds the quantum kernel (2 feature-map
# repetitions) before fit() trains the classifier.
dataset = Data('MLsheet - SRSno-avg.csv')
qsvm = QSVM(dataset, pca_components=6, reps=2)
qsvm.fit()

# Evaluate QSVM performance on the test split; `metrics` holds the dict
# returned by QSVM.evaluate() (accuracy, precision, recall, f1_score,
# confusion_matrix).
metrics = qsvm.evaluate()

# Save the trained model for later use (default file: qsvm_midmodel.pkl)
qsvm.save_model()

Initializing dataset...
Loading dataset...
Preparing datasets...
Class Distribution in Train Dataset: {np.int64(0): np.int64(834), np.int64(1): np.int64(806)}
Final Train set size: 1640, Test set size: 410
Initializing QSVM...
Applying PCA...
PCA Variance Ratio: 1.0000
Initializing quantum kernel...
Training QSVM...
QSVM training complete. Time taken: 71.65 minutes.
Memory Usage: 7925.36 MB (during training)
Evaluating QSVM...
QSVM evaluation complete. Time taken: 44.47 minutes.
Memory Usage: 10413.93 MB (during evaluation)
QSVM Accuracy: 0.9707
Precision: 0.9708
Recall: 0.9707
F1 Score: 0.9707
Confusion Matrix:
 [[202   7]
 [  5 196]]
Saving QSVM model...
QSVM model saved to qsvm_midmodel.pkl
