This cell is responsible for managing imports.

In [1]:
from qiskit.providers.aer import QasmSimulator # quantum simulator class
from qiskit_machine_learning.algorithms import QSVC # quantum support vector classifier class
from qiskit_machine_learning.kernels.quantum_kernel import QuantumKernel # wraps feature map and backend to give to QSVC
import qiskit_machine_learning.datasets # for data sets
import qiskit.circuit.library           # for feature maps
import numpy as np

The functions in this cell are responsible for loading and preparing data sets for input to the QSVM classifier.

In [12]:
def _first_two_classes(label_array):
    """Returns the two smallest distinct labels found in label_array.

    np.unique returns its result sorted, so the selection does not depend on
    set iteration order. Raises ValueError if fewer than 2 distinct labels exist."""
    unique_labels = np.unique(label_array)
    if len(unique_labels) < 2:
        raise ValueError("at least 2 distinct classes are needed for binary classification")
    return unique_labels[:2]

def extract_binary_classes(feature_array, label_array, classes=None):
    """Takes a numpy array of feature vectors and a numpy array of labels
    and returns transformed numpy arrays with the number of classes reduced
    to 2.

    feature_array and label_array must have the same length; entry i of
    label_array is the label of feature vector i.
    classes is an optional pair of distinct labels (negative_class, positive_class)
    to keep. When it is None, the two smallest distinct labels in label_array are used.

    Returns a (features, labels) tuple containing only the samples of the two
    kept classes, in their original order. The returned labels are converted to
    0 (for classes[0]) and 1 (for classes[1]).

    Raises ValueError if classes is not a pair of distinct labels, or if classes
    is None and label_array holds fewer than 2 distinct labels."""
    feature_array = np.asarray(feature_array)
    label_array = np.asarray(label_array)
    if classes is None:
        classes = _first_two_classes(label_array)
    elif len(classes) != 2 or classes[0] == classes[1]:
        raise ValueError("classes must contain exactly 2 distinct labels")
    negative_class, positive_class = classes[0], classes[1]
    # keep only the samples that belong to one of the two selected classes
    keep_mask = (label_array == negative_class) | (label_array == positive_class)
    features = feature_array[keep_mask]
    # convert labels to 0 and 1: True where the label is the positive class
    labels = (label_array[keep_mask] == positive_class).astype(int)
    return (features, labels)

def prepare_data_set(dataset_loader_function, feature_count=4, make_binary=True, training_count=10, testing_count=20):
    """Given a data set loading function, loads the training and testing features and labels
    and converts the problem to a binary classification problem.

    dataset_loader_function is called as
    dataset_loader_function(training_count, testing_count, feature_count, one_hot=False)
    and must return (training_features, training_labels, testing_features, testing_labels).
    training_count is the maximum number of training inputs to load (running time
    scales quadratically with this number) and testing_count is the maximum number
    of testing inputs to load (running time scales linearly with this number).
    When make_binary is true, the two classes are chosen from the training labels
    and the same two classes (with the same 0/1 encoding) are extracted from the
    testing data, so both splits always describe the same problem.

    Returns (training_features, training_labels, testing_features, testing_labels).
    Raises ValueError if make_binary is true and the training labels hold fewer
    than 2 distinct classes."""
    # Note: for more information on what is happening here (and potentially how
    # to add custom data sets), check the source code of a qiskit_machine_learning
    # data set loading function
    training_features, training_labels, testing_features, testing_labels = dataset_loader_function(training_count, testing_count, feature_count, one_hot=False)
    # Convert the features and labels to a binary classification problem by removing classes
    # and corresponding features.
    if make_binary:
        # choose the classes once, from the training data, so that training and
        # testing cannot end up with different class pairs
        binary_classes = _first_two_classes(training_labels)
        training_features, training_labels = extract_binary_classes(training_features, training_labels, classes=binary_classes)
        testing_features, testing_labels = extract_binary_classes(testing_features, testing_labels, classes=binary_classes)
    return (training_features, training_labels, testing_features, testing_labels)


This cell defines a function that builds, trains, and returns a QSVC classifier from the parameters that determine it: the feature map to use, the data to train on, and the backend to run the training on.

In [3]:
def make_classifier(feature_map_instance, training_features, training_labels, backend, batch_size=1000):
    """Given a feature map instance, training features and labels, and a quantum backend,
    creates, trains, and returns a QSVM classifier.

    feature_map_instance is passed to QuantumKernel as feature_map and backend
    as quantum_instance. batch_size is forwarded to QuantumKernel unchanged; the
    default of 1000 is the value that used to be hard-coded here (it was noted as
    the QuantumKernel default - verify against the installed qiskit_machine_learning
    version).

    Returns the QSVC instance after fit() has been called on the training data."""
    # Create a quantum kernel from the feature map and
    # backend to give to the QSVC class.
    quantum_kernel = QuantumKernel(feature_map=feature_map_instance, batch_size=batch_size, quantum_instance=backend)
    # Create a QSVC instance
    qsvc = QSVC(quantum_kernel=quantum_kernel)
    # Perform training
    qsvc.fit(training_features, training_labels)
    # return classifier instance
    return qsvc

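Like the cell above, this cell defines a function that takes a specification for a classifier (a feature map class, a data set loader, and a backend). Instead of returning the trained classifier, it returns the classifier's generalisation metrics: its accuracy on the training data and on the testing data.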

In [4]:
# TODO: finalize what generalisation metrics should be used and calculate them
def process_combination(feature_map_class, dataset_loader_function, backend_instance, feature_count=4, repetitions=4):
    """Takes a feature map class, dataset loading function, and a backend, and
    returns the generalisation metrics of the combination of arguments.

    feature_count is used both as the feature_dimension of the feature map and
    as the number of features requested from the data set loader, so the two
    always agree. repetitions is passed to the feature map as reps. Both default
    to 4, the values that used to be fixed inside this function.

    Returns a (train_accuracy, test_accuracy) tuple holding the results of the
    classifier's score() on the training and the testing data."""
    # Create the feature map instance.
    feature_map_instance = feature_map_class(feature_dimension=feature_count, reps=repetitions)
    # load the data set for binary classification
    train_features, train_labels, test_features, test_labels = prepare_data_set(dataset_loader_function, feature_count=feature_count, make_binary=True)

    # create the classifier
    qsvc = make_classifier(feature_map_instance, train_features, train_labels, backend_instance)

    # get the classification accuracy on training and testing data as generalisation metrics
    train_accuracy = qsvc.score(train_features, train_labels)
    test_accuracy = qsvc.score(test_features, test_labels)
    # return the generalisation metrics
    return train_accuracy, test_accuracy


This cell defines a function that collects the generalisation information of different possible classifier configurations.

In [11]:
# TODO: maybe add ad-hoc datasets and look for other qiskit feature maps
def main():
    """Evaluates every data set / feature map combination on a simulator backend.

    Returns a dictionary mapping (data set index, feature map index) pairs to
    the generalisation metrics returned by process_combination for that pair.
    The indices refer to positions in the lists built at the top of this function."""
    # data sets and feature maps to run experiments with
    datasets_module = qiskit_machine_learning.datasets
    loaders = [datasets_module.digits,
               datasets_module.iris,
               datasets_module.breast_cancer,
               datasets_module.wine]
    circuit_library = qiskit.circuit.library
    map_classes = [circuit_library.PauliFeatureMap,
                   circuit_library.ZFeatureMap,
                   circuit_library.ZZFeatureMap]

    # quantum backend shared by all experiments
    # (should configure this to mimic IBMQ backend with QasmSimulator.from_backend(backend) method)
    simulator = QasmSimulator(method="density_matrix")

    print(f"Trying {len(loaders) * len(map_classes)} combinations.")
    # (data set index, feature map index) -> classification accuracies
    accuracies_by_pair = {}
    for loader_index in range(len(loaders)):
        for map_index in range(len(map_classes)):
            # TODO: do something with results like drawing graphs
            print(f"Data set {loader_index}, feature map {map_index}")
            accuracies_by_pair[(loader_index, map_index)] = process_combination(
                map_classes[map_index], loaders[loader_index], simulator)
    return accuracies_by_pair

This cell can be evaluated to actually perform the experiments.

In [None]:
# Run all experiments, then report the accuracies of each combination.
metrics = main()
for pair, accuracies in metrics.items():
    # pair is (data set index, feature map index); accuracies is (training, testing)
    loader_index, map_index = pair
    train_accuracy, test_accuracy = accuracies
    print(f"Dataset {loader_index} with feature map {map_index} has training accuracy {train_accuracy} and testing accuracy {test_accuracy}.")