In [3]:
# General Imports
import numpy as np
import pandas as pd


# Visualisation Imports
import matplotlib.pyplot as plt

# Scikit Imports
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.datasets import load_breast_cancer

# Qiskit Imports
from qiskit import Aer, execute
from qiskit.circuit import QuantumCircuit, Parameter, ParameterVector
from qiskit.circuit.library import PauliFeatureMap, ZFeatureMap, ZZFeatureMap
from qiskit.circuit.library import TwoLocal, NLocal, RealAmplitudes, EfficientSU2
from qiskit.circuit.library import HGate, RXGate, RYGate, RZGate, CXGate, CRXGate, CRZGate
from qiskit_machine_learning.kernels import QuantumKernel

In [4]:
df = load_breast_cancer()
name = 'breast_cancer'

In [5]:
#Breast Cancer Dataset

data = pd.DataFrame(data=df, columns=df.feature_names)
data.columns = ['feature{}'.format(i) for i in range(0,data.shape[1])]

dataset = pd.DataFrame(df.data).assign(target=df.target)
print (dataset.shape,pd.DataFrame(df.target).nunique().tolist()[-1:])

(569, 31) [2]


In [4]:
# Split dataset
sample_train, sample_test, label_train, label_test = train_test_split(
     df.data, df.target, test_size=0.2, random_state=22)

# Reduce dimensions (the dataset now has a dimensionality of 31 attributes)
n_dim = 30 #number of qubits we want to use
pca = PCA(n_components=n_dim).fit(sample_train)
sample_train = pca.transform(sample_train)
sample_test = pca.transform(sample_test)

# Normalise
std_scale = StandardScaler().fit(sample_train)
sample_train = std_scale.transform(sample_train)
sample_test = std_scale.transform(sample_test)

# Scale
samples = np.append(sample_train, sample_test, axis=0)
minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
sample_train = minmax_scale.transform(sample_train)
sample_test = minmax_scale.transform(sample_test)

# Select
train_size = 100
sample_train = sample_train[:train_size]
label_train = label_train[:train_size]

test_size = 20
sample_test = sample_test[:test_size]
label_test = label_test[:test_size]

In [5]:
print(sample_train[0], label_train[0])
print(sample_test[0], label_test[0])

[-0.67877167 -0.32887698 -0.64391848 -0.50048413  0.20973561  1.
 -0.24937677 -0.33193207 -0.29526755 -0.07666853  0.22008304 -0.05974142
  0.04167625  0.08064722 -0.25052035  0.38897202 -0.5436402   0.28774898
  0.00547205 -0.28492198  0.44259583  0.43093191 -0.37247299 -0.62579648
 -0.35858197 -0.07113956  0.17223689  0.64485952 -0.4449351  -0.1369305 ] 0
[-0.68758509  0.0201601  -0.71602497  0.1402525   0.22277054 -0.20188052
  0.20999774 -0.07640624 -0.1460066  -0.43104741 -0.22748698 -0.0075966
 -0.21643985 -0.12164196 -0.28725603 -0.18949382 -0.41093089 -0.02013025
  0.03987041 -0.1693084   0.31341618  0.01503809 -0.17979486 -0.36846553
 -0.02033012 -0.02271395 -0.17458868 -0.1318063  -0.28286751 -0.16740533] 1


With our training and testing datasets ready, we set up the QuantumKernel class with the ZZFeatureMap, and use the BasicAer statevector_simulator to estimate the training and testing kernel matrices.

In [6]:
zz_map = ZZFeatureMap(feature_dimension=30, reps=2, entanglement='linear', insert_barriers=True)

zz_kernel = QuantumKernel(feature_map=zz_map, quantum_instance=Aer.get_backend('statevector_simulator'))

Let's calculate the transition amplitude between the first and second training data samples, one of the entries in the training kernel matrix.

Let's createe a circuit for the first sample and try to simulate

In [10]:
zz_circuit = zz_kernel.construct_circuit(sample_train[0], sample_train[1])
#zz_circuit.decompose().decompose().draw(output='mpl')

We then simulate the circuit. We will use the `qasm_simulator` since the circuit contains measurements, but increase the number of shots to reduce the effect of sampling noise.

In [None]:
backend = Aer.get_backend('qasm_simulator')
job = execute(zz_circuit, backend, shots=8192, 
              seed_simulator=1024, seed_transpiler=1024)
counts = job.result().get_counts(zz_circuit)

This process is then repeated for each pair of training data samples to fill in the training kernel matrix, and between each training and testing data sample to fill in the testing kernel matrix. Note that each matrix is symmetric, so to reduce computation time, only half the entries are calculated explictly. 

In [None]:
matrix_train = zz_kernel.evaluate(x_vec=sample_train)
matrix_test = zz_kernel.evaluate(x_vec=sample_test, y_vec=sample_train)

fig, axs = plt.subplots(1, 2, figsize=(10, 5))
axs[0].imshow(np.asmatrix(matrix_train),
              interpolation='nearest', origin='upper', cmap='Blues')
axs[0].set_title("training kernel matrix")
axs[1].imshow(np.asmatrix(matrix_test),
              interpolation='nearest', origin='upper', cmap='Reds')
axs[1].set_title("testing kernel matrix")
plt.show()

In [None]:
zzcb_svc = SVC(kernel=zz_kernel.evaluate)
zzcb_svc.fit(sample_train, label_train)
zzcb_score = zzcb_svc.score(sample_test, label_test)

print(f'Callable kernel classification test score: {zzcb_score}')