# Multiclass Variational Quantum Classifier

In [46]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

#myqlm
from qat.lang.AQASM import Program, H, RX, RY, RZ, Z, CNOT
from qat.lang.AQASM import *
from qat.qpus import get_default_qpu
from qat.core import Observable, Term
from qat.plugins import ScipyMinimizePlugin

#imports
import ipynb
import import_ipynb
from vqc_functions import data_embedding, ansatz

## Data importing and preprocessing

The features are rescaled with min-max normalization to the range from $0$ to $2\pi$, so that each value can be used as an angle.

In [47]:
# Load the raw table; the last column holds the class label and every
# preceding column is a feature.
data = pd.read_csv(r'C:\\Users\\OS-5818\\Downloads\\credit_data.txt')
features, label = data.iloc[:, :-1], data.iloc[:, -1]

In [48]:
# Min-max scale every feature column into [0, 2*pi] (angles).
# NOTE(review): the scaler is fit on all rows before the train/test split,
# so the test rows contribute to the minima/maxima used for scaling.
x = features.values  # underlying numpy array
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 2*np.pi))
x_scaled = min_max_scaler.fit_transform(x)

# Rebuild the table: scaled features first, label as the last column ("labels").
features = pd.DataFrame(x_scaled)
data = features.assign(labels=label)

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
training_data, testing_data = train_test_split(data, test_size=0.2, random_state=25)
training_features, training_labels = training_data.iloc[:, :-1], training_data.iloc[:, -1]
testing_features, testing_labels = testing_data.iloc[:, :-1], testing_data.iloc[:, -1]

## Loss function

The QNN is trained with the mean squared error (MSE) as its loss function: the estimator $\hat{x} = \hat{p}_{|x\rangle}$ is compared with the label $p_{|x\rangle} = 1$, so the MSE is minimized when $\hat{x} \approx 1$ for as many samples as possible. Therefore, we can write the loss function as

$$\mathcal{L} = \frac{1}{N}\sum_{k=1}^{N}(\hat{p}_{|x\rangle} -1)^2$$

In [49]:
# Model hyper-parameters shared by the training and testing cells.
num_layers = 2
num_qubits = features.shape[1]  # one qubit per feature column
num_labels = 2

def loss(parameters):
    """
    Mean squared error of the variational classifier over the training set.

    For each training sample a circuit is built from the data embedding
    followed by the variational ansatz and submitted to the default QPU.
    The estimator is the total probability of the basis states in the upper
    half of the register (index >= 2**(n-1)) minus the total probability of
    those in the lower half, where n is the number of allocated qubits.

    Args
        parameters: a np.array for tunable parameters;
    Output
        cost/len(training_features): mean squared error between the estimator
        and the +1 / -1 training label;

    """
    # One QPU handle is enough for every submission in this evaluation.
    qpu = get_default_qpu()

    cost = 0
    for k in range(len(training_features)):
        v = training_features.iloc[k, :].to_numpy()

        #create program
        circuit = Program()
        qbits = circuit.qalloc(len(v))

        #create subcircuits
        encoding = data_embedding(x=v)
        variational = ansatz(parameters, feature_len=len(v), num_layers=num_layers)

        #adding subcircuits into main circuit
        encoding(qbits)
        variational(qbits)

        qc = circuit.to_circ()
        job = qc.to_job()
        result = qpu.submit(job)

        # Basis states with index below 2**(n-1) form the lower half of the
        # register. Deriving the bound from the register size (instead of the
        # literal 32, which is only right for 6 qubits) keeps the estimator
        # valid for any number of features.
        lower_half = range(2 ** (len(v) - 1))

        estimator = 0
        for sample in result:
            #sample._state returns quantum state in the decimal basis
            if sample._state in lower_half:
                estimator -= sample.probability
            else:
                estimator += sample.probability

        #calculating cost
        # Positional lookup avoids rebuilding the full label list per sample.
        # Labels other than 1 / -1 contribute nothing to the cost.
        target = training_labels.iloc[k]
        if target == 1:
            cost += (estimator - 1)**2
        elif target == -1:
            cost += (estimator + 1)**2

    return cost/len(training_features) #Mean squared error / Empirical Risk / Log Loss


## Training 

In [50]:
import scipy
from scipy.optimize import minimize

iteration=1
convergence = []
def callback(variational_parameters):
    """
    Record and print the loss at the optimizer's current parameters.

    Appends the loss value to the module-level ``convergence`` list and
    increments the module-level ``iteration`` counter.

    Args
        variational_parameters: the parameter vector passed in by the optimizer;
    """
    global iteration
    # Evaluate once and reuse: every call to loss() simulates one circuit per
    # training sample, so evaluating it twice doubles the cost of each callback.
    current_loss = loss(variational_parameters)
    convergence.append(current_loss)
    print("Iteration: ", iteration, " \t Loss: ",  current_loss)
    iteration += 1


# Draw the starting point from a seeded generator so that the optimization
# run is reproducible (the train/test split is already fixed by random_state=25).
rng = np.random.default_rng(25)
initial_parameters = rng.uniform(0, 2*np.pi, num_layers*num_qubits)

# Minimize the training loss with SLSQP; `callback` logs the loss after each iteration.
res = scipy.optimize.minimize(loss, x0=initial_parameters,
                                method = 'SLSQP', callback=callback,
                                options={'maxiter': 200, 'ftol': 1e-06, 'iprint': 1, 'disp': True,
                                'eps': 1.4901161193847656e-08, 'finite_diff_rel_step': None})

res

KeyboardInterrupt: 

## Testing 

In [52]:
def testing(parameters=None):
    """
    Classify every test sample and report which ones are correct.

    For each test sample a circuit is built from the data embedding followed
    by the ansatz and submitted to the default QPU. The estimator is computed
    exactly as in ``loss``: probability of the basis states in the upper half
    of the register minus probability of those in the lower half. A sample
    counts as correct when the sign of the estimator matches its +1 / -1 label.

    Args
        parameters: ansatz parameters to evaluate; when None (default) the
            optimizer result ``res['x']`` is used;
    Output
        trues: a list holding one ``1`` per correctly classified sample;
    """
    if parameters is None:
        parameters = res['x']

    # One QPU handle is enough for every submission.
    qpu = get_default_qpu()

    trues = []
    for i, y_data in enumerate(testing_labels):

        circuit = Program()
        v = testing_features.iloc[i].to_numpy()
        qbits = circuit.qalloc(len(v))

        #create subcircuits
        encoding = data_embedding(x=v)
        #trained ansatz
        variational = ansatz(parameters, feature_len=len(v), num_layers=num_layers)

        #adding subcircuits into main circuit
        encoding(qbits)
        variational(qbits)

        qc = circuit.to_circ()
        job = qc.to_job()
        result = qpu.submit(job)

        # Accumulate over the returned samples instead of indexing a dict by
        # every basis state: a state missing from the result would raise
        # KeyError, and the previous bound (num_qubits**2)/2 was both a float
        # (rejected by range) and the wrong size -- the register has
        # 2**n basis states, not n**2.
        lower_half = range(2 ** (len(v) - 1))
        estimator = 0
        for sample in result:
            if sample._state in lower_half:
                estimator -= sample.probability
            else:
                estimator += sample.probability

        # An estimator of exactly 0 matches neither label and is counted as a miss.
        is_correct = (y_data == 1 and estimator > 0) or (y_data == -1 and estimator < 0)
        if is_correct:
            trues.append(1)
            print(f"Sample: {i} \t Inference: ", True)
        else:
            print(f"Sample: {i} \t Inference: ", False, " - Misclassified")

    return trues


# Fraction of test samples whose predicted sign matches the label.
accuracy = len(testing()) / len(testing_labels)
print('\n \n \n Accuracy: ', accuracy)

NameError: name 'res' is not defined

In [None]:
import matplotlib.pyplot as plt
# Plot the loss values recorded by the optimizer callback, one point per call.
ax = plt.gca()
ax.plot(convergence)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss value")
ax.grid()


In [None]:
# Summarize the test run: total number of test rows vs. correctly classified ones.
num_tested = testing_data.shape[0]
num_correct = len(testing())
print(f" A total of {num_tested} testing samples were used and {num_correct} were classified correctly.")

In [None]:
# Display the (rows, columns) dimensions of the scaled feature table.
features.shape

In [51]:
# Display the held-out labels (last column of the test split), indexed by original row.
testing_labels

28    -1
72    -1
70    -1
85     1
128    1
65    -1
127    1
42    -1
81     1
93     1
21    -1
31    -1
26    -1
133    1
6     -1
58    -1
125    1
139    1
64    -1
94     1
79     1
99     1
66    -1
20    -1
34    -1
117    1
69    -1
122    1
120    1
41    -1
Name: labels, dtype: int64