In [28]:
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

#myqlm
from qat.lang.AQASM import Program, H, RX, RY, RZ, Z, CNOT
from qat.lang.AQASM import *
from qat.qpus import get_default_qpu
from qat.core import Observable, Term
from qat.plugins import ScipyMinimizePlugin

#imports
import ipynb
import import_ipynb
from vqc_functions import data_embedding, ansatz

## Data importing and preprocessing

Features are rescaled with min-max normalization to the range from $0$ to $2\pi$, so that each value can be used as a rotation angle.

In [29]:
# Read the Iris table from disk. read_csv is called with its defaults, so the
# first line of the file is consumed as the header row.
# NOTE(review): the stock UCI `iris.data` file ships without a header line --
# confirm this copy has one, otherwise the first sample is silently dropped
# (passing header=None would be required).
# NOTE(review): absolute, machine-specific path; consider a configurable data dir.
data = pd.read_csv(r'C:\\Users\\anton.albino\\Documents\\Anton\\codigos\\myqlm\\qnn\\data\\iris.data')

# The last column is the class label; every other column is treated as a feature.
features, label = data.iloc[:, :-1], data.iloc[:, -1]

In [30]:
# Rescale every feature column to [0, 2*pi] so the values can serve as angles.
# NOTE(review): the scaler is fitted on the full table before the split below,
# so the test rows influence the scaling range.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 2*np.pi))
x = features.values  # plain numpy array of the feature columns
x_scaled = min_max_scaler.fit_transform(x)

# Rebuild a frame from the scaled array and re-attach the labels as last column.
features = pd.DataFrame(x_scaled)
data = features.assign(labels=label)

# Hold out 20% of the rows for testing; the seed is fixed for repeatability.
training_data, testing_data = train_test_split(data, test_size=0.2, random_state=25)

# In both partitions the label is the last column, the features are the rest.
training_features, training_labels = training_data.iloc[:, :-1], training_data.iloc[:, -1]
testing_features, testing_labels = testing_data.iloc[:, :-1], testing_data.iloc[:, -1]

### TwoHot encoding

Labels are encoded into quantum basis states with the following settings:

$3: |0011\rangle$ encoding Iris-setosa.
$6: |0110\rangle$ encoding Iris-versicolor.
$9: |1001\rangle$ encoding Iris-virginica.



In [31]:
# Decimal value of the two-hot basis state assigned to each species:
# 3 -> |0011>, 6 -> |0110>, 9 -> |1001>.
TWO_HOT_CODES = {
    "Iris-setosa": 3,
    "Iris-versicolor": 6,
    "Iris-virginica": 9,
}


def _two_hot_encode(labels):
    """Translate species names into the decimal index of their two-hot state.

    Args
        labels: iterable of species names (e.g. a pandas Series);
    Output
        list of ints (3, 6 or 9), one per label and in the same order;
    Raises
        ValueError: if a label is not one of the three known species. Skipping
        such a label would shift every later code by one position relative to
        the feature rows that are addressed with the same index.
    """
    codes = []
    for position, iris in enumerate(labels):
        if iris not in TWO_HOT_CODES:
            raise ValueError(
                "Unknown label {!r} at position {}".format(iris, position)
            )
        codes.append(TWO_HOT_CODES[iris])
    return codes


twohotencoding_training = _two_hot_encode(training_labels)
twohotencoding_testing = _two_hot_encode(testing_labels)

## Loss function

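The loss function used to train the QNN is the mean squared error (MSE). For each training sample, the estimator $\hat{x} = \hat{p}_{|x\rangle}$ (the measured probability of the basis state $|x\rangle$ that encodes the sample's label) is compared with the target $p_{|x\rangle} = 1$, so the MSE is minimized as every $\hat{x}$ approaches $1$. Therefore, with $N$ the number of training samples, we can write the loss function as

$$\mathcal{L} = \frac{1}{N}\sum_{k=1}^{N}\left(\hat{p}^{(k)}_{|x\rangle} - 1\right)^2$$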

In [32]:
num_layers = 5
num_qubits = (features.shape[1])

def loss(parameters):
    """
    Mean squared error between the measured probability of each training
    sample's label state and the target probability 1.

    Args
        parameters: a np.array for tunable parameters;
    Output
        cost/len(training_features): mean squared error;

    """

    # The QPU handle does not depend on the sample, so request it only once.
    qpu = get_default_qpu()

    cost = 0
    for k in range(len(training_features)):
        v = training_features.iloc[k, :].to_numpy()

        #create program
        circuit = Program()
        qbits = circuit.qalloc(len(v))

        #create subcircuits
        encoding = data_embedding(x=v)
        variational = ansatz(parameters, feature_len=len(v), num_layers=num_layers)

        #adding subcircuits into main circuit
        encoding(qbits)
        variational(qbits)

        qc = circuit.to_circ()
        job = qc.to_job()
        result = qpu.submit(job)

        #sample._state returns quantum state in the decimal basis
        meas = {sample._state: sample.probability for sample in result}

        #calculating cost: a label state that is absent from the result is
        #treated as probability 0, i.e. it contributes (0 - 1)**2 = 1
        cost += (meas.get(twohotencoding_training[k], 0.0) - 1)**2

    return cost/len(training_features) #Mean squared error


## Training 

In [33]:
import scipy
from scipy.optimize import minimize

# Start from all-zero parameters and minimise the loss with the gradient-free
# COBYLA method. The vector length is derived from the circuit settings instead
# of the former literal 5*4, so it follows any change to num_layers.
# NOTE(review): assumes the ansatz consumes num_layers * num_qubits parameters
# (this equals the previous 5*4 for the current settings) -- confirm against
# vqc_functions.ansatz.
res = minimize(loss, x0=np.zeros(num_layers * num_qubits), method='COBYLA')
res

     fun: 0.42580026608105
   maxcv: 0.0
 message: 'Maximum number of function evaluations has been exceeded.'
    nfev: 1000
  status: 2
 success: False
       x: array([-2.39068348,  1.69377882,  1.58442195, -1.64920869,  2.39702831,
       -1.36723857,  0.09821368,  1.17529924,  1.53238697,  0.06042407,
        1.11322538,  1.61491592, -0.36218976, -0.33446947, -0.07745673,
       -1.60918917, -1.40241721, -0.05185182,  1.03333309, -1.29921526])

## Testing 

In [34]:
# Decimal indices of the three basis states that encode a class.
class_states = (3, 6, 9)

# The QPU handle does not depend on the sample, so request it only once.
qpu = get_default_qpu()

trues = []
for i in range(len(testing_labels)):

    circuit = Program()
    v = testing_features.iloc[i].to_numpy()
    qbits = circuit.qalloc(len(v))

    #create subcircuits
    encoding = data_embedding(x=v)
    #trained ansatz (same depth as the one used inside the loss function)
    variational = ansatz(res['x'], feature_len=len(v), num_layers=num_layers)

    #adding subcircuits into main circuit
    encoding(qbits)
    variational(qbits)

    qc = circuit.to_circ()
    job = qc.to_job()
    result = qpu.submit(job)

    meas = {sample._state: sample.probability for sample in result}

    #testing: the sample counts as correct only when its own label state is
    #strictly more probable than both other class states. A state that is
    #absent from the result is read as probability 0 (as in the loss function)
    #instead of raising KeyError.
    target = twohotencoding_testing[i]
    if target in class_states:
        p_target = meas.get(target, 0.0)
        if all(p_target > meas.get(state, 0.0)
               for state in class_states if state != target):
            trues.append(1)


print('Accuracy: ', len(trues)/len(testing_labels))

Accuracy:  0.9
