In [None]:
import logging
from math import *

import numpy as np
import pandas as pd
from qiskit import  Aer
from qiskit.algorithms.optimizers import COBYLA
from qiskit.aqua import aqua_globals
from qiskit.aqua import set_qiskit_aqua_logging
from qiskit.aqua.algorithms import VQC
from qiskit.aqua.utils import  map_label_to_class_name
from qiskit.circuit.library import TwoLocal, ZZFeatureMap
from qiskit.circuit.library import ZZFeatureMap
from qiskit.utils import QuantumInstance, algorithm_globals
from sklearn.metrics import *
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

set_qiskit_aqua_logging(logging.DEBUG)

In [None]:
# Load the synthetic dataset into a DataFrame.
# The file is comma-separated, which is already read_csv's default delimiter.

df = pd.read_csv("synthetic_data.csv")

In [None]:
# Print a structural summary of the DataFrame: column names, dtypes and non-null counts

df.info()

<bound method DataFrame.info of        X1        X2        X3        X4        X5        X6        X7  \
0       1  0.031390  0.031390  0.031390  0.031390  0.031390  0.031390   
1       1  5.234565  5.234565  5.234565  5.234565  5.234565  5.234565   
2       1  9.354209  9.354209  9.354209  9.354209  9.354209  9.354209   
3       1 -8.133407 -8.133407 -8.133407 -8.133407 -8.133407 -8.133407   
4       1 -5.471547 -5.471547 -5.471547 -5.471547 -5.471547 -5.471547   
...    ..       ...       ...       ...       ...       ...       ...   
49995   1  6.592156  6.592156  6.592156  6.592156  6.592156  6.592156   
49996   1 -7.705122 -7.705122 -7.705122 -7.705122 -7.705122 -7.705122   
49997   1  5.117075  5.117075  5.117075  5.117075  5.117075  5.117075   
49998   1  0.255174  0.255174  0.255174  0.255174  0.255174  0.255174   
49999   1  1.638036  1.638036  1.638036  1.638036  1.638036  1.638036   

             X8        X9       X10  y  
0      0.031390  0.031390  0.031390  1  
1      5.

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column,
# including the label column 'y', which is still part of df at this point

df.describe()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,y
count,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0
mean,1.0,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.47876
std,0.0,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,0.499554
min,1.0,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,0.0
25%,1.0,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,0.0
50%,1.0,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.0
75%,1.0,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,1.0
max,1.0,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,1.0


In [None]:
# Split the target off from the features.
# pop() returns the 'y' column and removes it from df in place, so df holds only
# feature columns afterwards. Re-running this cell without reloading the CSV raises
# KeyError because 'y' is already gone.

df_labels = df.pop('y')

In [None]:
# Peek at the first five labels
df_labels.head()

0    1
1    1
2    1
3    0
4    0
Name: y, dtype: int64

In [None]:
# Feature matrix: every remaining column of df (the label column 'y' was removed in the cell above).
# Plain assignment, so df_features is another name for the same DataFrame object, not a copy.

df_features = df

In [None]:
# Peek at the first five feature rows
df_features.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
0,1,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139
1,1,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565
2,1,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209
3,1,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407
4,1,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547


In [None]:
# Split dataset into train and test (80/20, fixed random_state for a repeatable shuffle)

sample_train, sample_test, label_train, label_test = train_test_split(
    df_features, df_labels, test_size=0.2, random_state=22)

# Reduce dimensions using PCA so the number of features matches the number of qubits.
# The projection is fitted on the training split only and then applied to both splits.
# NOTE(review): the describe()/head() outputs earlier in the notebook suggest X1 is
# constant and X2..X10 are identical copies of one column. If so, only the first
# principal component carries signal and the second is numerical noise that the
# scalers below will blow up to full range - confirm against the real data.

n_dim = 2
pca = PCA(n_components=n_dim).fit(sample_train)
sample_train = pca.transform(sample_train)
sample_test = pca.transform(sample_test)

# Standardize (zero mean, unit variance) using training statistics only

std_scale = StandardScaler().fit(sample_train)
sample_train = std_scale.transform(sample_train)
sample_test = std_scale.transform(sample_test)

# Rescale to [-1, 1] for a better fit within the feature map.
# The scaler is fitted on the training split only: fitting it on train and test
# together would leak test-set statistics into the preprocessing. As a consequence,
# test values can fall slightly outside [-1, 1].

minmax_scale = MinMaxScaler((-1, 1)).fit(sample_train)
sample_train = minmax_scale.transform(sample_train)
sample_test = minmax_scale.transform(sample_test)

# Select a small subsample to keep the simulation wall time under control.
# The feature arrays are NumPy arrays (sliced positionally); the labels are pandas
# Series, so .iloc is used to make the positional slice explicit and keep both aligned.

train_size = 160
sample_train = sample_train[:train_size]
label_train = label_train.iloc[:train_size]

test_size = 40
sample_test = sample_test[:test_size]
label_test = label_test.iloc[:test_size]

In [None]:
# Class balance of the training subsample, as a percentage per label

label_train.value_counts(normalize=True).mul(100)

1    53.75
0    46.25
Name: y, dtype: float64

In [None]:
# Class balance of the test subsample, as a percentage per label

label_test.value_counts(normalize=True).mul(100)

1    52.5
0    47.5
Name: y, dtype: float64

In [None]:
# Group the samples by class label into {label: samples} dictionaries,
# which is the form handed to VQC further down.
zero_train = zero_train_input = sample_train[label_train == 0]
one_train = one_train_input = sample_train[label_train == 1]
training_input = {0: zero_train, 1: one_train}

zero_test = zero_test_input = sample_test[label_test == 0]
one_test = one_test_input = sample_test[label_test == 1]
test_input = {0: zero_test, 1: one_test}

# Evaluation set used for prediction: all class-0 test samples followed by all
# class-1 test samples, with a ground-truth vector in the same order.
# datapoints[0] holds the samples, datapoints[1] (also bound to dp_y) the labels.
dp_y = np.concatenate((np.zeros(len(zero_test)), np.ones(len(one_test))))
datapoints = [np.concatenate((zero_test, one_test)), dp_y]

In [None]:
# Basic parameters for the hybrid model

# One feature per PCA component
feature_dim = n_dim
# Seed shared by the simulator and the transpiler
seed = 8500

In [None]:
# Make the run reproducible.
# `seed` below only reaches the simulator and the transpiler, while the training log
# shows a randomly drawn initial point. Seeding Aqua's global random generator fixes
# that starting point, so re-running this cell yields the same optimisation path.
# NOTE(review): Aqua algorithms are expected to draw from aqua_globals.random - confirm
# against the installed Aqua version.

aqua_globals.random_seed = seed

# Define feature_map, optimizer and var_form
# - feature_map encodes each classical sample into the qubits
# - var_form is the trainable circuit whose parameters COBYLA tunes
# - datapoints[0] is the set of samples to predict once training has finished

feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=2)
optimizer = COBYLA(maxiter=20, disp=True, rhobeg=1.0, tol=None)
var_form = TwoLocal(feature_dim, ['ry', 'rz'], 'cz', reps=3)
vqc = VQC(optimizer, feature_map, var_form, training_input, test_input, datapoints[0])

# Define the backend

backend = Aer.get_backend('qasm_simulator')

# Define the instance (fixed shot count, seeded simulator and transpiler)

quantum_instance = QuantumInstance(backend, shots=1024, seed_simulator=seed, seed_transpiler=seed)

# Model run: trains on training_input, scores on test_input, predicts datapoints[0]

result = vqc.run(quantum_instance)

print(f'Testing success ratio: {result["testing_accuracy"]}')
print()
print('Prediction from datapoints set:')
print(f'  ground truth: {map_label_to_class_name(datapoints[1], vqc.label_to_class)}')
print(f'  prediction:   {result["predicted_classes"]}')
predicted_labels = result["predicted_labels"]
# Share of predictions that match the ground-truth vector, as a percentage
print(f'  success rate: {100*np.count_nonzero(predicted_labels == datapoints[1])/len(predicted_labels)}%')

2022-05-20 15:54:09,955:qiskit.aqua.algorithms.vq_algorithm:INFO: Starting optimizer.
bounds=[(None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None)]
initial point=[-0.9192223  -0.10874246  2.34200303  0.20263471 -1.05675895 -1.9927186
 -1.45682903 -0.09243282  0.19305317  0.89051176 -0.57415853  0.24143234
  2.27245551  0.58441587  0.21972644 -1.52063582]
2022-05-20 15:54:11,595:qiskit.aqua.algorithms.classifiers.vqc:DEBUG: Intermediate batch cost: 0.9832222704763914
2022-05-20 15:54:13,746:qiskit.aqua.algorithms.classifiers.vqc:DEBUG: Intermediate batch cost: 0.8257583926801167
2022-05-20 15:54:15,784:qiskit.aqua.algorithms.classifiers.vqc:DEBUG: Intermediate batch cost: 0.8135202409006439
2022-05-20 15:54:17,601:qiskit.aqua.algorithms.classifiers.vqc:DEBUG: Intermediate batch cost: 0.7518432339402983
2022-05-20 15:54:

Testing success ratio: 0.725

Prediction from datapoints set:
  ground truth: [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
  prediction:   [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
  success rate: 72.5%


In [None]:
# Score the VQC predictions against the ground truth of the evaluation set:
# confusion matrix, accuracy, per-class report and balanced accuracy.

y_true = dp_y
y_pred = result['predicted_classes']

print(metrics.confusion_matrix(y_true, y_pred))
print(metrics.accuracy_score(y_true, y_pred))
print(metrics.classification_report(y_true, y_pred, target_names=['0','1']))
print(metrics.balanced_accuracy_score(y_true, y_pred))

[[12  7]
 [ 4 17]]
0.725
              precision    recall  f1-score   support

           0       0.75      0.63      0.69        19
           1       0.71      0.81      0.76        21

    accuracy                           0.73        40
   macro avg       0.73      0.72      0.72        40
weighted avg       0.73      0.72      0.72        40

0.7205513784461153
