In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn import metrics

import functools

from qiskit import BasicAer
from qiskit.circuit.library import ZZFeatureMap
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.datasets import ad_hoc_data
import logging
from qiskit.aqua import set_qiskit_aqua_logging
# Pass logging.DEBUG to the Aqua logging helper.
# NOTE(review): presumably this turns on verbose Qiskit Aqua log output for every
# later cell — confirm, and consider a higher level (INFO/WARNING) if outputs get noisy.
set_qiskit_aqua_logging(logging.DEBUG)


In [None]:
# Load the synthetic dataset (comma-separated file) into a DataFrame

df = pd.read_csv('synthetic_data.csv')

In [None]:
# Print the DataFrame summary via DataFrame.info()
# NOTE(review): the output saved under this cell is a "<bound method DataFrame.info ...>"
# repr, which suggests it was produced by `df.info` without the call parentheses;
# re-run the cell to refresh the stored output.

df.info()

<bound method DataFrame.info of        X1        X2        X3        X4        X5        X6        X7  \
0       1  0.031390  0.031390  0.031390  0.031390  0.031390  0.031390   
1       1  5.234565  5.234565  5.234565  5.234565  5.234565  5.234565   
2       1  9.354209  9.354209  9.354209  9.354209  9.354209  9.354209   
3       1 -8.133407 -8.133407 -8.133407 -8.133407 -8.133407 -8.133407   
4       1 -5.471547 -5.471547 -5.471547 -5.471547 -5.471547 -5.471547   
...    ..       ...       ...       ...       ...       ...       ...   
49995   1  6.592156  6.592156  6.592156  6.592156  6.592156  6.592156   
49996   1 -7.705122 -7.705122 -7.705122 -7.705122 -7.705122 -7.705122   
49997   1  5.117075  5.117075  5.117075  5.117075  5.117075  5.117075   
49998   1  0.255174  0.255174  0.255174  0.255174  0.255174  0.255174   
49999   1  1.638036  1.638036  1.638036  1.638036  1.638036  1.638036   

             X8        X9       X10  y  
0      0.031390  0.031390  0.031390  1  
1      5.

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column

df.describe()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,y
count,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0,50000.0
mean,1.0,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.028783,0.47876
std,0.0,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,5.761384,0.499554
min,1.0,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,-9.999899,0.0
25%,1.0,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,-4.926806,0.0
50%,1.0,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.05761,0.0
75%,1.0,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,4.996073,1.0
max,1.0,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,9.999712,1.0


In [None]:
# Separate the target column 'y' from the feature columns.
#
# The cell is guarded so it can be re-run safely: once 'y' has been split off,
# a second execution is a no-op instead of raising KeyError on df['y'].

if 'y' in df.columns:
    df_labels = df['y']
    # Rebind to a new frame instead of dropping in place (no hidden in-place mutation).
    df = df.drop(columns=['y'])
elif 'df_labels' not in globals():
    # Fresh kernel and no target column in the data: fail loudly, as df['y'] would.
    raise KeyError('y')

In [None]:
# Peek at the first rows of the label Series
df_labels.head()

0    1
1    1
2    1
3    0
4    0
Name: y, dtype: int64

In [None]:
# Feature matrix definition
# NOTE: this is the full feature set (not yet split into train/test). It is an
# alias of `df`, not a copy, and relies on the label column 'y' having already
# been removed from `df` by an earlier cell.

df_features = df

In [None]:
# Peek at the first rows of the feature matrix
df_features.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
0,1,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139,0.03139
1,1,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565,5.234565
2,1,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209,9.354209
3,1,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407,-8.133407
4,1,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547,-5.471547


In [None]:
# Split dataset into train and test (80/20, fixed seed for reproducibility)

sample_train, sample_test, label_train, label_test = train_test_split(
    df_features, df_labels, test_size=0.2, random_state=22)

# Reduce dimensions using PCA so that the number of features matches the
# number of qubits used by the feature map. Fitted on the training split only.

n_dim = 2
pca = PCA(n_components=n_dim).fit(sample_train)
sample_train = pca.transform(sample_train)
sample_test = pca.transform(sample_test)

# Standardize (zero mean, unit variance) using training-split statistics only

std_scale = StandardScaler().fit(sample_train)
sample_train = std_scale.transform(sample_train)
sample_test = std_scale.transform(sample_test)

# Rescale to [-1, 1] for a better fit within the feature map.
# The scaler is fitted on the training split ONLY: fitting it on train+test
# would leak test-set information into preprocessing. As a consequence, test
# values may fall marginally outside [-1, 1], which is expected.

minmax_scale = MinMaxScaler((-1, 1)).fit(sample_train)
sample_train = minmax_scale.transform(sample_train)
sample_test = minmax_scale.transform(sample_test)

# Keep only a small subset to control quantum-kernel evaluation wall time.
# The samples are NumPy arrays (plain slicing is positional); the labels are
# pandas Series, so use .iloc to make the positional slice explicit.

train_size = 160
sample_train = sample_train[:train_size]
label_train = label_train.iloc[:train_size]

test_size = 40
sample_test = sample_test[:test_size]
label_test = label_test.iloc[:test_size]

In [None]:
# Class balance of the training labels, expressed as percentages

label_train.value_counts(normalize=True).mul(100)

1    53.75
0    46.25
Name: y, dtype: float64

In [None]:
# Class balance of the test labels, expressed as percentages

label_test.value_counts(normalize=True).mul(100)

1    52.5
0    47.5
Name: y, dtype: float64

In [None]:
# Shared settings for the hybrid (quantum kernel + classical SVM) model

feature_dim = n_dim  # feature-map dimension follows the number of PCA components
seed = 8500          # seed passed to the simulator and the transpiler

In [None]:
# Feature map that encodes each classical sample into a quantum circuit
feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=2)

# Execution backend: BasicAer QASM simulator, 256 shots, seeded simulator/transpiler
backend = QuantumInstance(
    BasicAer.get_backend("qasm_simulator"),
    shots=256,
    seed_simulator=seed,
    seed_transpiler=seed,
)

# Quantum kernel built from the feature map and run on the backend above
kernel = QuantumKernel(feature_map=feature_map, quantum_instance=backend)

# Classical SVC whose kernel is the quantum kernel's evaluate callable;
# fit on the training subset, then score on the test subset
svc = SVC(kernel=kernel.evaluate).fit(sample_train, label_train)
score = svc.score(sample_test, label_test)

print(f"Callable kernel classification test score: {score}")

Callable kernel classification test score: 0.925


In [None]:
# Predict labels for the test subset; reused by the metrics report below
result_predict = svc.predict(sample_test)

In [None]:
# Full per-class report first, then the headline scores one per line
# (precision, recall, F1, balanced accuracy — in that order)

print(metrics.classification_report(label_test, result_predict))
for scorer in (
    metrics.precision_score,
    metrics.recall_score,
    metrics.f1_score,
    metrics.balanced_accuracy_score,
):
    print(scorer(label_test, result_predict))

              precision    recall  f1-score   support

           0       0.94      0.89      0.92        19
           1       0.91      0.95      0.93        21

    accuracy                           0.93        40
   macro avg       0.93      0.92      0.92        40
weighted avg       0.93      0.93      0.92        40

0.9090909090909091
0.9523809523809523
0.9302325581395349
0.9235588972431077
