In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn import metrics

import functools

from qiskit import BasicAer
from qiskit.circuit.library import ZZFeatureMap
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit_machine_learning.algorithms import QSVC
from qiskit_machine_learning.kernels import QuantumKernel
from qiskit_machine_learning.datasets import ad_hoc_data
import logging
from qiskit.aqua import set_qiskit_aqua_logging
set_qiskit_aqua_logging(logging.DEBUG)


In [2]:
import pandas as pd
import numpy as np

def clean_dataset(df):
    assert isinstance(df, pd.DataFrame), "df needs to be a pd.DataFrame"
    df.dropna(inplace=True)
    indices_to_keep = ~df.isin([np.nan, np.inf, -np.inf]).any(1)
    return df[indices_to_keep].astype(np.float64)

In [3]:
# Read out CSV
df = pd.read_csv('training_observables.csv', header=None)
#df = pd.read_csv('training_observables.csv', sep=',')
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,77,78,79,80,81,82,83,84,85,86
0,0,0.0,0.493280,119.32780,0.026617,0.019539,0.001285,0.004125,-0.114632,-0.192545,...,0.001793,0.003997,0.008152,0.015164,0.025790,0.040464,0.059261,0.081969,0.108310,0.138916
1,1,0.0,0.499796,140.66150,0.027218,0.015303,0.017385,0.001732,1.389377,0.282950,...,0.000811,0.001750,0.003415,0.006048,0.009945,0.015720,0.024593,0.038424,0.059534,0.090801
2,2,0.0,0.503392,152.76870,0.011552,0.016412,0.004306,0.007357,-0.133998,0.223442,...,0.003183,0.006916,0.013742,0.025196,0.042973,0.068877,0.104854,0.153033,0.215902,0.298440
3,3,0.0,0.488244,123.84110,0.016648,0.007232,0.004221,0.001482,1.066655,0.425053,...,0.000763,0.001691,0.003347,0.005789,0.008923,0.013181,0.020897,0.036646,0.066004,0.117576
4,4,0.0,0.496821,130.78380,0.029870,0.034460,0.016352,0.001592,0.849569,-0.411059,...,0.000919,0.002004,0.003871,0.006358,0.008433,0.007623,0.003397,0.026416,0.078568,0.177591
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22570,22570,1.0,0.495708,43.28272,0.031320,0.010866,0.007185,0.000848,-0.414526,-0.690625,...,0.000392,0.000845,0.001672,0.003088,0.005341,0.008532,0.012373,0.016067,0.018169,0.015897
22571,22571,1.0,0.536097,35.77644,0.089789,0.015850,0.006881,0.005601,-0.028845,0.934089,...,0.001865,0.003920,0.007764,0.015250,0.029065,0.051837,0.085413,0.130882,0.189289,0.264222
22572,22572,1.0,0.516989,40.66633,0.056547,0.031709,0.012869,0.007707,0.561080,0.025548,...,0.003741,0.007919,0.014907,0.025046,0.037916,0.052361,0.066622,0.078427,0.085027,0.082829
22573,22573,1.0,0.514398,48.36263,0.059926,0.013307,0.006346,0.004572,0.499208,-1.016149,...,0.002169,0.004623,0.008857,0.015174,0.023454,0.033371,0.044792,0.058167,0.075026,0.098845


In [4]:
# Every Feature name
header_list = {
    0 : 'number',
    1 : 'eos_type',
    2 :	'mean_pt',
    3 :	'dndy',
    4 :	'v2',
    5 :	'v3',
    6 :	'v4',
    7 : 'v5',
    8 :	'psi2',
    9 :	'psi3',
    10 : 'psi4',
    11 :	'psi5',
    12 :'ptspec_bin0',
    13 : 'ptspec_bin',
    14 : 'ptspec_bin2',
    15 :	'ptspec_bin3',
    16 : 'ptspec_bin4',
    17 :	'ptspec_bin5',
    18 :	'ptspec_bin6',
    19 :	'ptspec_bin7',
    20 :	'ptspec_bin8',
    21 : 	'ptspec_bin9',
    22 :	'ptspec_bin10',
    23 :	'ptspec_bin11',
    24 :	'ptspec_bin12',
    25 :	'ptspec_bin13',
    26 :	'ptspec_bin14',
    27 :	'v2_ptbin0',
    28 :	'v2_ptbin1',
    29 :	'v2_ptbin2',
    30 :	'v2_ptbin3',
    31 :	'v2_ptbin4',
    32 :	'v2_ptbin5',
    33 :	'v2_ptbin6',
    34 :	'v2_ptbin7',
    35 :	'v2_ptbin8',
    36 :	'v2_ptbin9',
    37 :	'v2_ptbin10',
    38 :	'v2_ptbin11',
    39 :	'v2_ptbin12',
    40 :	'v2_ptbin13',
    41 :	'v2_ptbin14',
    42 :	'v3_ptbin0',
    43 :	'v3_ptbin1',
    44 :	'v3_ptbin2',
    45 :	'v3_ptbin3',
    46 :	'v3_ptbin4',
    47 :	'v3_ptbin5',
    48 :	'v3_ptbin6',
    49 :	'v3_ptbin7',
    50 : 'v3_ptbin8',
    51 : 'v3_ptbin9',
    52 : 'v3_ptbin10',
    53 : 'v3_ptbin11',
    54 : 'v3_ptbin12',
    55 : 'v3_ptbin13',
    56 : 'v3_ptbin14',
    57 : 'v4_ptbin0',
    58 : 'v4_ptbin1',
    59 : 'v4_ptbin2',
    60 : 'v4_ptbin3',
    61 : 'v4_ptbin4',
    62 : 'v4_ptbin5',
    63 : 'v4_ptbin6',
    64 : 'v4_ptbin7',
    65 : 'v4_ptbin8',
    66 : 'v4_ptbin9',
    67 : 'v4_ptbin10',
    68 : 'v4_ptbin11',
    69 : 'v4_ptbin12',
    70 : 'v4_ptbin13',
    71 : 'v4_ptbin14',
    72 : 'v5_ptbin0',
    73 : 'v5_ptbin1',
    74 : 'v5_ptbin2',
    75 : 'v5_ptbin3',
    76 : 'v5_ptbin4',
    77 : 'v5_ptbin5',
    78 : 'v5_ptbin6',
    79 : 'v5_ptbin7',
    80 : 'v5_ptbin8',
    81 : 'v5_ptbin9',
    82 : 'v5_ptbin10',
    83 : 'v5_ptbin11',
    84 : 'v5_ptbin12',
    85 : 'v5_ptbin13',
    86 : 'v5_ptbin14'
}

In [5]:
df = df.rename(columns=header_list)
df.head(5)

Unnamed: 0,number,eos_type,mean_pt,dndy,v2,v3,v4,v5,psi2,psi3,...,v5_ptbin5,v5_ptbin6,v5_ptbin7,v5_ptbin8,v5_ptbin9,v5_ptbin10,v5_ptbin11,v5_ptbin12,v5_ptbin13,v5_ptbin14
0,0,0.0,0.49328,119.3278,0.026617,0.019539,0.001285,0.004125,-0.114632,-0.192545,...,0.001793,0.003997,0.008152,0.015164,0.02579,0.040464,0.059261,0.081969,0.10831,0.138916
1,1,0.0,0.499796,140.6615,0.027218,0.015303,0.017385,0.001732,1.389377,0.28295,...,0.000811,0.00175,0.003415,0.006048,0.009945,0.01572,0.024593,0.038424,0.059534,0.090801
2,2,0.0,0.503392,152.7687,0.011552,0.016412,0.004306,0.007357,-0.133998,0.223442,...,0.003183,0.006916,0.013742,0.025196,0.042973,0.068877,0.104854,0.153033,0.215902,0.29844
3,3,0.0,0.488244,123.8411,0.016648,0.007232,0.004221,0.001482,1.066655,0.425053,...,0.000763,0.001691,0.003347,0.005789,0.008923,0.013181,0.020897,0.036646,0.066004,0.117576
4,4,0.0,0.496821,130.7838,0.02987,0.03446,0.016352,0.001592,0.849569,-0.411059,...,0.000919,0.002004,0.003871,0.006358,0.008433,0.007623,0.003397,0.026416,0.078568,0.177591


In [6]:
df = df.drop(['number'], axis = 1)
df

Unnamed: 0,eos_type,mean_pt,dndy,v2,v3,v4,v5,psi2,psi3,psi4,...,v5_ptbin5,v5_ptbin6,v5_ptbin7,v5_ptbin8,v5_ptbin9,v5_ptbin10,v5_ptbin11,v5_ptbin12,v5_ptbin13,v5_ptbin14
0,0.0,0.493280,119.32780,0.026617,0.019539,0.001285,0.004125,-0.114632,-0.192545,0.604534,...,0.001793,0.003997,0.008152,0.015164,0.025790,0.040464,0.059261,0.081969,0.108310,0.138916
1,0.0,0.499796,140.66150,0.027218,0.015303,0.017385,0.001732,1.389377,0.282950,0.155580,...,0.000811,0.001750,0.003415,0.006048,0.009945,0.015720,0.024593,0.038424,0.059534,0.090801
2,0.0,0.503392,152.76870,0.011552,0.016412,0.004306,0.007357,-0.133998,0.223442,-0.593591,...,0.003183,0.006916,0.013742,0.025196,0.042973,0.068877,0.104854,0.153033,0.215902,0.298440
3,0.0,0.488244,123.84110,0.016648,0.007232,0.004221,0.001482,1.066655,0.425053,-0.017385,...,0.000763,0.001691,0.003347,0.005789,0.008923,0.013181,0.020897,0.036646,0.066004,0.117576
4,0.0,0.496821,130.78380,0.029870,0.034460,0.016352,0.001592,0.849569,-0.411059,0.742171,...,0.000919,0.002004,0.003871,0.006358,0.008433,0.007623,0.003397,0.026416,0.078568,0.177591
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22570,1.0,0.495708,43.28272,0.031320,0.010866,0.007185,0.000848,-0.414526,-0.690625,0.623115,...,0.000392,0.000845,0.001672,0.003088,0.005341,0.008532,0.012373,0.016067,0.018169,0.015897
22571,1.0,0.536097,35.77644,0.089789,0.015850,0.006881,0.005601,-0.028845,0.934089,0.126117,...,0.001865,0.003920,0.007764,0.015250,0.029065,0.051837,0.085413,0.130882,0.189289,0.264222
22572,1.0,0.516989,40.66633,0.056547,0.031709,0.012869,0.007707,0.561080,0.025548,0.430158,...,0.003741,0.007919,0.014907,0.025046,0.037916,0.052361,0.066622,0.078427,0.085027,0.082829
22573,1.0,0.514398,48.36263,0.059926,0.013307,0.006346,0.004572,0.499208,-1.016149,0.443735,...,0.002169,0.004623,0.008857,0.015174,0.023454,0.033371,0.044792,0.058167,0.075026,0.098845


In [7]:
df = clean_dataset(df)

In [8]:
# Review the information related to the dataframe

df.info

<bound method DataFrame.info of        eos_type   mean_pt       dndy        v2        v3        v4        v5  \
0           0.0  0.493280  119.32780  0.026617  0.019539  0.001285  0.004125   
1           0.0  0.499796  140.66150  0.027218  0.015303  0.017385  0.001732   
2           0.0  0.503392  152.76870  0.011552  0.016412  0.004306  0.007357   
3           0.0  0.488244  123.84110  0.016648  0.007232  0.004221  0.001482   
4           0.0  0.496821  130.78380  0.029870  0.034460  0.016352  0.001592   
...         ...       ...        ...       ...       ...       ...       ...   
22570       1.0  0.495708   43.28272  0.031320  0.010866  0.007185  0.000848   
22571       1.0  0.536097   35.77644  0.089789  0.015850  0.006881  0.005601   
22572       1.0  0.516989   40.66633  0.056547  0.031709  0.012869  0.007707   
22573       1.0  0.514398   48.36263  0.059926  0.013307  0.006346  0.004572   
22574       1.0  0.523754   58.44409  0.051211  0.039774  0.009579  0.002640   

       

In [9]:
# Table of the description of the dataframe related to fixed parameters

df.describe()

Unnamed: 0,eos_type,mean_pt,dndy,v2,v3,v4,v5,psi2,psi3,psi4,...,v5_ptbin5,v5_ptbin6,v5_ptbin7,v5_ptbin8,v5_ptbin9,v5_ptbin10,v5_ptbin11,v5_ptbin12,v5_ptbin13,v5_ptbin14
count,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,...,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0,22573.0
mean,0.404909,0.511822,107.499991,0.064737,0.024196,0.012249,0.005511,-0.003248,0.016878,-0.00512,...,0.002571,0.005195,0.009765,0.017114,0.028097,0.043605,0.064677,0.092782,0.131627,0.275086
std,0.490885,0.0513,81.587016,0.031721,0.014125,0.008256,0.004168,0.372459,0.605286,0.38663,...,0.001911,0.003586,0.006371,0.010743,0.017159,0.02601,0.037611,0.052304,0.073179,2.872055
min,0.0,0.387254,13.89908,0.000505,0.000317,8.6e-05,2.2e-05,-1.569997,-1.047155,-0.785385,...,2.5e-05,5.1e-05,3.2e-05,5e-05,0.000205,0.000291,0.00038,0.000378,0.00146,0.000995
25%,0.0,0.476563,53.07612,0.041704,0.013792,0.006258,0.002536,-0.200354,-0.502925,-0.281094,...,0.001227,0.002597,0.005078,0.009123,0.015179,0.023906,0.036021,0.05262,0.076126,0.108737
50%,0.0,0.499808,85.30453,0.061818,0.021871,0.01041,0.004433,-0.000867,0.022481,-0.0048,...,0.002098,0.004395,0.008513,0.015155,0.025131,0.039324,0.058995,0.085379,0.121716,0.173801
75%,1.0,0.552509,127.3066,0.08462,0.032024,0.016069,0.007318,0.197197,0.544685,0.275647,...,0.003375,0.006884,0.013031,0.022948,0.037907,0.05907,0.08764,0.125191,0.177276,0.251671
max,1.0,0.706444,448.8062,0.236732,0.097754,0.062505,0.034186,1.568587,1.047186,0.78537,...,0.017935,0.031661,0.053479,0.085983,0.130754,0.188419,0.258436,0.345037,0.924904,231.0445


In [10]:
# Separation of labels

df_labels = df['eos_type']
df.drop(['eos_type'],axis = 1,inplace = True)

In [11]:
df_labels.head()

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: eos_type, dtype: float64

In [12]:
# Train data definition

df_features = df

In [13]:
df_features.head()

Unnamed: 0,mean_pt,dndy,v2,v3,v4,v5,psi2,psi3,psi4,psi5,...,v5_ptbin5,v5_ptbin6,v5_ptbin7,v5_ptbin8,v5_ptbin9,v5_ptbin10,v5_ptbin11,v5_ptbin12,v5_ptbin13,v5_ptbin14
0,0.49328,119.3278,0.026617,0.019539,0.001285,0.004125,-0.114632,-0.192545,0.604534,0.220925,...,0.001793,0.003997,0.008152,0.015164,0.02579,0.040464,0.059261,0.081969,0.10831,0.138916
1,0.499796,140.6615,0.027218,0.015303,0.017385,0.001732,1.389377,0.28295,0.15558,0.376342,...,0.000811,0.00175,0.003415,0.006048,0.009945,0.01572,0.024593,0.038424,0.059534,0.090801
2,0.503392,152.7687,0.011552,0.016412,0.004306,0.007357,-0.133998,0.223442,-0.593591,-0.087081,...,0.003183,0.006916,0.013742,0.025196,0.042973,0.068877,0.104854,0.153033,0.215902,0.29844
3,0.488244,123.8411,0.016648,0.007232,0.004221,0.001482,1.066655,0.425053,-0.017385,-0.303388,...,0.000763,0.001691,0.003347,0.005789,0.008923,0.013181,0.020897,0.036646,0.066004,0.117576
4,0.496821,130.7838,0.02987,0.03446,0.016352,0.001592,0.849569,-0.411059,0.742171,-0.524277,...,0.000919,0.002004,0.003871,0.006358,0.008433,0.007623,0.003397,0.026416,0.078568,0.177591


In [14]:
# Split dataset into train and test

sample_train, sample_test, label_train, label_test = train_test_split(
     df_features, df_labels, test_size=0.2, random_state=22)

# Reduce dimensions using PCA so later you can fit the dimensions with the qubits

n_dim = 2
pca = PCA(n_components=n_dim).fit(sample_train)
sample_train = pca.transform(sample_train)
sample_test = pca.transform(sample_test)

# Normalize

std_scale = StandardScaler().fit(sample_train)
sample_train = std_scale.transform(sample_train)
sample_test = std_scale.transform(sample_test)

# Scale for better fit within the feature map

samples = np.append(sample_train, sample_test, axis=0)
minmax_scale = MinMaxScaler((-1, 1)).fit(samples)
sample_train = minmax_scale.transform(sample_train)
sample_test = minmax_scale.transform(sample_test)

# Select a sample for a better control of the research and wall time

train_size = 160
sample_train = sample_train[:train_size]
label_train = label_train[:train_size]

test_size = 40
sample_test = sample_test[:test_size]
label_test = label_test[:test_size]

In [15]:
# Review the balance of the target variable in train

label_train.value_counts(normalize=True)*100

0.0    62.5
1.0    37.5
Name: eos_type, dtype: float64

In [16]:
# Review the balance of the target variable in test

label_test.value_counts(normalize=True)*100

1.0    52.5
0.0    47.5
Name: eos_type, dtype: float64

In [17]:
# Basic parameters for hybrid model

seed = 8500
feature_dim = n_dim

In [18]:
# Define feature_map

feature_map = ZZFeatureMap(feature_dimension=feature_dim, reps=2)

# Define the backend
backend = QuantumInstance(
    BasicAer.get_backend("qasm_simulator"), shots=256, seed_simulator=seed, seed_transpiler=seed
)

# Define the kernel

kernel = QuantumKernel(feature_map=feature_map, quantum_instance=backend)

# Model run
svc = SVC(kernel=kernel.evaluate)
svc.fit(sample_train, label_train)
score = svc.score(sample_test, label_test)

print(f"Callable kernel classification test score: {score}")

Callable kernel classification test score: 0.675


In [19]:
result_predict = svc.predict(sample_test)

In [20]:
# Print the classification report and important metrics

print(metrics.classification_report(label_test,result_predict))
print(metrics.precision_score(label_test,result_predict))
print(metrics.recall_score(label_test,result_predict))
print(metrics.f1_score(label_test,result_predict))
print(metrics.balanced_accuracy_score(label_test,result_predict))

              precision    recall  f1-score   support

         0.0       0.62      0.79      0.70        19
         1.0       0.75      0.57      0.65        21

    accuracy                           0.68        40
   macro avg       0.69      0.68      0.67        40
weighted avg       0.69      0.68      0.67        40

0.75
0.5714285714285714
0.6486486486486486
0.6804511278195489
