# Global imports

In [1]:
# External imports
import pandas as pd
import numpy as np
from collections import Counter

# Sklearn imports
from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit, StratifiedKFold, GridSearchCV
from sklearn.svm import SVC
from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import cross_validate, cross_val_predict

# Customized score functions
from qksvm.scores import get_scores, print_scores, print_cv_scores
from qksvm.QuantumFeatureMap import QuantumFeatureMap
from qksvm.QKSVC import QKSVC

# Single seed reused as `random_state` by the splitters and estimators in the cells below;
# it also seeds NumPy's legacy global random number generator.
seed = 12345
np.random.seed(seed)

In [2]:
def print_scores(score):
    """Print mean +- standard deviation of three cross-validation test metrics.

    Note: this definition rebinds the ``print_scores`` name that the first
    cell imports from ``qksvm.scores``.

    Parameters
    ----------
    score : mapping
        Must provide the keys ``'test_balanced_accuracy'``, ``'test_f1_macro'``
        and ``'test_matthews_corrcoef'``, each holding the per-fold values of
        that metric (the cells below pass the result of ``cross_validate``).

    Returns
    -------
    None
        The three summary lines are written to standard output.
    """
    # (output template, key in `score`) -- one entry per printed line, in print order.
    report = (
        ('Accuracy: {:.2f} +- {:.2f}', 'test_balanced_accuracy'),
        ('      F1: {:.2f} +- {:.2f}', 'test_f1_macro'),
        ('     MCC: {:.2f} +- {:.2f}', 'test_matthews_corrcoef'),
    )
    for template, key in report:
        fold_values = score[key]
        print(template.format(np.mean(fold_values), np.std(fold_values)))

# Dataset

In [3]:
# Load the Iris dataset shipped with scikit-learn and keep all feature columns.
iris = load_iris()
X, y = iris.data[:, :], iris.target

# Sanity check: feature-matrix shape, then number of samples per class label.
for summary in (X.shape, Counter(y)):
    print(summary)

(150, 4)
Counter({0: 50, 1: 50, 2: 50})


In [4]:
# MinMaxScaler and train_test_split are already imported in the first cell; they are not
# re-imported here so that all dependencies stay in one place.

# Rescale every feature to the interval [xmin, xmax].
# NOTE(review): the scaler is fitted on the full dataset *before* the train/test split, so the
# test samples influence the scaling of the training features. The scaled X is also what the
# cross-validation cells below consume. If a leakage-free estimate is required, fit the scaler
# on the training part only (e.g. via a sklearn Pipeline).
xmin, xmax = -1, 1
X = MinMaxScaler(feature_range=(xmin, xmax)).fit_transform(X)
axes = [xmin, xmax, xmin, xmax]  # presumably 2-D plot limits; not used by the cells shown here

# Stratified 80/20 hold-out split, reproducible through the shared seed.
train_size = 0.8
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    train_size=train_size, test_size=test_size,
    stratify=y, random_state=seed,
)

# RBF-SVM

In [5]:
# Hyper-parameter grid of the RBF-kernel SVM: kernel width `gamma` and regularisation `C`.
param_grid = {
    "gamma": [0.01, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0, 5.0, 10.0],
    "C": [1, 2, 4, 6, 8, 10, 100],
}

# Exhaustive search on the training split, scored by balanced accuracy over 5 stratified folds.
gs = GridSearchCV(
    estimator=SVC(kernel='rbf', random_state=seed),
    param_grid=param_grid,
    scoring='balanced_accuracy',
    n_jobs=1,
    refit=True,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=seed),
)
gs.fit(X_train, y_train)

print('Best grid search parameters:', gs.best_params_)
# gs.best_score_ is the mean cross-validated balanced accuracy of the selected grid point.
print('Best training score:', gs.best_score_)

# Re-fit the selected SVC inside a one-vs-rest wrapper on the whole training split.
clf = OneVsRestClassifier(gs.best_estimator_)
clf.fit(X_train, y_train)

print('Train set:')
y_pred = clf.predict(X_train)
print(metrics.classification_report(y_true=y_train, y_pred=y_pred, digits=2))

print('Test set:')
y_pred = clf.predict(X_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred, digits=2))

# Summary metrics on the test split. `y_pred` already holds the test predictions, so the
# classifier is not queried a second time.
acc = metrics.balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
f1  = metrics.f1_score(y_true=y_test, y_pred=y_pred, average=None)  # one F1 value per class
mcc = metrics.matthews_corrcoef(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {acc:.2f}")  # balanced accuracy
# Join all per-class values instead of indexing f1[0..2], so the line works for any class count.
print("F1: " + " ".join(f"{value:.2f}" for value in f1))
print(f"MCC: {mcc:.2f}")

Best grid search parameters: {'C': 1, 'gamma': 0.8}
Best training score: 0.9833333333333334
Train set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.97      0.97        40
           2       0.97      0.97      0.97        40

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120

Test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 0.97
F1: 1.00 0.95 0.95
MCC: 0.95


In [6]:
# 5-fold stratified cross-validation of the RBF-SVM on the full (scaled) dataset, with C and
# gamma fixed to the values reported by the grid search above.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
clf = SVC(kernel='rbf', C=1.0, gamma=0.8, random_state=seed)
score = cross_validate(
    estimator=clf,
    X=X,
    y=y,
    scoring=["balanced_accuracy", "f1_macro", "matthews_corrcoef"],
    cv=cv,
    n_jobs=1,
    verbose=0,
)

# Mean +- std over the folds for each of the three metrics.
print_scores(score)

Accuracy: 0.96 +- 0.02
      F1: 0.96 +- 0.03
     MCC: 0.94 +- 0.04


# QKE-SVM

In [7]:
# Quantum-kernel SVM with a single-layer H / RZ / CZ feature map on 4 qubits (ring entanglement).
# `alpha` multiplies each feature inside the rotation angle, as shown in the circuit printout.
alpha = 2.0
fm = QuantumFeatureMap(
    num_features=X.shape[1],
    num_qubits=4,
    num_layers=1,
    gates=['H', 'RZ', 'CZ'],
    entanglement='ring',
)
print(fm.draw(plot_barriers=False, fold=120))

# 5-fold stratified cross-validation of the one-vs-rest quantum-kernel classifier on the full dataset.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
qsvc = OneVsRestClassifier(
    QKSVC(feature_map=fm, alpha=alpha, C=1.0, random_state=seed)
)
score = cross_validate(
    estimator=qsvc,
    X=X,
    y=y,
    scoring=["balanced_accuracy", "f1_macro", "matthews_corrcoef"],
    cv=cv,
    n_jobs=1,
    verbose=0,
)
print_scores(score)

     ┌───┐ ┌────────────┐              
q_0: ┤ H ├─┤ Rz(x[0]*α) ├──■────────■──
     ├───┤ ├────────────┤  │        │  
q_1: ┤ H ├─┤ Rz(x[1]*α) ├──■──■─────┼──
     ├───┤ ├────────────┤     │     │  
q_2: ┤ H ├─┤ Rz(x[2]*α) ├─────■──■──┼──
     ├───┤ ├────────────┤        │  │  
q_3: ┤ H ├─┤ Rz(x[3]*α) ├────────■──■──
     └───┘ └────────────┘              
Accuracy: 0.96 +- 0.02
      F1: 0.96 +- 0.03
     MCC: 0.94 +- 0.04


In [8]:
# Same experiment as the previous cell, with the feature scaling factor raised to 2.5.
alpha = 2.5
fm = QuantumFeatureMap(
    num_features=X.shape[1],
    num_qubits=4,
    num_layers=1,
    gates=['H', 'RZ', 'CZ'],
    entanglement='ring',
)
print(fm.draw(plot_barriers=False, fold=120))

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
qsvc = OneVsRestClassifier(
    QKSVC(feature_map=fm, alpha=alpha, C=1.0, random_state=seed)
)
score = cross_validate(
    estimator=qsvc,
    X=X,
    y=y,
    scoring=["balanced_accuracy", "f1_macro", "matthews_corrcoef"],
    cv=cv,
    n_jobs=1,
    verbose=0,
)
print_scores(score)

# Results recorded by hand from earlier runs of this cell (mean +- std over the CV folds).
#        2 layers                     1 layer
# alpha  acc           mcc            acc           mcc
# 0.5    0.90 +- 0.04  0.86 +- 0.05   0.89 +- 0.04  0.85 +- 0.06
# 1.0    0.95 +- 0.02  0.92 +- 0.02   0.95 +- 0.03  0.93 +- 0.04
# 1.5    0.96 +- 0.02  0.94 +- 0.04   0.96 +- 0.02  0.94 +- 0.04
# 2.0    0.95 +- 0.02  0.93 +- 0.02   0.96 +- 0.02  0.94 +- 0.04
# 2.5    0.95 +- 0.02  0.92 +- 0.02   0.96 +- 0.02  0.94 +- 0.04

#           3 layers      2 layers       1 layer
# Accuracy: 0.96 +- 0.02  0.96 +- 0.02   0.96 +- 0.02
#       F1: 0.96 +- 0.03  0.96 +- 0.03   0.96 +- 0.03
#      MCC: 0.94 +- 0.04  0.94 +- 0.04   0.94 +- 0.04

     ┌───┐ ┌────────────┐              
q_0: ┤ H ├─┤ Rz(x[0]*α) ├──■────────■──
     ├───┤ ├────────────┤  │        │  
q_1: ┤ H ├─┤ Rz(x[1]*α) ├──■──■─────┼──
     ├───┤ ├────────────┤     │     │  
q_2: ┤ H ├─┤ Rz(x[2]*α) ├─────■──■──┼──
     ├───┤ ├────────────┤        │  │  
q_3: ┤ H ├─┤ Rz(x[3]*α) ├────────■──■──
     └───┘ └────────────┘              
Accuracy: 0.96 +- 0.02
      F1: 0.96 +- 0.03
     MCC: 0.94 +- 0.04


## QKSVM GridSearch: entanglement

In [9]:
# Search space: feature scaling `alpha`, circuit depth and the gate set of the feature map
# (the entanglement pattern is fixed to 'ring' on the estimator below).
param_grid = {
    # Explicit 8-point grid 0.8, 1.0, ..., 2.2. It replaces np.arange(0.8, 2.4, 0.2): with a
    # float step, arange's element count depends on rounding and its values carry round-off
    # noise (e.g. 1.5999999999999999), so the grid is spelled out and rounded instead.
    "alpha": np.round(np.linspace(0.8, 2.2, 8), 1),
    "C": [1],
    "n_layers": [1, 2, 3, 4],
    "n_qubits": [4],
    "feature_map": [
                    ['rx', 'cz'],
                    ['rx', 'cx'],
                    ['rx', 'swap'],
                    ['rx', 'crz'],
                    ['rx', 'crx'],
                    ['rx', 'ry', 'cz'],
                    ['rx', 'ry', 'cy'],
                    ['rx', 'ry', 'swap'],
        ]
}

# Exhaustive search on the training split, scored by balanced accuracy over 4 stratified folds.
gs = GridSearchCV(
    estimator=QKSVC(entanglement='ring', random_state=seed),
    param_grid=param_grid,
    scoring='balanced_accuracy',
    n_jobs=1,
    refit=True,
    cv=StratifiedKFold(n_splits=4, shuffle=True, random_state=seed),
)

gs.fit(X_train, y_train)
print(f"CV best score: {gs.best_score_}")
print(f"CV optimal hyperparameters: {gs.best_params_}")

# Re-fit the selected estimator inside a one-vs-rest wrapper on the whole training split.
qsvc = OneVsRestClassifier(gs.best_estimator_)
qsvc.fit(X_train, y_train)

print('Train set:')
y_pred = qsvc.predict(X_train)
print(metrics.classification_report(y_true=y_train, y_pred=y_pred))

print('Test set:')
y_pred = qsvc.predict(X_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

# Summary metrics on the test split. `y_pred` already holds the test predictions, so the
# (expensive) quantum-kernel prediction is not repeated.
acc = metrics.balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
f1  = metrics.f1_score(y_true=y_test, y_pred=y_pred, average=None)  # one F1 value per class
mcc = metrics.matthews_corrcoef(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {acc:.2f}")  # balanced accuracy
# Join all per-class values instead of indexing f1[0..2], so the line works for any class count.
print("F1: " + " ".join(f"{value:.2f}" for value in f1))
print(f"MCC: {mcc:.2f}")

CV best score: 0.9833333333333334
CV optimal hyperparameters: {'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'cz'], 'n_layers': 3, 'n_qubits': 4}
Train set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.97      0.97        40
           2       0.97      0.97      0.97        40

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120

Test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 0.97
F1: 1.00 0.95 0.95
MCC: 0.95


In [10]:
# List every grid point whose mean CV score exceeds 0.98, together with its fold-to-fold std.
# NOTE(review): the std cut-off of 0.4 is far above every std printed below (~0.02), so it does
# not filter anything here -- confirm whether a tighter value (e.g. 0.04) was intended.
results = gs.cv_results_
v = results['mean_test_score']
s = results['std_test_score']

idxs = np.flatnonzero((v > 0.98) & (s < 0.4))
print(idxs)

for i in idxs:
    # grid index, mean score, std -- then the hyper-parameters of that grid point
    print(i, v[i], s[i])
    print(results['params'][i])

[  2  21  45  53  65  97 101 116 120 124 133 204 224 228 232 236]
2 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'cz'], 'n_layers': 3, 'n_qubits': 4}
21 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'ry', 'cz'], 'n_layers': 2, 'n_qubits': 4}
45 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 1.0, 'feature_map': ['rx', 'crz'], 'n_layers': 2, 'n_qubits': 4}
53 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 1.0, 'feature_map': ['rx', 'ry', 'cz'], 'n_layers': 2, 'n_qubits': 4}
65 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 1.2, 'feature_map': ['rx', 'cz'], 'n_layers': 2, 'n_qubits': 4}
97 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 1.4, 'feature_map': ['rx', 'cz'], 'n_layers': 2, 'n_qubits': 4}
101 0.9833333333333334 0.016666666666666663
{'C': 1, 'alpha': 1.4, 'feature_map': ['rx', 'cx'], 'n_layers': 2, 'n_qubits': 4}
116 0.9833333333333334 0.016666666666666663
{'

In [11]:
# Rebuild the configuration selected by the grid search (3 layers of RX + CZ, ring entanglement,
# alpha = 0.8) explicitly, then report hold-out and cross-validated performance.
alpha = 0.8
fm = QuantumFeatureMap(
    num_features=X.shape[1],
    num_layers=3,
    num_qubits=4,
    gates=['RX', 'CZ'],
    entanglement='ring',
)
print(fm.draw(plot_barriers=False, fold=120))

qsvc = OneVsRestClassifier(
    QKSVC(feature_map=fm, alpha=alpha, C=1.0, random_state=seed)
)
qsvc.fit(X_train, y_train)

# Classification report on the training split first, then on the held-out test split.
for title, features, targets in (
    ('Train set:', X_train, y_train),
    ('Test set:', X_test, y_test),
):
    print(title)
    y_pred = qsvc.predict(features)
    print(metrics.classification_report(y_true=targets, y_pred=y_pred))

# 5-fold stratified cross-validation on the full dataset.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
score = cross_validate(
    estimator=qsvc,
    X=X,
    y=y,
    scoring=["balanced_accuracy", "f1_macro", "matthews_corrcoef"],
    cv=cv,
    n_jobs=1,
    verbose=0,
)
print_scores(score)

     ┌────────────┐              ┌────────────┐              ┌────────────┐              
q_0: ┤ Rx(x[0]*α) ├──■────────■──┤ Rx(x[0]*α) ├──■────────■──┤ Rx(x[0]*α) ├──■────────■──
     ├────────────┤  │        │  ├────────────┤  │        │  ├────────────┤  │        │  
q_1: ┤ Rx(x[1]*α) ├──■──■─────┼──┤ Rx(x[1]*α) ├──■──■─────┼──┤ Rx(x[1]*α) ├──■──■─────┼──
     ├────────────┤     │     │  ├────────────┤     │     │  ├────────────┤     │     │  
q_2: ┤ Rx(x[2]*α) ├─────■──■──┼──┤ Rx(x[2]*α) ├─────■──■──┼──┤ Rx(x[2]*α) ├─────■──■──┼──
     ├────────────┤        │  │  ├────────────┤        │  │  ├────────────┤        │  │  
q_3: ┤ Rx(x[3]*α) ├────────■──■──┤ Rx(x[3]*α) ├────────■──■──┤ Rx(x[3]*α) ├────────■──■──
     └────────────┘              └────────────┘              └────────────┘              
Train set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.97      0.97        40
           2    

## QKSVM GridSearch: number of qubits

In [12]:
# Search space: feature scaling `alpha`, circuit depth and number of qubits for the RX + CZ
# feature map (the entanglement pattern is fixed to 'linear' on the estimator below).
param_grid = {
    # Explicit 13-point grid 0.8, 1.0, ..., 3.2. It replaces np.arange(0.8, 3.2, 0.2): with a
    # float step, (3.2 - 0.8) / 0.2 rounds to slightly more than 12, so arange emitted a 13th
    # point (~3.2) although the stop value reads as excluded. The evaluated grid is kept and
    # made visible here; use np.linspace(0.8, 3.0, 12) if 3.2 was not meant to be searched.
    "alpha": np.round(np.linspace(0.8, 3.2, 13), 1),
    "C": [1],
    "n_layers": [1, 2, 3, 4],
    "n_qubits": [4, 5, 6, 7, 8],
    "feature_map": [
                    ['rx', 'cz'],
                   ],
}

# Exhaustive search on the training split, scored by balanced accuracy over 5 stratified folds.
gs = GridSearchCV(
    estimator=QKSVC(entanglement='linear', random_state=seed),
    param_grid=param_grid,
    scoring='balanced_accuracy',
    n_jobs=1,
    refit=True,
    cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=seed),
)

gs.fit(X_train, y_train)
print(f"CV best score: {gs.best_score_}")
print(f"CV optimal hyperparameters: {gs.best_params_}")

# Re-fit the selected estimator inside a one-vs-rest wrapper on the whole training split.
qsvc = OneVsRestClassifier(gs.best_estimator_)
qsvc.fit(X_train, y_train)

print('Train set:')
y_pred = qsvc.predict(X_train)
print(metrics.classification_report(y_true=y_train, y_pred=y_pred))

print('Test set:')
y_pred = qsvc.predict(X_test)
print(metrics.classification_report(y_true=y_test, y_pred=y_pred))

# Summary metrics on the test split. `y_pred` already holds the test predictions, so the
# (expensive) quantum-kernel prediction is not repeated.
acc = metrics.balanced_accuracy_score(y_true=y_test, y_pred=y_pred)
f1  = metrics.f1_score(y_true=y_test, y_pred=y_pred, average=None)  # one F1 value per class
mcc = metrics.matthews_corrcoef(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {acc:.2f}")  # balanced accuracy
# Join all per-class values instead of indexing f1[0..2], so the line works for any class count.
print("F1: " + " ".join(f"{value:.2f}" for value in f1))
print(f"MCC: {mcc:.2f}")

CV best score: 0.9833333333333334
CV optimal hyperparameters: {'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'cz'], 'n_layers': 2, 'n_qubits': 8}
Train set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        40
           1       0.97      0.97      0.97        40
           2       0.97      0.97      0.97        40

    accuracy                           0.98       120
   macro avg       0.98      0.98      0.98       120
weighted avg       0.98      0.98      0.98       120

Test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.90      0.95        10
           2       0.91      1.00      0.95        10

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30

Accuracy: 0.97
F1: 1.00 0.95 0.95
MCC: 0.95


In [13]:
# Show the grid points of the qubit-count search whose mean CV score exceeds 0.98.
# NOTE(review): the `s < 0.4` condition is satisfied by every std printed below (~0.02), so it
# has no filtering effect here -- confirm whether a smaller threshold was intended.
v, s = (gs.cv_results_[column] for column in ('mean_test_score', 'std_test_score'))

selected = (v > 0.98) & (s < 0.4)
idxs = np.nonzero(selected)[0]
print(idxs)

grid_points = gs.cv_results_['params']
for i in idxs:
    # grid index, mean score, std -- then the hyper-parameters of that grid point
    print(i, v[i], s[i])
    print(grid_points[i])

[  9  10  45  63  64  83  84 140]
9 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'cz'], 'n_layers': 2, 'n_qubits': 8}
10 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 0.8, 'feature_map': ['rx', 'cz'], 'n_layers': 3, 'n_qubits': 4}
45 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 1.2, 'feature_map': ['rx', 'cz'], 'n_layers': 2, 'n_qubits': 4}
63 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 1.4, 'feature_map': ['rx', 'cz'], 'n_layers': 1, 'n_qubits': 7}
64 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 1.4, 'feature_map': ['rx', 'cz'], 'n_layers': 1, 'n_qubits': 8}
83 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 1.5999999999999999, 'feature_map': ['rx', 'cz'], 'n_layers': 1, 'n_qubits': 7}
84 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 1.5999999999999999, 'feature_map': ['rx', 'cz'], 'n_layers': 1, 'n_qubits': 8}
140 0.9833333333333334 0.020412414523193135
{'C': 1, 'alpha': 

In [14]:
# Rebuild the configuration selected by the qubit-count search (2 layers of RX + CZ on 8 qubits,
# linear entanglement, alpha = 0.8) and cross-validate it on the full dataset.
alpha = 0.8
fm = QuantumFeatureMap(
    num_features=X.shape[1],
    num_layers=2,
    num_qubits=8,
    gates=['RX', 'CZ'],
    entanglement='linear',
)
print(fm.draw(plot_barriers=False, fold=120))

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
qsvc = OneVsRestClassifier(
    QKSVC(feature_map=fm, alpha=alpha, C=1.0, random_state=seed)
)
score = cross_validate(
    estimator=qsvc,
    X=X,
    y=y,
    scoring=["balanced_accuracy", "f1_macro", "matthews_corrcoef"],
    cv=cv,
    n_jobs=1,
    verbose=0,
)
print_scores(score)

     ┌────────────┐                       ┌────────────┐                       
q_0: ┤ Rx(x[0]*α) ├──■────────────────────┤ Rx(x[0]*α) ├──■────────────────────
     ├────────────┤  │                    ├────────────┤  │                    
q_1: ┤ Rx(x[1]*α) ├──■──■─────────────────┤ Rx(x[1]*α) ├──■──■─────────────────
     ├────────────┤     │                 ├────────────┤     │                 
q_2: ┤ Rx(x[2]*α) ├─────■──■──────────────┤ Rx(x[2]*α) ├─────■──■──────────────
     ├────────────┤        │              ├────────────┤        │              
q_3: ┤ Rx(x[3]*α) ├────────■──■───────────┤ Rx(x[3]*α) ├────────■──■───────────
     ├────────────┤           │           ├────────────┤           │           
q_4: ┤ Rx(x[0]*α) ├───────────■──■────────┤ Rx(x[0]*α) ├───────────■──■────────
     ├────────────┤              │        ├────────────┤              │        
q_5: ┤ Rx(x[1]*α) ├──────────────■──■─────┤ Rx(x[1]*α) ├──────────────■──■─────
     ├────────────┤                 │   