In [2]:
import pandas as pd
import numpy as np
from pennylane import numpy as np
from sklearn.preprocessing import normalize
from sklearn.preprocessing import StandardScaler

import pennylane as qml
from pennylane.templates.embeddings import AngleEmbedding, AmplitudeEmbedding
from pennylane.optimize import AdamOptimizer
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA


import time
start = time.time()

In [3]:
# Read out CSV and adjusting the sample needed

df = pd.read_csv('UCI_Credit_Card.csv', sep=',')
df = df.astype(float)
train,test = train_test_split(df, test_size=0.20)
train_set = train
test_set = test
train_set = train_set.sample(800)
test_set = test_set.sample(200)

In [4]:
# Review the information related to the dataframe

df.info

<bound method DataFrame.info of             ID  LIMIT_BAL  SEX  EDUCATION  MARRIAGE   AGE  PAY_0  PAY_2  \
0          1.0    20000.0  2.0        2.0       1.0  24.0    2.0    2.0   
1          2.0   120000.0  2.0        2.0       2.0  26.0   -1.0    2.0   
2          3.0    90000.0  2.0        2.0       2.0  34.0    0.0    0.0   
3          4.0    50000.0  2.0        2.0       1.0  37.0    0.0    0.0   
4          5.0    50000.0  1.0        2.0       1.0  57.0   -1.0    0.0   
...        ...        ...  ...        ...       ...   ...    ...    ...   
29995  29996.0   220000.0  1.0        3.0       1.0  39.0    0.0    0.0   
29996  29997.0   150000.0  1.0        3.0       2.0  43.0   -1.0   -1.0   
29997  29998.0    30000.0  1.0        2.0       2.0  37.0    4.0    3.0   
29998  29999.0    80000.0  1.0        3.0       1.0  41.0    1.0   -1.0   
29999  30000.0    50000.0  1.0        2.0       1.0  46.0    0.0    0.0   

       PAY_3  PAY_4  ...  BILL_AMT4  BILL_AMT5  BILL_AMT6  PAY_AMT1

In [5]:
# Separation of labels

x_train = train_set
y_train = train_set[['default.payment.next.month']]

x_test = test_set
y_test = test_set[['default.payment.next.month']]

In [6]:
y_train

Unnamed: 0,default.payment.next.month
24579,0.0
4131,0.0
29876,0.0
9166,1.0
8337,0.0
...,...
8972,1.0
23375,0.0
5743,0.0
8530,0.0


In [7]:
y_test

Unnamed: 0,default.payment.next.month
15733,0.0
13425,0.0
28715,0.0
6049,0.0
12904,1.0
...,...
23400,0.0
5191,1.0
15820,0.0
3893,0.0


In [8]:
# Splitting the x_train dataframe by half without any analysis or criteria

features_a = x_train.iloc[:,:12]
features_b = x_train.iloc[:,12:]

In [9]:
# Splitting the x_test dataframe by half without any analysis or criteria

features_a_test = x_test.iloc[:,:12]
features_b_test = x_test.iloc[:,12:]

In [10]:
#Applying LDA to x_train features group and since we have only two target classes the fitting process will force 1 component per group of features

lda = LDA(n_components=1)
features_lda_1 = lda.fit_transform(features_a, y_train)
features_lda_2 = lda.fit_transform(features_b, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [11]:
# Applying LDA to x_test features group and since we have only two target classes the fitting process will force 1 component per group of features

features_lda_1_test = lda.fit_transform(features_a_test, y_test)
features_lda_2_test = lda.fit_transform(features_b_test, y_test)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


In [12]:
# Transforming arrays into DataFrame to apply join later

features_lda_1 = pd.DataFrame(features_lda_1)
features_lda_2 = pd.DataFrame(features_lda_2)
features_lda_1_test = pd.DataFrame(features_lda_1_test)
features_lda_2_test = pd.DataFrame(features_lda_2_test)

In [13]:
# Join between the single feature columns

x_train_lda = features_lda_1.join(features_lda_2, lsuffix="_left", rsuffix="_right")
x_test_lda = features_lda_1_test.join(features_lda_2_test, lsuffix="_left", rsuffix="_right")

In [14]:
x_train_lda

Unnamed: 0,0_left,0_right
0,-1.345405,-0.301301
1,-0.612797,-0.796739
2,-1.434095,-0.158422
3,1.781708,0.511760
4,1.776393,0.503258
...,...,...
795,-0.162334,0.492042
796,-0.091364,0.202985
797,-0.714028,0.384298
798,-0.212374,0.337102


In [15]:
x_test_lda

Unnamed: 0,0_left,0_right
0,-0.276840,0.529014
1,-0.897600,0.465746
2,-2.784959,-2.329779
3,1.929075,1.810893
4,3.413742,0.340819
...,...,...
195,-1.574110,0.584930
196,-0.080561,0.498835
197,0.924304,0.496149
198,-0.238253,-0.224738


In [16]:
y_test

Unnamed: 0,default.payment.next.month
15733,0.0
13425,0.0
28715,0.0
6049,0.0
12904,1.0
...,...
23400,0.0
5191,1.0
15820,0.0
3893,0.0


In [17]:
y_train

Unnamed: 0,default.payment.next.month
24579,0.0
4131,0.0
29876,0.0
9166,1.0
8337,0.0
...,...
8972,1.0
23375,0.0
5743,0.0
8530,0.0


In [18]:
# Normalization

std_scale = StandardScaler().fit(x_train_lda)
data = std_scale.transform(x_train_lda)

std_scale = StandardScaler().fit(x_test_lda)
x_test_lda_n = std_scale.transform(x_test_lda)

In [19]:
x_test_lda_n

array([[-2.60355335e-01,  5.20188317e-01],
       [-8.44151636e-01,  4.57976168e-01],
       [-2.61912762e+00, -2.29091040e+00],
       [ 1.81420778e+00,  1.78068104e+00],
       [ 3.21046940e+00,  3.35133452e-01],
       [ 9.86853070e-02, -9.54663377e-01],
       [ 4.47286704e-01, -1.03899734e+00],
       [ 1.65556882e-01, -4.88338813e-01],
       [ 4.91059299e-01,  1.61217780e-01],
       [ 1.67909243e+00,  8.96132867e-01],
       [-2.78474358e-01,  1.85785304e-02],
       [ 5.71229589e-01, -1.26331485e+00],
       [-2.82456377e-01,  2.31972149e-01],
       [-1.61001124e+00, -1.48190487e+00],
       [ 9.19883395e-01,  4.87409206e-01],
       [ 2.42894927e+00,  1.38171318e+00],
       [-1.52647275e+00, -2.80356287e-01],
       [ 4.52346617e-01,  1.48484406e+00],
       [ 8.06895980e-01,  4.61878459e-01],
       [ 7.01790577e-02,  3.89578154e-01],
       [-6.13070713e-01,  2.29702476e+00],
       [-8.52632754e-01,  5.25408549e-01],
       [ 3.43891430e-01, -4.70547513e-01],
       [-9.

In [20]:
# Dimensions definition

n_dim = 2

In [21]:
# Review the balance of the target variable in train

y_test.value_counts(normalize=True)*100

default.payment.next.month
0.0                           84.0
1.0                           16.0
dtype: float64

In [22]:
# Review the balance of the target variable in test

y_test.value_counts(normalize=True)*100

default.payment.next.month
0.0                           84.0
1.0                           16.0
dtype: float64

In [23]:
# Angle Encoding

num_qubits = n_dim

dev = qml.device('default.qubit', wires = num_qubits)

@qml.qnode(dev)
def circuit(parameters, data):
    for i in range(num_qubits):
        qml.Hadamard(wires = i)
    
    AngleEmbedding(features = data, wires = range(num_qubits), rotation = 'Y')
    
    qml.StronglyEntanglingLayers(weights = parameters, wires = range(num_qubits))
    
    return qml.expval(qml.PauliZ(0))

In [24]:
num_layers = 5
weights_init = 0.01 * np.random.randn(num_layers, num_qubits, 3, requires_grad=True)
bias_init = np.array(0.0, requires_grad=True)

print(weights_init, bias_init)

[[[-0.01351294 -0.01386338 -0.01311764]
  [-0.00128187  0.00767486 -0.009503  ]]

 [[ 0.00720301 -0.00030689 -0.01311703]
  [-0.02596319  0.01247156  0.00823375]]

 [[ 0.01943211 -0.00278012 -0.00379277]
  [-0.00124978 -0.00246835 -0.00150522]]

 [[-0.001015    0.01707059  0.0014343 ]
  [ 0.007953   -0.01585231 -0.01451275]]

 [[ 0.00681931 -0.015388    0.00787622]
  [-0.01289089  0.00444768  0.018959  ]]] 0.0


In [25]:
circuit(weights_init, data[0])

tensor(0.28526829, requires_grad=True)

In [26]:
def variational_classifier(weights, bias, x):
    return circuit(weights, x) + bias

In [27]:
def square_loss(labels, predictions):
    loss = 0
    for l, p in zip(labels, predictions):
        loss = loss + (l - p) ** 2

    loss = loss / len(labels)
    return loss

In [28]:
def accuracy(labels, predictions):

    loss = 0
    for l, p in zip(labels, predictions):
        if abs(l - p) < 1e-5:
            loss = loss + 1
    loss = loss / len(labels)

    return loss

In [29]:
def cost(weights, bias, X, Y):
    predictions = [variational_classifier(weights, bias, x) for x in X]
    return square_loss(Y, predictions)

In [30]:
Y = np.array(y_train.values[:,0] * 2 - np.ones(len(y_train.values[:,0])), requires_grad = False)  # shift label from {0, 1} to {-1, 1}
X = np.array(data, requires_grad=False)

for i in range(5):
    print("X = {}, Y = {: d}".format(list(X[i]), int(Y[i])))

X = [tensor(-1.27726695, requires_grad=False), tensor(-0.29727767, requires_grad=False)], Y = -1
X = [tensor(-0.58176221, requires_grad=False), tensor(-0.78610004, requires_grad=False)], Y = -1
X = [tensor(-1.36146548, requires_grad=False), tensor(-0.1563068, requires_grad=False)], Y = -1
X = [tensor(1.69147396, requires_grad=False), tensor(0.50492603, requires_grad=False)], Y =  1
X = [tensor(1.68642764, requires_grad=False), tensor(0.49653837, requires_grad=False)], Y = -1


In [31]:
opt = AdamOptimizer(stepsize=0.1, beta1=0.9, beta2=0.99, eps=1e-08)
batch_size = 10

In [32]:
weights = weights_init
bias = bias_init

wbest = 0
bbest = 0
abest = 0

for it in range(20):

    # weights update by one optimizer step

    batch_index = np.random.randint(0, len(X), (batch_size,))
    X_batch = X[batch_index]
    Y_batch = Y[batch_index]
    weights, bias, _, _ = opt.step(cost, weights, bias, X_batch, Y_batch)

    # Compute the accuracy
    predictions = [np.sign(variational_classifier(weights, bias, x)) for x in X]
    
    if accuracy(Y, predictions) > abest:
        wbest = weights
        bbest = bias
        abest = accuracy(Y, predictions)
        print('New best')

    acc = accuracy(Y, predictions)

    print(
        "Iter: {:5d} | Cost: {:0.7f} | Accuracy: {:0.7f} ".format(
            it + 1, cost(weights, bias, X, Y), acc
        )
    )

New best
Iter:     1 | Cost: 0.8098898 | Accuracy: 0.7300000 
New best
Iter:     2 | Cost: 0.6831107 | Accuracy: 0.7675000 
New best
Iter:     3 | Cost: 0.6737253 | Accuracy: 0.7775000 
Iter:     4 | Cost: 0.6795760 | Accuracy: 0.7675000 
Iter:     5 | Cost: 0.7620563 | Accuracy: 0.7187500 
Iter:     6 | Cost: 0.7116177 | Accuracy: 0.7525000 
Iter:     7 | Cost: 0.7053307 | Accuracy: 0.7687500 
New best
Iter:     8 | Cost: 0.7806209 | Accuracy: 0.7800000 
New best
Iter:     9 | Cost: 0.8004703 | Accuracy: 0.7812500 
New best
Iter:    10 | Cost: 0.6870119 | Accuracy: 0.7900000 
New best
Iter:    11 | Cost: 0.6127008 | Accuracy: 0.7962500 
Iter:    12 | Cost: 0.6746547 | Accuracy: 0.7912500 
Iter:    13 | Cost: 0.7723625 | Accuracy: 0.7862500 
Iter:    14 | Cost: 0.8143423 | Accuracy: 0.7650000 
Iter:    15 | Cost: 0.8094549 | Accuracy: 0.7625000 
Iter:    16 | Cost: 0.7198515 | Accuracy: 0.7762500 
Iter:    17 | Cost: 0.6490370 | Accuracy: 0.7862500 
Iter:    18 | Cost: 0.6119833 | Accu

In [33]:
Yte = np.array(y_test.values[:,0] * 2 - np.ones(len(y_test.values[:,0])), requires_grad = False)
Xte = np.array(normalize(x_test_lda_n), requires_grad=False)

In [34]:
predictions = [np.sign(variational_classifier(wbest, bbest, x)) for x in Xte]
pred = [np.sign(variational_classifier(wbest, bbest, x)) for x in X]
acc = accuracy(Yte, predictions)

print(f'Cost: {cost(wbest, bbest, Xte, Yte)}, Accuracy: {np.round(acc, 2) * 100}%')

Cost: 0.4756019147241009, Accuracy: 84.0%


In [35]:
pd.DataFrame((predictions, Yte), ('Predictions', 'Test')).T

Unnamed: 0,Predictions,Test
0,-1.0,-1.0
1,-1.0,-1.0
2,-1.0,-1.0
3,-1.0,-1.0
4,-1.0,1.0
...,...,...
195,-1.0,-1.0
196,-1.0,1.0
197,-1.0,-1.0
198,-1.0,-1.0


In [36]:
# Print the classification report and important metrics

print(metrics.classification_report(predictions,Yte))
print(metrics.precision_score(predictions,Yte))
print(metrics.recall_score(predictions,Yte))
print(metrics.f1_score(predictions,Yte))
print(metrics.balanced_accuracy_score(predictions,Yte))

              precision    recall  f1-score   support

        -1.0       1.00      0.84      0.91       200
         1.0       0.00      0.00      0.00         0

    accuracy                           0.84       200
   macro avg       0.50      0.42      0.46       200
weighted avg       1.00      0.84      0.91       200

0.0
0.0
0.0
0.84


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
