In [111]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from qiskit import BasicAer
from qiskit.circuit.library import ZZFeatureMap, ZFeatureMap, PauliFeatureMap

from qiskit.aqua import QuantumInstance, aqua_globals
from qiskit.aqua.algorithms import QSVM, VQC
from qiskit.aqua.utils import split_dataset_to_data_and_labels, map_label_to_class_name

# Fixed seed, assigned to qiskit-aqua's global random_seed setting
seed = 10599
aqua_globals.random_seed = seed

import sys
import os
from pathlib import Path
import pickle
import numpy as np
import time
import sklearn.model_selection as model_selection
from sklearn.decomposition import PCA
# Use the parent of the current working directory as the project root
main_folder=str(Path.cwd().parent) 
# Append the project root to the module search path
sys.path.append(main_folder) 
# Data directory located under the project root
data_folder = f'{main_folder}/data'


In [59]:
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Dropout, Flatten, Dense, Reshape, Input

In [7]:
from sklearn.preprocessing import LabelEncoder

In [96]:
# Load the pickled features (X) and labels (y) from the processed-data folder.
# NOTE(review): pickle.load can execute arbitrary code contained in the file —
# only load pickles that come from a trusted source.
with open(f'{data_folder}/processed/DR16_processed_X.pkl','rb') as input_file:
    X = pickle.load(input_file)
with open(f'{data_folder}/processed/DR16_processed_y.pkl','rb') as input_file:
    y = pickle.load(input_file)

In [97]:
print(X.shape)
print(y.shape)

(100000, 8)
(100000,)


In [98]:
# Encode the class labels in y as integer codes; `le` keeps the fitted mapping
le = LabelEncoder()
y_num = le.fit_transform(y.values)

In [99]:
# Stratified 80/20 hold-out split of the features and the encoded labels
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X,
    y_num,
    train_size=0.8,
    test_size=0.2,
    random_state=101,
    stratify=y_num,
)

# Report the size of each feature split
for split in (X_train, X_test):
    print(split.shape)

(80000, 8)
(20000, 8)


In [13]:
# Per-class sample counts of the training labels, one (label, count) row per class
unique, counts = np.unique(y_train, return_counts=True)
frequencies = np.column_stack((unique, counts))
frequencies

array([[    0, 41058],
       [    1,  8465],
       [    2, 30477]], dtype=int64)

In [14]:
# Keep the first n_examples rows of the hold-out split (and their labels) as a small example set.
# NOTE(review): despite the "images" name these are rows of X_test — confirm the naming.
n_examples = 500
example_images = X_test[0:n_examples]
example_labels = y_test[0:n_examples]

In [44]:
# Width of the encoder output (the compressed representation)
encoded_dim = 3
# Number of input features, taken from the columns of X_train
ncol = X_train.shape[1]
# Keras input tensor for one row of ncol features
input_dim = Input(shape = (ncol, ))
input_dim

<KerasTensor: shape=(None, 8) dtype=float32 (created by layer 'input_5')>

In [60]:
# Encoder Layers: ncol -> 7 -> 5 -> 3 -> encoded_dim, all with ReLU activations
encoded1 = Dense(7, activation = 'relu')(input_dim)
encoded2 = Dense(5, activation = 'relu')(encoded1)
encoded3 = Dense(3, activation = 'relu')(encoded2)
encoded4 = Dense(encoded_dim, activation = 'relu')(encoded3)

# Decoder Layers: mirror of the encoder, ending in ncol sigmoid outputs
decoded1 = Dense(3, activation = 'relu')(encoded4)
decoded2 = Dense(5, activation = 'relu')(decoded1)
decoded3 = Dense(7, activation = 'relu')(decoded2)
decoded4 = Dense(ncol, activation = 'sigmoid')(decoded3)

# Combine Encoder and Decoder layers into one input -> reconstruction model
autoencoder = Model(inputs = input_dim, outputs = decoded4)

# Adam optimizer with every hyper-parameter spelled out explicitly
optim = optimizers.Adam(
    learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, amsgrad=False,
    name='Adam'
)

# Compile the Model
# NOTE(review): a sigmoid output with binary cross-entropy presumably expects the
# input features to be scaled to [0, 1] — confirm against the preprocessing.
autoencoder.compile(optimizer = optim, loss = 'binary_crossentropy')

In [61]:
autoencoder.summary()

Model: "model_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 8)]               0         
_________________________________________________________________
dense_91 (Dense)             (None, 7)                 63        
_________________________________________________________________
dense_92 (Dense)             (None, 5)                 40        
_________________________________________________________________
dense_93 (Dense)             (None, 3)                 18        
_________________________________________________________________
dense_94 (Dense)             (None, 3)                 12        
_________________________________________________________________
dense_95 (Dense)             (None, 3)                 12        
_________________________________________________________________
dense_96 (Dense)             (None, 5)                 20 

In [62]:
# Train the autoencoder to reproduce its own input (X_train is both input and target),
# without shuffling, using the hold-out split as validation data
autoencoder.fit(X_train, X_train, epochs = 30, batch_size = 64, shuffle = False, validation_data = (X_test, X_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x1fc3f0f4be0>

In [100]:
# Encoder-only model: maps the input tensor to the encoder output encoded4
encoder = Model(inputs = input_dim, outputs = encoded4)
# NOTE(review): encoded_input is not used by any cell visible here — confirm it is still needed
encoded_input = Input(shape = (encoded_dim, ))

In [101]:
# Encode the data into DataFrames whose columns are named feature_0, feature_1, ...
# Note: encoded_train is built from every row of X, not only from the training split.
encoded_train = pd.DataFrame(encoder.predict(X)).add_prefix('feature_')

encoded_test = pd.DataFrame(encoder.predict(X_test)).add_prefix('feature_')

In [81]:
# encoded_train['target'] = y_num
# encoded_test['target'] = y_test

In [102]:
print(encoded_train.shape)
encoded_train.head()

(100000, 3)


Unnamed: 0,feature_0,feature_1,feature_2
0,1.744848,1.656402,0.858677
1,1.742593,2.473604,0.890759
2,2.046029,2.894971,1.692442
3,1.69563,3.002543,0.958708
4,1.534996,2.967988,0.604628


In [67]:
encoded_train['feature_1'].value_counts()

2.845912    2
3.081599    2
2.958723    2
2.697657    2
2.547033    2
           ..
2.747480    1
0.577970    1
2.893318    1
2.962333    1
2.739542    1
Name: feature_1, Length: 79557, dtype: int64

## QSVM

In [86]:
from qiskit.aqua.components.multiclass_extensions import AllPairs, OneAgainstRest, ErrorCorrectingCode

In [103]:
def split_balanced(data, target, train_size=100, test_size=60, random_state=0):
    """Draw class-balanced training and test subsets from ``data`` / ``target``.

    Every class contributes the same number of training samples and the same
    number of test samples. Because the per-class counts are rounded down, the
    returned subsets can be slightly smaller than requested (e.g. 99 rows for
    ``train_size=100`` with three classes).

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one sample per row.
    target : numpy.ndarray
        1-D array of class labels aligned with the rows of ``data``.
    train_size : int
        Total number of training samples requested.
    test_size : int or float
        Total number of test samples requested; a value below 1 is read as a
        fraction of ``len(target)``.
    random_state : int
        Seed of the private random generator used for sampling. The default
        reproduces the draws of the previous hard-coded ``np.random.seed(0)``.

    Returns
    -------
    X_train, X_test, y_train, y_test : numpy.ndarray
        Selected rows of ``data`` and the matching entries of ``target``.

    Raises
    ------
    ValueError
        If a class that needs upsampling has fewer than two samples, so it
        cannot be divided into disjoint training and test pools.
    """
    # Private legacy generator: same number stream as np.random.seed(random_state),
    # but the process-wide NumPy RNG is left untouched.
    rng = np.random.RandomState(random_state)

    classes = np.unique(target)
    # test_size can be given as a fraction of the input size or as a number of samples
    if test_size < 1:
        n_test = np.round(len(target) * test_size)
    else:
        n_test = test_size
    n_train = train_size
    n_train_per_class = max(1, int(np.floor(n_train / len(classes))))
    n_test_per_class = max(1, int(np.floor(n_test / len(classes))))
    n_per_class = n_train_per_class + n_test_per_class

    ixs = []
    for cl in classes:
        class_ix = np.nonzero(target == cl)[0]
        n_available = len(class_ix)
        if n_per_class > n_available:
            # Too few samples for this class: upsample with replacement. The class is
            # divided into a training pool and a test pool first so that no data point
            # is shared between the training and the test subset.
            if n_available < 2:
                raise ValueError(
                    f"class {cl!r} has {n_available} sample(s); at least 2 are needed "
                    "to build disjoint training and test pools"
                )
            splitix = int(np.ceil(n_train_per_class / n_per_class * n_available))
            # Rounding up can swallow the whole class; always leave at least one
            # sample on each side of the split.
            splitix = min(max(splitix, 1), n_available - 1)
            ixs.append(np.r_[rng.choice(class_ix[:splitix], n_train_per_class),
                             rng.choice(class_ix[splitix:], n_test_per_class)])
        else:
            ixs.append(rng.choice(class_ix, n_per_class, replace=False))

    # take the same number of samples from all classes
    ix_train = np.concatenate([x[:n_train_per_class] for x in ixs])
    ix_test = np.concatenate([x[n_train_per_class:n_per_class] for x in ixs])

    X_train = data[ix_train, :]
    X_test = data[ix_test, :]
    y_train = target[ix_train]
    y_test = target[ix_test]

    return X_train, X_test, y_train, y_test

In [113]:
# Draw a small class-balanced subset of the encoded data for the SVM experiments.
# Note: this rebinds X_train / X_test / y_train / y_test to the balanced subsets.
X_train, X_test, y_train, y_test = split_balanced(
    np.array(encoded_train),
    y_num,
    train_size=100,
    test_size=60,
)

# Report the shape of each returned array
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(99, 3)
(60, 3)
(99,)
(60,)


In [114]:
# Print the per-class sample counts of the balanced training and test labels
for title, labels in (
    (" Training data distribution\n", y_train),
    (" Test data distribution\n", y_test),
):
    (unique, counts) = np.unique(labels, return_counts=True)
    frequencies = np.asarray((unique, counts)).T
    print(title, frequencies)

 Training data distribution
 [[ 0 33]
 [ 1 33]
 [ 2 33]]
 Test data distribution
 [[ 0 20]
 [ 1 20]
 [ 2 20]]


In [115]:
# Class names in the order used by the fitted LabelEncoder, so that position k in
# class_labels is the class encoded as k in y_num. Iterating a set of strings gives
# no guaranteed order and could pair a name with the wrong integer code.
class_labels = [str(label) for label in le.classes_]
class_labels

['GALAXY', 'QSO', 'STAR']

In [116]:
training_size = len(X_train)
test_size = len(X_test)

# Pair every class label with the integer code used in y_train / y_test:
# numeric label strings are their own code, otherwise the position in class_labels is used.
if class_labels[0].isdigit():
    label_codes = [(label, int(label)) for label in class_labels]
else:
    label_codes = [(label, position) for position, label in enumerate(class_labels)]

# Build the {class label: array of samples} dictionaries, capped at the split sizes
training_input = {
    label: X_train[y_train == code, :][:training_size]
    for label, code in label_codes
}
test_input = {
    label: X_test[y_test == code, :][:test_size]
    for label, code in label_codes
}

In [117]:
# Seed handed to the simulator and the transpiler.
# NOTE(review): this rebinds `seed`, which was 10599 when aqua_globals.random_seed
# was set — confirm the different value is intended.
seed = 10598
# Number of input features given to the feature map
feature_dim = 3

In [118]:
training_input

{'GALAXY': array([[1.6240178 , 2.982646  , 0.76894796],
        [1.0064902 , 0.9820663 , 0.        ],
        [1.7363446 , 2.6736493 , 0.95900154],
        [1.5811248 , 2.7609663 , 0.7163141 ],
        [1.7886126 , 2.4221861 , 1.0641718 ],
        [1.5140903 , 2.4925282 , 0.55449224],
        [2.054512  , 3.3286853 , 1.7986248 ],
        [2.2617693 , 3.8315532 , 2.2655025 ],
        [1.5902625 , 2.5325584 , 0.6489163 ],
        [2.1236913 , 3.4391067 , 1.8855752 ],
        [1.4473133 , 2.7450457 , 0.3264413 ],
        [1.4603544 , 2.8828564 , 0.43791997],
        [1.5766478 , 2.5438974 , 0.5834159 ],
        [1.4681133 , 2.277638  , 0.36074543],
        [1.8156742 , 3.0517778 , 1.1367811 ],
        [1.38679   , 2.9005938 , 0.34282726],
        [1.3891788 , 3.8393967 , 0.5264163 ],
        [1.8061213 , 3.1406417 , 1.2205659 ],
        [1.5762806 , 3.0622594 , 0.7471432 ],
        [2.0080671 , 2.9660532 , 1.5598528 ],
        [1.8002119 , 3.4380507 , 1.1939175 ],
        [1.6028671 , 2.6

In [121]:
# Pauli feature map over feature_dim inputs, a single repetition, Pauli strings Z, X and ZY
feature_map = PauliFeatureMap(feature_dimension=feature_dim, reps=1, paulis = ['Z','X','ZY'])
# QSVM on the per-class train/test dictionaries, with the error-correcting-code multiclass extension
qsvm = QSVM(feature_map, training_input, test_input, multiclass_extension = ErrorCorrectingCode())

# BasicAer 'qasm_simulator' backend, 1024 shots, fixed simulator and transpiler seeds
backend = BasicAer.get_backend('qasm_simulator')
quantum_instance = QuantumInstance(backend, shots=1024, seed_simulator=seed, seed_transpiler=seed)

result = qsvm.run(quantum_instance)

print("testing success ratio: ", result['testing_accuracy'])

RuntimeError: module compiled against API version 0xe but this version of numpy is 0xd

MissingOptionalLibraryError: "The 'CVXPY' library is required to use 'optimize_svm'. You can install it with 'pip install 'qiskit-aqua[cvx]''.  numpy.core.multiarray failed to import."

## Classical SVM

In [120]:
from qiskit.aqua.algorithms import SklearnSVM

# Classical SVM on the same train/test dictionaries, with the all-pairs multiclass extension
svm_classifier = SklearnSVM(training_input, test_input, multiclass_extension=AllPairs())
result = svm_classifier.run()

print(f'Testing success ratio: {result["testing_accuracy"]}')

Testing success ratio: 0.6833333333333333
