# Quantum Machine Learning Project

### Team members:
* Alan Vásquez
* Ángel Álvarez
* María Linares
* Jaissar Cammarata
* Luis Villoria
* Milagro Roja

In [None]:
from qiskit import  Aer
from qiskit.utils import QuantumInstance, algorithm_globals
from qiskit.aqua.algorithms import VQC
from qiskit.algorithms.optimizers import SPSA
from qiskit.circuit.library import TwoLocal, PauliFeatureMap
from qiskit.aqua.utils import  map_label_to_class_name

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from math import *
from sklearn.metrics import *

In [None]:
# Fixed seed: set as Qiskit's global algorithm seed here and reused further
# down for the simulator and the transpiler.
seed = 7777
algorithm_globals.random_seed = seed

# Data import and first look

In [None]:
# Load the test and training sets from the comma-separated files located in
# the working directory.
test_df = pd.read_csv('mock_test_set.csv', delimiter=',')
train_df = pd.read_csv('mock_train_set.csv', delimiter=',')

In [None]:
# Preview the first rows of the training set.
train_df.head()

In [None]:
# Preview the first rows of the test set.
test_df.head()

In [None]:
# Column '4' is used as the class label; columns '0' to '3' are the features.
train_labels = train_df['4'].to_numpy().tolist()
train_data = train_df[['0', '1', '2', '3']].to_numpy().tolist()

In [None]:
# Column '4' is used as the class label; columns '0' to '3' are the features.
test_labels = test_df['4'].to_numpy().tolist()
test_data = test_df[['0', '1', '2', '3']].to_numpy().tolist()

In [None]:
# Plot every column of the training set in its own subplot.
train_df.plot(subplots=True, figsize=(8, 8))

In [None]:
# Plot every column of the test set in its own subplot.
test_df.plot(subplots=True, figsize=(8, 8))

# Normalization and separation of the data

In [None]:
def _split_by_label(samples, sample_labels):
    """Return (zeros, ones): the samples labelled 0, and all remaining samples."""
    zeros, ones = [], []
    for sample, label in zip(samples, sample_labels):
        (zeros if label == 0 else ones).append(sample)
    return zeros, ones


def _normalize_rows(rows):
    """Divide every row by its own Euclidean norm; returns a list of lists."""
    normalized = []
    for row in rows:
        row_norm = np.linalg.norm(row)  # computed once per row, not once per element
        normalized.append([value / row_norm for value in row])
    return normalized


n_train = len(train_data)
n_test = len(test_data)

# Number of features per sample
feature_dim = len(test_data[0])

# Separation of the data into zeros and ones
zero_train, one_train = _split_by_label(train_data, train_labels)
zero_test, one_test = _split_by_label(test_data, test_labels)

# Normalization: every sample is scaled to unit Euclidean norm
zero_train_n = _normalize_rows(zero_train)
one_train_n = _normalize_rows(one_train)

zero_test_n = _normalize_rows(zero_test)
one_test_n = _normalize_rows(one_test)

# Reduction of size and creation of the validation set from the train set.
size_train = 0.9  # 90% of the train set
size_test = 1  # 100% of the test set
size_val = 0.1  # 10% of the train set, held out as the validation set

# np.split cuts at fixed indices, so the validation samples are the last ones
# of each class in their original order (no shuffling is applied).
zero_train_r, zero_val, _ = np.split(
    zero_train_n,
    [int(len(zero_train_n) * size_train), int(len(zero_train_n) * (size_train + size_val))],
)
one_train_r, one_val, _ = np.split(
    one_train_n,
    [int(len(one_train_n) * size_train), int(len(one_train_n) * (size_train + size_val))],
)

zero_test_r = zero_test_n[:int(len(zero_test_n) * size_test)]
one_test_r = one_test_n[:int(len(one_test_n) * size_test)]

# Division in classes for each data set
training_input = {'A': zero_train_r, 'B': one_train_r}
test_input = {'A': zero_test_r, 'B': one_test_r}

# Validation set: datapoints[0] holds the samples (zeros first, then ones) and
# datapoints[1] holds the matching 0/1 labels.
labels = np.concatenate(([0] * len(zero_val), [1] * len(one_val)))
datapoints = [np.concatenate((zero_val, one_val)), labels]

class_to_label = {'A': 0, 'B': 1}

In [None]:
# Report how many samples (both classes combined) ended up in each split.
print(f"Datapoints for training: {len(zero_train_r)+len(one_train_r)}")
print(f"Datapoints for testing: {len(zero_test_r)+len(one_test_r)}")
print(f"Datapoints for validation: {len(zero_val)+len(one_val)}")

# Feature Map

In [None]:
# The most general and flexible feature map available is the Pauli feature map,
# which is customised through its `paulis` parameter.
# NOTE(review): `paulis` is documented as a flat list of Pauli strings, but a
# nested list is passed here - confirm whether paulis=['Z','X','ZY'] was intended.
pauli_fm = PauliFeatureMap(feature_dimension=feature_dim, reps=2, paulis=[['Z','X','ZY']],entanglement='full')

# VQC

In [None]:
# Variational form: a TwoLocal circuit on feature_dim qubits built from 'ry'
# rotation blocks and 'cx' entanglement blocks, with full entanglement and 2 repetitions.
two = TwoLocal(feature_dim, ['ry'],'cx', reps=2, entanglement='full')

# Optimizer

In [None]:
def store_intermediate_result(evaluation, parameter, cost, stepsize, accept):
    """
    Optimizer callback that records the intermediate cost and parameter values.

    Appends `cost` to the module-level list `costs` and `parameter` to the
    module-level list `parameters`, so they can be plotted later. Both lists
    must exist before the optimizer starts calling this function.
    `evaluation`, `stepsize` and `accept` are accepted but not used.
    """
    costs.append(cost)
    parameters.append(parameter)

In [None]:
# We select the SPSA optimizer, which the authors consider the best choice for a
# large population of values. It is limited to 40 iterations and is given
# store_intermediate_result as its callback.
spsa = SPSA(maxiter=40, callback=store_intermediate_result)

# Circuit Simulation

In [None]:
# Use Aer's 'qasm_simulator' backend with the 'statevector' method passed as a
# backend option, 1024 shots, and the fixed seed for both simulator and transpiler.
backend = Aer.get_backend('qasm_simulator')
backend_op =  {'method':'statevector'}
quantum_instance = QuantumInstance(backend, shots=1024, seed_simulator=seed, seed_transpiler=seed, backend_options=backend_op)

# Analysis

In [None]:
# Aux functions

def test_accuracy(actual, predicted):
    """
    Return the fraction of positions at which `predicted` equals `actual`.

    Parameters:
        actual: sequence with the true class of each sample.
        predicted: sequence with the predicted class of each sample; only its
            first len(actual) entries are compared.

    Returns:
        The number of matching positions divided by len(actual), or 0.0 when
        `actual` is empty (instead of failing with a division by zero).

    Raises:
        IndexError: if `predicted` has fewer entries than `actual`.
    """
    total = len(actual)
    if total == 0:
        return 0.0
    if len(predicted) < total:
        # zip() would silently truncate; keep failing loudly as indexing did.
        raise IndexError("predicted has fewer entries than actual")
    correct = sum(1 for truth, guess in zip(actual, predicted) if truth == guess)
    return correct / total


# The name starts with "test_", so pytest would try to collect this helper as a
# test case; opt out explicitly.
test_accuracy.__test__ = False

In [None]:
# Storage for the outcome of one run with a single feature map and a single
# variational form.
result_data = {}

fm, varqc = pauli_fm, two

# Histories for the VQC evaluation: `costs` and `parameters` are appended to by
# the optimizer callback; `counts` is initialised but not used in the visible cells.
counts, costs, parameters = [], [], []

# Assemble the VQC from the optimizer, feature map, variational form and data
# sets; the validation samples are passed as the `datapoints` argument.
vqc = VQC(
    optimizer=spsa,
    feature_map=fm,
    var_form=varqc,
    training_dataset=training_input,
    test_dataset=test_input,
    datapoints=datapoints[0],
)

In [None]:

# Execution of the VQC on the configured quantum instance; the returned result
# is kept for the analysis below.
result = vqc.run(quantum_instance)

In [None]:
# True classes of the validation samples (numeric labels mapped to class names)
# and the classes predicted by the VQC for them.
actual = map_label_to_class_name(datapoints[1], vqc.label_to_class)
predicted = result['predicted_classes']

# Metrics: `accuracy` is a placeholder that stays empty here, `final_accuracy`
# is the fraction of correct predictions, and `conf_mat` holds the output of
# classification_report for classes 'A' and 'B'.
accuracy = []
final_accuracy = test_accuracy(actual, predicted)
conf_mat = classification_report(actual, predicted, labels=['A', 'B'])

# Bundle everything produced by this run into a single dictionary.
result_data = dict(
    parameters=parameters,
    costs=costs,
    accuracy_vals=accuracy,
    result=result,
    final_accuracy=final_accuracy,
    confusion_matrix=conf_mat,
    actual=actual,
    predicted=predicted,
)

In [None]:
# Plot the cost values recorded by the optimizer callback, in the order stored.
plt.plot(result_data['costs'])
plt.xlabel('Iterations')
plt.ylabel('Cost function')
plt.title('Cost function vs Iterations')

In [None]:
# Print the classification_report output stored under 'confusion_matrix'.
print(result_data['confusion_matrix'])

In [None]:
# Display the accuracy computed on the validation samples.
result_data['final_accuracy']