In [35]:
import pandas as pd
import numpy as np
import math
from qiskit import QuantumCircuit
from qiskit import Aer, execute

### Lectura del fichero CSV
* Se importa el archivo csv.
* se eliminan las columnas que no son datos y convierte el dataset a binario
* Se separa el dataset en dos, filtrando por train y test

In [36]:
data = pd.read_csv('Montabilidad_to_Quantum.csv',delimiter=';', encoding='latin-1')

columnas_a_quitar = ['file_type','metadata_path', 'metadata_date', 'metadata_size', 'query_key', 'text', 'Expediente', 'Tipo_vehículo', 'Código', 'N_Produccion', 'Fecha_Tecnica', 'metadata_file']
data = data.drop(columnas_a_quitar, axis=1)

# Filtrado y construcción del dataframe binario
vectores_train = data[data['Set'] == 'Train']
vectores_test = data[data['Set'] == 'Test']
vectores_train_bin = data.iloc[:, :-3].apply(lambda x: x > 0).astype(int).to_numpy()[:20, :]
vectores_test_bin = data.iloc[:, :-3].apply(lambda x: x > 0).astype(int).to_numpy()[:20, :]



In [37]:
vectores_train_extended = np.pad(vectores_train_bin, ((0, 0), (0, 1024 - vectores_train_bin.shape[1])), mode='constant')
vectores_test_extended = np.pad(vectores_test_bin, ((0, 0), (0, 1024 - vectores_test_bin.shape[1])), mode='constant')

numero_componentes = len(vectores_train_extended[0])
nqubits = math.ceil(np.log2(numero_componentes))    # compute how many qubits needed to encode either x or y

In [38]:
def inner_prod(vec1, vec2):
    #first check lengths are equal
    if len(vec1) != len(vec2):
        raise ValueError('Lengths of states are not equal')

    circ = QuantumCircuit(nqubits+1,1)
    vec = np.concatenate([vec1,vec2])/np.sqrt(2)

    circ.initialize(vec, range(nqubits+1))
    circ.h(nqubits)
    circ.measure(nqubits,0)

    backend = Aer.get_backend('qasm_simulator')
    job = execute(circ, backend, shots=20000)

    result = job.result()
    outputstate = result.get_counts(circ)

    if ('0' in outputstate.keys()):
        m_sum = float(outputstate["0"])/20000
    else:
        m_sum = 0

    return 2*m_sum-1

In [39]:
exoticos = []
def itera_por_matriz_cuantica():
  for i, test in enumerate(vectores_test_extended):
    for j, train in enumerate(vectores_train_extended):
        train_norm = np.linalg.norm(train)
        test_norm = np.linalg.norm(test)
        train_normalizado = train/train_norm
        test_normalizado = test/test_norm
        correlacion_entre_variables = inner_prod(test_normalizado, train_normalizado)
        if correlacion_entre_variables == 0 and test not in exoticos :
           exoticos.append(test)
        yield [f'test_{i}', f'train_{j}', correlacion_entre_variables]

In [40]:
distancia_vectores = pd.DataFrame(itera_por_matriz_cuantica())
display(distancia_vectores)

Unnamed: 0,0,1,2
0,test_0,train_0,1.0000
1,test_0,train_1,0.5516
2,test_0,train_2,0.7191
3,test_0,train_3,0.5659
4,test_0,train_4,0.4354
...,...,...,...
395,test_19,train_15,0.8549
396,test_19,train_16,0.8844
397,test_19,train_17,0.8641
398,test_19,train_18,0.7405
