In [1]:
import pandas as pd
import numpy as np
import math
from qiskit import QuantumCircuit
from qiskit import Aer, execute

In [2]:
def preparacion_vectores_train_test(data):
    """Split a DataFrame into its test and train rows using the 'Set' column.

    Rows whose 'Set' value equals 'Test' go to the first element of the
    result and rows whose 'Set' value equals 'Train' go to the second one.
    Rows with any other 'Set' value are left out of both.

    Args:
        data: pandas DataFrame that has a 'Set' column.

    Returns:
        Tuple ``(vectores_test, vectores_train)``; note that test comes first.
    """
    conjunto = data['Set']
    return (data[conjunto == 'Test'], data[conjunto == 'Train'])

In [3]:
def normalizar_vector(vector):
    """Scale ``vector`` to unit Euclidean (L2) norm.

    Args:
        vector: numeric array-like accepted by ``np.linalg.norm``.

    Returns:
        ``vector / ||vector||``.

    Raises:
        ValueError: if the norm is zero (all-zero or empty input). Dividing
            by it would otherwise yield NaN/inf components that only fail
            later, far from the offending row.
    """
    norma = np.linalg.norm(vector)
    if norma == 0:
        raise ValueError('Cannot normalize a vector with zero norm')
    return vector / norma

In [4]:
def padding(vec, qubits):
    """Right-pad ``vec`` with zeros up to length ``2**qubits``.

    Example:
        ``padding(np.array([1, 2, 3]), 2)`` gives ``array([1, 2, 3, 0])``.

    Args:
        vec: one-dimensional array-like.
        qubits: exponent of the target length (target length is 2**qubits).

    Returns:
        Array of length ``2**qubits`` whose leading entries are ``vec``.
    """
    longitud_objetivo = 2**qubits
    ceros_al_final = longitud_objetivo - len(vec)
    # np.pad defaults to constant mode with a fill value of 0.
    return np.pad(vec, pad_width=(0, ceros_al_final))

In [5]:
def inner_prod(vec1, vec2, shots=20000):
    """Estimate the inner product of two state vectors by sampling a circuit.

    Both vectors are zero-padded to the next power-of-two length, stacked
    as ``[vec1, vec2] / sqrt(2)`` and loaded into ``nqubits + 1`` qubits.
    A Hadamard is applied to the extra (highest) qubit, which is then
    measured on the 'qasm_simulator' backend. The estimate returned is
    ``2 * P(0) - 1``, where ``P(0)`` is the observed fraction of '0' outcomes.

    The stacked state has unit norm only when both inputs do; this function
    does not normalize or verify them, so callers should pass real,
    unit-norm vectors.

    Args:
        vec1: first state vector (array-like).
        vec2: second state vector, same length as ``vec1``.
        shots: number of circuit executions used for the estimate.
            Defaults to 20000, the value previously hard-coded.

    Returns:
        The sampled estimate ``2 * P(0) - 1`` (subject to shot noise).

    Raises:
        ValueError: if the lengths differ, the vectors are empty, or
            ``shots`` is not positive.
    """
    # first check lengths are equal
    if len(vec1) != len(vec2):
        raise ValueError('Lengths of states are not equal')

    numero_componentes = len(vec1)
    if numero_componentes == 0:
        # log2(0) is -inf and math.ceil would raise an opaque OverflowError.
        raise ValueError('States must have at least one component')
    if shots <= 0:
        raise ValueError('shots must be a positive integer')

    nqubits = math.ceil(np.log2(numero_componentes))

    vec1 = padding(vec1, nqubits)
    vec2 = padding(vec2, nqubits)

    # One extra qubit selects between vec1 (first half) and vec2 (second half).
    circ = QuantumCircuit(nqubits + 1, 1)
    vec = np.concatenate([vec1, vec2]) / np.sqrt(2)

    circ.initialize(vec, range(nqubits + 1))
    circ.h(nqubits)
    circ.measure(nqubits, 0)

    backend = Aer.get_backend('qasm_simulator')
    job = execute(circ, backend, shots=shots)

    conteos = job.result().get_counts(circ)

    # The '0' key is absent when no shot produced that outcome.
    m_sum = conteos.get('0', 0) / shots

    return 2 * m_sum - 1

In [6]:
#apply(lambda x: x > 0).astype(int)
def itera_por_matriz_cuantica(vectores_test, vectores_train):
    """Yield one similarity record for every (test row, train row) pair.

    For each row, all columns except the last three are taken as the feature
    vector, normalized with ``normalizar_vector`` and compared with
    ``inner_prod``. Each vector is normalized only once rather than once per
    pair.

    Args:
        vectores_test: DataFrame of test rows; must contain 'Categoria' and
            'N_Pedido' columns. The last three columns are excluded from the
            feature vector (presumably non-feature metadata -- TODO confirm).
        vectores_train: DataFrame of train rows with the same column layout.

    Yields:
        Lists ``[f'test_{index_test}', f'train_{index_train}', correlation,
        test['Categoria'], test['N_Pedido']]``, ordered by test row and then
        by train row.
    """
    if vectores_test.empty or vectores_train.empty:
        return

    # Train vectors are reused for every test row, so normalize them up front.
    train_normalizados = [
        (index_train, normalizar_vector(train.iloc[:-3].to_numpy()))
        for index_train, train in vectores_train.iterrows()
    ]

    for index_test, test in vectores_test.iterrows():
        # Invariant across the inner loop: compute once per test row.
        test_normalizado = normalizar_vector(test.iloc[:-3].to_numpy())
        for index_train, train_normalizado in train_normalizados:
            correlacion_entre_variables = inner_prod(test_normalizado, train_normalizado)
            yield [f'test_{index_test}', f'train_{index_train}', correlacion_entre_variables, test['Categoria'], test['N_Pedido']]

In [8]:
# Load the semicolon-separated export and drop the columns that are not used
# in the comparison below.
data = pd.read_csv('Montabilidad_to_Quantum.csv', sep=';', encoding='latin-1')

columnas_a_quitar = [
    'file_type',
    'metadata_path',
    'metadata_date',
    'metadata_size',
    'query_key',
    'text',
    'Expediente',
    'Tipo_vehículo',
    'Código',
    'N_Produccion',
    'Fecha_Tecnica',
    'metadata_file',
]
data = data.drop(columns=columnas_a_quitar)

# Per-category result frames, keyed as 'resultados_<category>' (unsorted).
resultados_por_categoria = {}
# NOTE(review): a later cell reads `exoticos_por_categoria`, but its only
# assignment was left commented out here:
#exoticos_por_categoria = {}

categorias = data.groupby('Categoria')

for nom_categoria, categoria in categorias:
    # The 'CLASE V' category is excluded from the comparison.
    if nom_categoria == 'CLASE V':
        continue

    vectores_test, vectores_train = preparacion_vectores_train_test(categoria)

    columnas = ['Nombre_Test', 'Nombre_Train', 'Correlacion', 'Categoria', 'N_Pedido']
    resultados_correlacion = pd.DataFrame(
        itera_por_matriz_cuantica(vectores_test, vectores_train),
        columns=columnas,
    )
    resultados_correlacion_ordenado = resultados_correlacion.sort_values(by='Correlacion')

    nombre_resultado = f'resultados_{nom_categoria}'
    resultados_por_categoria[nombre_resultado] = resultados_correlacion

    # Show the pairs from lowest to highest estimated correlation.
    display(resultados_correlacion_ordenado)

Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
3333,test_683,train_53,0.4897,AVANTGARDE,5329280040
3334,test_683,train_54,0.4983,AVANTGARDE,5329280040
3337,test_683,train_90,0.5046,AVANTGARDE,5329280040
3336,test_683,train_89,0.5314,AVANTGARDE,5329280040
3331,test_683,train_51,0.5365,AVANTGARDE,5329280040
...,...,...,...,...,...
1603,test_628,train_417,0.9652,AVANTGARDE,5-30033407
938,test_621,train_417,0.9655,AVANTGARDE,5-30033400
1128,test_623,train_417,0.9655,AVANTGARDE,5-30033402
1983,test_632,train_417,0.9664,AVANTGARDE,5-30033411


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
698,test_684,train_232,0.6551,AVANTGARDE LINE ELECTRIC,5482300012
702,test_684,train_277,0.6680,AVANTGARDE LINE ELECTRIC,5482300012
697,test_684,train_230,0.6911,AVANTGARDE LINE ELECTRIC,5482300012
714,test_684,train_406,0.6926,AVANTGARDE LINE ELECTRIC,5482300012
707,test_684,train_331,0.6929,AVANTGARDE LINE ELECTRIC,5482300012
...,...,...,...,...,...
401,test_609,train_412,0.8663,AVANTGARDE LINE ELECTRIC,5-30033358
633,test_617,train_412,0.8668,AVANTGARDE LINE ELECTRIC,5-30033366
575,test_615,train_412,0.8671,AVANTGARDE LINE ELECTRIC,5-30033364
227,test_603,train_412,0.8681,AVANTGARDE LINE ELECTRIC,5-30033352


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
23,test_489,train_206,0.5820,BASE,5-30033330
80,test_491,train_133,0.5837,BASE,5-30033332
48,test_490,train_132,0.5848,BASE,5-30033331
0,test_489,train_1,0.5853,BASE,5-30033330
6,test_489,train_120,0.5859,BASE,5-30033330
...,...,...,...,...,...
154,test_524,train_367,0.9556,BASE,5-30033334
123,test_523,train_367,0.9577,BASE,5-30033333
29,test_489,train_352,0.9773,BASE,5-30033330
91,test_491,train_352,0.9777,BASE,5-30033332


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
141,test_665,train_167,0.7401,EXCLUSIVE,5329233114
35,test_584,train_443,0.7415,EXCLUSIVE,5-30032059
173,test_679,train_439,0.7438,EXCLUSIVE,5-30032064
195,test_680,train_443,0.7484,EXCLUSIVE,5-30032065
119,test_588,train_460,0.7491,EXCLUSIVE,5-30032063
...,...,...,...,...,...
86,test_587,train_335,0.8867,EXCLUSIVE,5-30032062
85,test_587,train_333,0.8876,EXCLUSIVE,5-30032062
128,test_661,train_414,0.8894,EXCLUSIVE,5229235332
138,test_661,train_458,0.8947,EXCLUSIVE,5229235332


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
22,test_641,train_444,0.7135,MARCO POLO,5-30033422
5,test_641,train_278,0.7144,MARCO POLO,5-30033422
2,test_641,train_237,0.7708,MARCO POLO,5-30033422
6,test_641,train_279,0.7772,MARCO POLO,5-30033422
1,test_641,train_236,0.7924,MARCO POLO,5-30033422
3,test_641,train_238,0.7956,MARCO POLO,5-30033422
4,test_641,train_263,0.8037,MARCO POLO,5-30033422
8,test_641,train_325,0.8048,MARCO POLO,5-30033422
10,test_641,train_401,0.8083,MARCO POLO,5-30033422
23,test_641,train_445,0.8175,MARCO POLO,5-30033422


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
61,test_553,train_76,0.3903,MIXTO,5229235333
203,test_575,train_150,0.4108,MIXTO,5482300008
188,test_574,train_150,0.4136,MIXTO,5482300007
201,test_575,train_148,0.4229,MIXTO,5482300008
187,test_574,train_149,0.4240,MIXTO,5482300007
...,...,...,...,...,...
74,test_553,train_382,0.7627,MIXTO,5229235333
44,test_538,train_382,0.7627,MIXTO,5-30033346
13,test_463,train_369,0.7636,MIXTO,5229235324
11,test_463,train_228,0.7644,MIXTO,5229235324


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
2824,test_526,train_101,0.1022,NADA,5482300014
2573,test_512,train_70,0.1053,NADA,5482300013
2774,test_526,train_18,0.1057,NADA,5482300014
2544,test_512,train_18,0.1057,NADA,5482300013
2818,test_526,train_94,0.1068,NADA,5482300014
...,...,...,...,...,...
2969,test_601,train_243,0.8687,NADA,5329280132
2970,test_601,train_244,0.8716,NADA,5329280132
2968,test_601,train_242,0.8742,NADA,5329280132
2967,test_601,train_241,0.8751,NADA,5329280132


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
168,test_515,train_20,0.6011,PRO,5-30033312
173,test_515,train_212,0.6036,PRO,5-30033312
187,test_516,train_212,0.6041,PRO,5-30033313
159,test_514,train_212,0.6043,PRO,5-30033311
154,test_514,train_20,0.6055,PRO,5-30033311
...,...,...,...,...,...
53,test_472,train_361,0.9585,PRO,5-30033303
110,test_476,train_362,0.9587,PRO,5-30033307
67,test_473,train_361,0.9607,PRO,5-30033304
153,test_513,train_366,0.9822,PRO,5-30033310


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
52,test_510,train_364,0.5916,SELECT,5329233109
50,test_510,train_360,0.6011,SELECT,5329233109
51,test_510,train_363,0.6024,SELECT,5329233109
53,test_510,train_365,0.6026,SELECT,5329233109
54,test_510,train_368,0.6563,SELECT,5329233109
...,...,...,...,...,...
72,test_521,train_364,0.9697,SELECT,5-30033328
70,test_521,train_360,0.9698,SELECT,5-30033328
77,test_522,train_364,0.9698,SELECT,5-30033329
71,test_521,train_363,0.9700,SELECT,5-30033328


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
34,test_590,train_8,0.5478,STYLE,5-30032067
1090,test_681,train_8,0.5493,STYLE,5-30032072
1123,test_682,train_8,0.5532,STYLE,5-30032073
1089,test_681,train_7,0.5572,STYLE,5-30032072
166,test_594,train_8,0.5576,STYLE,5-30032071
...,...,...,...,...,...
1052,test_674,train_451,0.9777,STYLE,5329280054
1019,test_673,train_451,0.9778,STYLE,5329280053
1084,test_675,train_450,0.9779,STYLE,5329280055
1021,test_673,train_453,0.9787,STYLE,5329280053


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
512,test_560,train_140,0.4287,TOURER PRO,5482300009
662,test_576,train_140,0.4345,TOURER PRO,5482300010
675,test_576,train_272,0.4450,TOURER PRO,5482300010
0,test_527,train_136,0.4482,TOURER PRO,5-30032074
528,test_560,train_340,0.4484,TOURER PRO,5482300009
...,...,...,...,...,...
382,test_547,train_374,0.9818,TOURER PRO,5-30033393
172,test_540,train_374,0.9823,TOURER PRO,5-30033386
441,test_549,train_373,0.9830,TOURER PRO,5-30033395
321,test_545,train_373,0.9830,TOURER PRO,5-30033391


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
111,test_551,train_314,0.5474,TOURER SELECT,5229233110
19,test_465,train_273,0.5578,TOURER SELECT,5229235384
122,test_562,train_147,0.5743,TOURER SELECT,5-30032079
137,test_563,train_147,0.5747,TOURER SELECT,5-30032159
108,test_551,train_271,0.5757,TOURER SELECT,5229233110
...,...,...,...,...,...
84,test_535,train_378,0.9709,TOURER SELECT,5-30033341
98,test_536,train_377,0.9722,TOURER SELECT,5-30033342
101,test_536,train_380,0.9726,TOURER SELECT,5-30033342
164,test_566,train_388,0.9792,TOURER SELECT,5-30033343


Unnamed: 0,Nombre_Test,Nombre_Train,Correlacion,Categoria,N_Pedido
8,test_552,train_268,0.5815,TOURIER,5229235295
6,test_552,train_152,0.5858,TOURIER,5229235295
15,test_568,train_139,0.5912,TOURIER,5-30033348
16,test_568,train_144,0.5986,TOURIER,5-30033348
27,test_569,train_139,0.6001,TOURIER,5-30033349
28,test_569,train_144,0.6013,TOURIER,5-30033349
20,test_568,train_268,0.6052,TOURIER,5-30033348
32,test_569,train_268,0.6091,TOURIER,5-30033349
18,test_568,train_152,0.6105,TOURIER,5-30033348
3,test_552,train_139,0.6139,TOURIER,5229235295


In [None]:
# Every row flagged as 'Test' in `data`, across all categories.
vectores_test = data.loc[data['Set'] == 'Test']
def similitud_test(exoticos, vectores_test):
    """Yield one similarity record for every (exotic order, test row) pair.

    Each exotic order is looked up in the notebook-level ``data`` DataFrame
    by its 'N_Pedido'; the first matching row, minus its last three columns,
    is used as the feature vector. Test rows use all of their columns except
    the last three. Vectors are normalized with ``normalizar_vector`` and
    compared with ``inner_prod``.

    Args:
        exoticos: DataFrame with an 'N_Pedido' column identifying the orders
            to compare (presumably outliers picked from earlier results --
            TODO confirm).
        vectores_test: DataFrame of test rows with 'Categoria' and 'N_Pedido'
            columns and the same column layout as ``data``.

    Yields:
        Lists ``[f'exotico_{index_exotico}', f'test_{index_test}', similarity,
        test['Categoria'], test['N_Pedido']]``.

    Raises:
        ValueError: if an exotic order's 'N_Pedido' is not present in ``data``.
    """
    for index_exotico, exotico in exoticos.iterrows():
        filas_exotico = data[data['N_Pedido'] == exotico['N_Pedido']]
        if filas_exotico.empty:
            raise ValueError(f"N_Pedido {exotico['N_Pedido']!r} not found in data")
        # Slice columns (not rows) of the matched row; computed once per exotic.
        exotico_normalizado = normalizar_vector(filas_exotico.iloc[0, :-3].to_numpy())
        for index_test, test in vectores_test.iterrows():
            test_normalizado = normalizar_vector(test.iloc[:-3].to_numpy())
            similitud_entre_variables = inner_prod(exotico_normalizado, test_normalizado)
            yield [f'exotico_{index_exotico}', f'test_{index_test}', similitud_entre_variables, test['Categoria'], test['N_Pedido']]
            

In [None]:
# For every category, find the test order most similar to each exotic order.
#
# NOTE(review): `exoticos_por_categoria` is not defined in the visible part of
# this notebook (its only assignment is commented out). It is assumed here to
# be a dict mapping a category name to a DataFrame of exotic orders that has
# an 'N_Pedido' column -- confirm before running.
#
# The rows produced by similitud_test carry the exotic label in the first
# field and the test label in the second, so under these column names
# 'Nombre_Test' holds 'exotico_<index>' and 'N_Pedido' is the test order.
columnas = ['Nombre_Test', 'Nombre_Train', 'Correlacion', 'Categoria', 'N_Pedido']
for nom_categoria, exoticos in exoticos_por_categoria.items():
    # `columns` belongs to the DataFrame constructor, not to similitud_test.
    resultados_similitud = pd.DataFrame(similitud_test(exoticos, vectores_test), columns=columnas)
    if resultados_similitud.empty:
        continue

    # Map each 'exotico_<index>' label back to that exotic order's N_Pedido.
    pedido_por_exotico = {
        f'exotico_{indice}': pedido
        for indice, pedido in exoticos['N_Pedido'].items()
    }

    # Row with the highest similarity for each exotic order.
    indices_max = resultados_similitud.groupby('Nombre_Test', sort=False)['Correlacion'].idxmax()
    similares = resultados_similitud.loc[indices_max]

    # Built under its own name so the global `data` DataFrame, which
    # similitud_test reads, is not overwritten.
    resultados = pd.DataFrame({
        'Exotico': similares['Nombre_Test'].map(pedido_por_exotico).to_numpy(),
        'Similar': similares['N_Pedido'].to_numpy(),
    })
    display(resultados)