In [1]:
#Importaciones esenciales
import numpy as np
import pandas as pd

In [2]:
#Cargamos lo datos históricos (datos crudos)
df = pd.read_csv('piezas.csv')
df_org = df.copy(True)
df

Unnamed: 0,CG_PROD,DES_PROD,FE_EMIT,FECHA_INICIO
0,C-16073-1,CUERPO 2400 3L4 150 x 150,2023-07-03,2023-09-13
1,C-16073-1,CUERPO 2400 3L4 150 x 150,2023-09-13,2023-09-13
2,D-13261-15,"VASTAGO 2804-B 1""x2""-1.1/2""x2""",2023-06-02,2023-09-13
3,D-13262-15,GUIA PORTAD.2800-B 1x2-1.1/2x2,2023-09-13,2023-09-13
4,D-13274-15,DISCO 2800 F,2023-08-30,2023-09-13
...,...,...,...,...
12023,D-16638-15,"TORN.REG.1805 1/2-1.1/2"" (CORTO)",2021-02-23,2021-01-03
12024,D-17725-15,"VASTAGO 2804 1""x2"" A 1.1/2 X 3 """,2021-02-05,2021-01-03
12025,D-17910-10,"CAPUCHON 1700 1.1/2""",2021-01-19,2021-01-03
12026,D-17507-1,"BONETE 2804 2""X3"" MOD.10022",2022-04-22,2020-08-05


In [4]:
#Recortamos para que la fecha de emisión no incluya la hora, ya que puede ser imprecisa

df['FE_EMIT'] = df['FE_EMIT'].str[:11]
df['FECHA_INICIO'] = df['FECHA_INICIO'].str[:11]


In [5]:
#Creamos una función que pasa de una fecha a un número desde el nacimiento de Cristo
def date_to_num(date:str):
    year = int(date[:4])
    month = int(date[5:7])
    day = int(date[8:])
    return year*365+month*30+day

In [6]:
#Aplicamos las funciones para pasar las fechas relevantes a un escalar
df['FECHA_INICIO'] = df['FECHA_INICIO'].apply(lambda x: date_to_num(x))
df['FE_EMIT'] = df['FE_EMIT'].apply(lambda x: date_to_num(x))

### LÓGICA 1 (descontinuada)

Si FE_EMIT1<FE_ENTREGA2<FE_ENTREGA1 el producto 2 existió en simúltaneo con el producto 1  

In [None]:
def simultaneo(fe_emit1:int, fe_entrega1:int, fe_emit2:int, fe_entrega2:int):
    return (fe_emit1<=fe_emit2 and fe_emit2<fe_entrega1) or (fe_emit1<fe_entrega2 and fe_entrega2<fe_entrega1) or (fe_emit1>=fe_emit2 and fe_entrega1<=fe_entrega2)

In [None]:
empty = np.full(len(df), np.nan)
df.insert(3, 'SIMULTANEO', empty)

In [None]:
df_cop = df.copy(True)

In [None]:
df = df.drop_duplicates('PEDIDO')
df.loc[:, 'SIMULTANEO'] = df.loc[:, 'SIMULTANEO'].apply(lambda x: np.full(len(df), np.nan))
#df

In [None]:
#Con pandas

for i in range(len(df)):
    row1 = df.iloc[i]
    for k in range(i, len(df)):
        row2 = df.iloc[k]
        simul = simultaneo(row1[5], row1[4], row2[5], row2[4])
        df.iloc[i][3][k] = simul
        df.iloc[k][3][i] = simul

In [None]:
#Con numpy MUCHO MÁS RÁPIDO!!
data = df.to_numpy()
for i in range(len(df)):
    row1 = data[i]
    for k in range(i, len(df)):
        row2 = data[k]
        simul = simultaneo(row1[5], row1[4], row2[5], row2[4])
        data[i][3][k] = simul
        data[k][3][i] = simul

In [None]:
df = pd.DataFrame(data=data, columns=df.columns)

In [None]:
np.sum([i for i in df.iloc[0][3]])

611.0

In [None]:
simultaneo(df.iloc[0][5], df.iloc[0][4], df.iloc[1][5], df.iloc[1][4])

True

In [None]:
simultaneo(df.iloc[0][5], df.iloc[0][4], df.iloc[3][5], df.iloc[3][4])

True

In [None]:
df[np.asarray([i for i in df.iloc[0][3]], dtype=bool)]

Unnamed: 0,CG_PROD,DES_PROD,CANT,SIMULTANEO,FE_ENTREGA,FE_EMIT,FE_CIERRE,PEDIDO,FECHA_PREVISTA_FABRICACION,FECHA_INICIO_REAL_FABRICACION,TOT_DOL
0,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738759,738513,,60603,,,1756.25
1,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738759,738513,,60604,,,1756.25
2,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738759,738513,,60643,,,2262.56
3,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738759,738513,,60644,,,2262.56
4,2638H2R1RAM1017,"2600 1.1/2""H3"" 300(RF)x150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738759,738513,,60629,,,2005.02
...,...,...,...,...,...,...,...,...,...,...,...
606,2838F4R2RAM1017,"2800 1.1/2""F3"" 900(RF)x300(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738519,738461,2023-04-11 13:52:21.773,60278,2023-04-11 07:10:47.777,2023-04-11 13:52:01.487,3503.0
607,1617A1R1RSM1010,"1600 1/2"" x 1"" 150(RF) x 150(RF) Tob 1/2""Std. ...",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738518,738477,,60369,,,895.4
608,1621B1R1RSM1010,"1600 3/4"" x 1"" 150(RF) x 150(RF)",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738518,738477,,60368,,,966.9
609,2842J1R1RSM1010,"2800 2""J3"" 150(RF)x150(RF) sellada",1.0,"[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, ...",738518,738477,,60370,,,1996.5


In [None]:
df['DES_PROD'].str[:4].unique()
PRODUCTS = ['2600', '2800', '1800', '1700', '2100', '2400', '1600', '9300', '1100', '3000', '2805', '2405', '1804', '1900', '1500', '1400', '9200', '9100', '1200', '1805', '6800']
NAME_LEN = 7
[df['DES_PROD'].apply(lambda x: x[:4] if x[:4] in PRODUCTS else x).str[:NAME_LEN].unique(),
 len(df['DES_PROD'].apply(lambda x: x[:4] if x[:4] in PRODUCTS else x).str[:NAME_LEN].unique())]

[array(['2600', '2800', '1800', 'AC-350 ', '1700', '2100', '2400',
        'AC/R-50', '1600', 'AR-30 1', 'AR-30 2', 'AR-20 1', '9300', '1100',
        '3000', '2805', 'Junta c', '2405', '1804', '1900', '1500', '1400',
        'AR-20 2', 'VA-1000', '9200', '9100', '1200', 'AC/R-35', 'AR-50 1',
        '1805', 'AC-500 ', 'AR-10 1', 'VA-3000', 'AC-900 ', 'AR-10 3',
        '6800', 'AC/R-90', 'CALENTA', 'AR-30 4', 'AR-20 6', 'AR-10 4',
        'AR-70 1', 'AR-30 3', 'AR-70 3', 'VA-1400', 'AR-40 1', 'VA-1200',
        'PILOTO ', 'ACR-900', 'Filtro ', 'AC-200 ', 'AR-90 1'],
       dtype=object),
 52]

In [None]:
df.drop(index=(df[df['FE_EMIT']==df['FE_ENTREGA']].index), axis=1)

In [None]:
len(df)

501

In [None]:
df.to_csv('HISTORICO_SIMUL.csv')

In [None]:
df_org.drop_duplicates('PEDIDO')

Unnamed: 0,CG_PROD,DES_PROD,CANT,FE_ENTREGA,FE_EMIT,FE_CIERRE,PEDIDO,FECHA_PREVISTA_FABRICACION,FECHA_INICIO_REAL_FABRICACION,TOT_DOL
0,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,2023-12-04,2023-03-28 11:27:43.360,,60603,,,1756.25
1,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,2023-12-04,2023-03-28 11:27:43.987,,60604,,,1756.25
2,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,2023-12-04,2023-03-28 11:28:07.730,,60643,,,2262.56
3,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,2023-12-04,2023-03-28 11:28:08.310,,60644,,,2262.56
4,2638H2R1RAM1017,"2600 1.1/2""H3"" 300(RF)x150(RF)",1.0,2023-12-04,2023-03-28 11:27:59.260,,60629,,,2005.02
...,...,...,...,...,...,...,...,...,...,...
3505,2142H2R1RAM1013,"2100 2""H3"" 300(RF)x150(RF)",1.0,2019-10-17,2020-01-10 11:04:06.967,2020-01-24 00:00:00.000,55960,,,1503.36
3506,2142J1R1RAM1012,"2100 2""J3"" 150(RF)x150(RF)",1.0,2019-10-17,2020-01-13 08:15:30.187,2020-01-24 00:00:00.000,55955,,,1384.32
3507,2142J2R1RAM1012,"2100 2""J3"" 300(RF)x150(RF)",1.0,2019-10-17,2020-01-10 11:04:12.380,2020-01-28 13:54:49.903,55957,,,1398.72
3508,2842J1R1RFM1010,"2800 2""J3"" 150(RF)x150(RF) Con Palanca",1.0,2019-03-06,2020-01-28 09:26:21.813,,56407,,,0.00


In [None]:
df_org.iloc[0], df_org.iloc[500]

(CG_PROD                                         2636F2R1RAM1017
 DES_PROD                         2600 1.1/2"F2" 300(RF)x150(RF)
 CANT                                                        1.0
 FE_ENTREGA                                           2023-12-04
 FE_EMIT                                 2023-03-28 11:27:43.360
 FE_CIERRE                                                   NaN
 PEDIDO                                                    60603
 FECHA_PREVISTA_FABRICACION                                  NaN
 FECHA_INICIO_REAL_FABRICACION                               NaN
 TOT_DOL                                                 1756.25
 Name: 0, dtype: object,
 CG_PROD                                     2666Q1R1RBB3117
 DES_PROD                         2600 6"Q8" 150(RF)x150(RF)
 CANT                                                    1.0
 FE_ENTREGA                                       2023-05-28
 FE_EMIT                             2023-02-28 07:17:56.483
 FE_CIERRE          

In [None]:
df_org['DES_PROD'].str[:4].unique(), df_org['DES_PROD'].str[:7].unique()

(array(['2600', '2800', '1800', 'AC-3', '1700', '2100', '2400', 'AC/R',
        '1600', 'AR-3', 'AR-2', '9300', '1100', '3000', '2805', 'Junt',
        '2405', '1804', '1900', '1500', '1400', 'VA-1', '9200', '9100',
        '1200', 'AR-5', '1805', 'AC-5', 'AR-1', 'VA-3', 'AC-9', '6800',
        'CALE', 'AR-7', 'AR-4', 'PILO', 'ACR-', 'Filt', 'AC-2', 'AR-9'],
       dtype=object),
 array(['2600 1.', '2800 1.', '2600 3"', '1800 1"', '2800 6"', 'AC-350 ',
        '1800 2"', '2800 3"', '1700 3/', '1800 1/', '1800 3/', '2100 1"',
        '2100 1.', '1800 1.', '2800 1"', '1700 1/', '2400 3"', '2800 2"',
        'AC/R-50', '2600 1"', '1700 1"', '1800 AP', '2800 4"', '1600 3/',
        'AR-30 1', 'AR-30 2', 'AR-20 1', '9300 8"', '1100 3/', '9300 6"',
        '3000 VA', '2805 2"', '2400 1.', 'Junta c', '1800 2.', '2405 1.',
        '1600 1"', '2100 4"', '2600 4"', '1804 1"', '1804 1/', '1900 3/',
        '2600 2"', '1500 1/', '1400 1/', 'AR-20 2', 'VA-1000', '9200 3"',
        '9300 4"', '2600 

In [None]:
df.to_csv('HISTORICO_SIMUL_1.csv')

In [None]:
df = pd.read_csv('HISTORICO_SIMUL.csv')
df

Unnamed: 0.1,Unnamed: 0,CG_PROD,DES_PROD,CANT,SIMULTANEO,FE_ENTREGA,FE_EMIT,FE_CIERRE,PEDIDO,FECHA_PREVISTA_FABRICACION,FECHA_INICIO_REAL_FABRICACION,TOT_DOL
0,0,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60603,,,1756.25
1,2,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60643,,,2262.56
2,4,2638H2R1RAM1017,"2600 1.1/2""H3"" 300(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60629,,,2005.02
3,10,2838H1R1RSM1014,"2800 1.1/2""H3"" 150(RF)x150(RF) sellada",1.0,[0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738742,738597,,61272,,,1480.80
4,14,2651K1R1RBM1016,"2600 3""K4"" 150(RF)x150(RF)",1.0,[0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738718,738534,,60765,,,3355.80
...,...,...,...,...,...,...,...,...,...,...,...,...
724,3477,2128E3R1RAM1013,"2100 1""E2"" 600(RF)x150(RF)",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737352,737352,2020-01-24 00:00:00.000,55962,,,1158.72
725,3482,1821X9M9HKM1310,"1800 3/4""x1"" NPT(M)xNPT(H) Palanca Sellada",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737345,737345,2020-01-28 00:00:00.000,56269,,,440.00
726,3499,2836F3R1RAM3113,"2800 1.1/2""F2"" 600(RF)x150(RF)",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737318,737318,2020-03-04 00:00:00.000,56238,,,2390.00
727,3500,3515C3Z1,"AC-500 1/2"" BSP H-H Punta eje libre g.indist.",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737318,737318,2020-01-20 00:00:00.000,56244,,,683.00


In [None]:
del df['Unnamed: 0']

In [None]:
df

Unnamed: 0.1,Unnamed: 0,CG_PROD,DES_PROD,CANT,SIMULTANEO,FE_ENTREGA,FE_EMIT,FE_CIERRE,PEDIDO,FECHA_PREVISTA_FABRICACION,FECHA_INICIO_REAL_FABRICACION,TOT_DOL
0,0,2636F2R1RAM1017,"2600 1.1/2""F2"" 300(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60603,,,1756.25
1,2,2638G1R1RAM1017,"2600 1.1/2""G3"" 150(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60643,,,2262.56
2,4,2638H2R1RAM1017,"2600 1.1/2""H3"" 300(RF)x150(RF)",1.0,[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738759,738513,,60629,,,2005.02
3,10,2838H1R1RSM1014,"2800 1.1/2""H3"" 150(RF)x150(RF) sellada",1.0,[0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738742,738597,,61272,,,1480.80
4,14,2651K1R1RBM1016,"2600 3""K4"" 150(RF)x150(RF)",1.0,[0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. ...,738718,738534,,60765,,,3355.80
...,...,...,...,...,...,...,...,...,...,...,...,...
724,3477,2128E3R1RAM1013,"2100 1""E2"" 600(RF)x150(RF)",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737352,737352,2020-01-24 00:00:00.000,55962,,,1158.72
725,3482,1821X9M9HKM1310,"1800 3/4""x1"" NPT(M)xNPT(H) Palanca Sellada",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737345,737345,2020-01-28 00:00:00.000,56269,,,440.00
726,3499,2836F3R1RAM3113,"2800 1.1/2""F2"" 600(RF)x150(RF)",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737318,737318,2020-03-04 00:00:00.000,56238,,,2390.00
727,3500,3515C3Z1,"AC-500 1/2"" BSP H-H Punta eje libre g.indist.",1.0,[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ...,737318,737318,2020-01-20 00:00:00.000,56244,,,683.00


### Lógica 2 (vigente)

Si FE_EMIT2<FE_ENTREGA1 el producto 2 existió en simúltaneo con el producto 1, estando los productos ordenados por FE_ENTREGA

In [8]:
#Lo primero que hacemos es limpiar el dataset eliminando los pedidos repetidos
dt =df
dt = dt[['CG_PROD', 'FE_EMIT', 'FECHA_INICIO']].copy(True)

In [10]:
#Luego ordenamos el dataset según las fechas de entrega que ya fueron convertidas a días
dt.sort_values('FECHA_INICIO', inplace=True)
dt

Unnamed: 0,CG_PROD,FE_EMIT,FECHA_INICIO
12027,D-17956-14,737488,737538
12026,D-17507-1,738172,737545
12024,D-17725-15,737730,737698
12023,D-16638-15,737748,737698
12022,D-13922-15,737750,737698
...,...,...,...
9,D-17726-14,738643,738678
10,D-17726-26,738665,738678
11,D-17930-15,737571,738678
6,D-13851-1,738608,738678


In [11]:
#Creamos un arreglo X donde se almacenarán los ejemplos de entrenamiento (la secuencia temporal de los productos hechos)
X = []
data = dt.to_numpy()
dt = dt.drop(dt[data[:, 1]>data[:,2]].index) #Eliminamos datos anómalos en los que la fecha de entrega se dió antes que la fecha de emisión
data = dt.to_numpy() #Convertimos los datos a numpy para mayor velocidad y simpleza
data

array([['D-17956-14     ', 737488, 737538],
       ['D-17956-15     ', 737690, 737700],
       ['D-13252-14', 737706, 737706],
       ...,
       ['D-17930-15', 737571, 738678],
       ['D-13851-1      ', 738608, 738678],
       ['C-16073-1', 738608, 738678]], dtype=object)

In [12]:
dt

Unnamed: 0,CG_PROD,FE_EMIT,FECHA_INICIO
12027,D-17956-14,737488,737538
12014,D-17956-15,737690,737700
12011,D-13252-14,737706,737706
12010,C-15245-1,737706,737706
12008,D-13526-1,737476,737709
...,...,...,...
9,D-17726-14,738643,738678
10,D-17726-26,738665,738678
11,D-17930-15,737571,738678
6,D-13851-1,738608,738678


In [13]:
"""
Esta verificación lo único que hace es chequear que el último
elemento fue el último en ser entregado,
por ello esta lleno de True ya que todos los productos que lo
preceden tuvieron fechas de inicio previas a la fecha de cierre del último
"""
data[0:len(dt), 1]<=data[len(dt)-1, 2]

array([ True,  True,  True, ...,  True,  True,  True])

In [14]:
"""
Para todo el dataset lo que se hace es buscar los productos que hayan sido producidos previo la fecha de cierre del elemento i
A su vez, al avanzar de producto se verifica que la fecha de entrega de ese producto sea única, si no lo es lo que se hace es
buscar el primer producto que tenga esa fecha de entrega e incluir de ahí en adelante (ya que de otra forma productos entregados
el mismo día tendrían diferentes ventanas cuando en realidad esto no sucedio de este modo). De ser única la fecha de entrega lo que
se hace es simplemente indexar de ahí en adelante ya que los productos previos ya fueron entregados, por lo que el modelo no debería
de importarle.
"""
for i in range(len(dt)):
    index = np.argmax(data[:, 2]==data[i, 2])
    X.append(np.concatenate(([False for j in range(index)], data[index:, 1]<=data[i, 2])))

In [16]:
#Lo que se hace es simplemente mapear las máscaras de simultaneo a los pedidos
pedidos = [dt[np.asarray(i, dtype=bool)]['CG_PROD'].to_numpy() for i in X]

In [17]:
#Se inserta una columna en el df que dice los productos que coexistieron
dt.insert(3, 'SIMUL', value = pedidos)

In [18]:
dt

Unnamed: 0,CG_PROD,FE_EMIT,FECHA_INICIO,SIMUL
12027,D-17956-14,737488,737538,"[D-17956-14 , D-13526-1 , C-12859C-53..."
12014,D-17956-15,737690,737700,"[D-17956-15 , D-13526-1 , D-13262-15,..."
12011,D-13252-14,737706,737706,"[D-13252-14, C-15245-1, D-13526-1 , D-152..."
12010,C-15245-1,737706,737706,"[D-13252-14, C-15245-1, D-13526-1 , D-152..."
12008,D-13526-1,737476,737709,"[D-13526-1 , D-15248-15, D-11374-26, D-13..."
...,...,...,...,...
9,D-17726-14,738643,738678,"[C-16073-1 , D-13261-15 , D-13262-15 ..."
10,D-17726-26,738665,738678,"[C-16073-1 , D-13261-15 , D-13262-15 ..."
11,D-17930-15,737571,738678,"[C-16073-1 , D-13261-15 , D-13262-15 ..."
6,D-13851-1,738608,738678,"[C-16073-1 , D-13261-15 , D-13262-15 ..."


In [19]:
#Esta función resume todo lo hecho hasta el momento
def logica2():
    global dt
    dt = df.drop_duplicates('CG_ORDF').copy(True)
    dt = dt[['CG_ORDF', 'FE_EMIT', 'FECHA']].copy(True)
    dt.sort_values('FECHA', inplace=True)
    X = []
    data = dt.to_numpy()
    dt = dt.drop(dt[data[:, 1]>data[:,2]].index)
    data = dt.to_numpy()
    for i in range(len(dt)):
        index = np.argmax(data[:, 2]==data[i, 2])
        X.append(np.concatenate(([False for j in range(index)], data[index:, 1]<=data[i, 2])))
    pedidos = [dt[np.asarray(i, dtype=bool)]['CG_ORDF'].to_numpy() for i in X]
    dt.insert(3, 'SIMUL', value = pedidos)
    print("Hecho!")

In [None]:
logica2()

Hecho!


### Postprocesamiento

In [20]:
feat_map = {
    "other":0,
    "CAPUCHON": 1,
    "CUERPO": 2,
    "CPO": 2,
    "BONETE": 3,
    "ANILLO": 4,
    "DISCO": 5,
    "CPO.TOB": 6,
    "CPO TOB": 6,
    "CONTRATUERCA": 7,
    "TOBERA": 8,
    "BUJE": 9,
    "GUIA": 10,
    "GUÍA": 10,  # Add both forms of GUIA
    "RETEN": 11,
    "VASTAGO": 12,
    "VAST": 12,
    "TORN": 13,
    "PORTADISCO": 14,
    "TUERCA": 15,
    "CABEZA": 16,
    "APOYO": 17,
    "CAJA": 18,
    "OBTURADOR": 19,
}

In [21]:
#Inserto una columna destinada a preservar la descripción del producto que luego será de utilidad
dt.insert(4 ,'DES', np.zeros(len(dt)))

In [22]:
#Llenamos la columna de descripción con los datos que corresponden
dt['DES'] = dt['CG_PROD'].apply(lambda x: [df[df['CG_PROD']==x].DES_PROD.iloc[0]])
#dt

In [23]:
#Creamos variables auxiliares a usar después
data = dt.to_numpy()
keys = feat_map.keys()

In [24]:
#En esta parte lo que se hace es convertir todos los datos en tokens para que mi modelo luego los logre procesar sin problema

for i in range(len(data)):
    for j in range(len(data[i, 3])):
        product_index = np.argmax(data[i, 3][j] == data[:, 0]) #Buscamos el índice del producto dentro de la lista de SIMUL según el pedido
        product_name = data[product_index, 4][0] #Obtenemos la descripción/nombre del producto
        #Encodeamos la secuencia
        for k in keys:
            if k in product_name: #Si la key está dentro de la descripción mapeamos a su correspondiente encodeo
                data[i, 3][j] = feat_map[k]
                break
            else: #Si la key no está se encodea como other
                data[i, 3][j] = feat_map['other']



for i in range(len(data)):
    #Encodeamos la etiqueta Y (producto en cuestión)
    for k in keys:
        if k in data[i, 4][0]:
            data[i, 4][0] = feat_map[k]
            break
        elif k==list(keys)[-1]:
            data[i, 4][0] = feat_map['other']


In [25]:
#Reemplazamos los datos el df por los tokenizados
dt['SIMUL'] = data[:, 3]
dt['DES'] = data[:, 4]
dt

Unnamed: 0,CG_PROD,FE_EMIT,FECHA_INICIO,SIMUL,DES
12027,D-17956-14,737488,737538,"[12, 1, 0, 12, 12, 0, 12, 0, 12, 0, 3, 5, 0, 1...",[12]
12014,D-17956-15,737690,737700,"[12, 1, 10, 0, 0, 12, 8, 5, 0, 12, 0, 0, 12, 0...",[12]
12011,D-13252-14,737706,737706,"[4, 2, 1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0,...",[4]
12010,C-15245-1,737706,737706,"[4, 2, 1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0,...",[2]
12008,D-13526-1,737476,737709,"[1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0, 0, 12...",[1]
...,...,...,...,...,...
9,D-17726-14,738643,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[5]
10,D-17726-26,738665,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[5]
11,D-17930-15,737571,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[10]
6,D-13851-1,738608,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[2]


In [26]:
dt.rename(columns = {'SIMUL':'X'}, inplace = True)
dt.rename(columns = {'DES':'y'}, inplace = True)

In [27]:
dt.to_hdf('data_piezas2.h5', key='df', index=False)

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block1_values] [items->Index(['CG_PROD', 'X', 'y'], dtype='object')]

  dt.to_hdf('data_piezas2.h5', key='df', index=False)


In [30]:
dt.to_csv('data_piezas_final.csv')

In [28]:
pd.read_hdf('data_piezas2.h5', 'df')

Unnamed: 0,CG_PROD,FE_EMIT,FECHA_INICIO,X,y
12027,D-17956-14,737488,737538,"[12, 1, 0, 12, 12, 0, 12, 0, 12, 0, 3, 5, 0, 1...",[12]
12014,D-17956-15,737690,737700,"[12, 1, 10, 0, 0, 12, 8, 5, 0, 12, 0, 0, 12, 0...",[12]
12011,D-13252-14,737706,737706,"[4, 2, 1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0,...",[4]
12010,C-15245-1,737706,737706,"[4, 2, 1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0,...",[2]
12008,D-13526-1,737476,737709,"[1, 19, 5, 10, 0, 0, 12, 8, 5, 0, 12, 0, 0, 12...",[1]
...,...,...,...,...,...
9,D-17726-14,738643,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[5]
10,D-17726-26,738665,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[5]
11,D-17930-15,737571,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[10]
6,D-13851-1,738608,738678,"[2, 12, 10, 5, 5, 12, 2, 5, 5, 10, 2, 2]",[2]
