In [9]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import BatchNormalization

In [2]:
file_path = '../Data/data_pre_procesada.csv'
data = pd.read_csv(file_path)
data.head()

Unnamed: 0,Coleccion,Referencia,Descripcion,Color,No.Pedido,Tot_Piezas,Costo_Fabrica,Precio_Venta,Pais_Producto,Cta_Cliente,Nombre_Cliente,Estatus,Plataforma,familia,Pais_Producto_fil,Cta_Cliente_final,Temporada,Nueva_Coleccion
0,FW2016,RK2321,GORRA LACOSTE PARA CABALLERO,1,5597,6.0,15.65,25.0,china,2313076,"RIPOSTO,S.A.",EN SISTEMA,ASIA,RK,china,2313076,FW,2016-11-01
1,FW2016,RK2321,GORRA LACOSTE PARA CABALLERO,1,5630,2.0,15.65,25.0,china,2224074,ICOSAL S.A. DE C.V.,EN SISTEMA,ASIA,RK,china,2224074,FW,2016-11-01
2,FW2016,RK2321,GORRA LACOSTE PARA CABALLERO,1,5631,2.0,15.65,25.0,china,2224090,"ICOSAL,S.A. AEROPUERT",EN SISTEMA,ASIA,RK,china,2224090,FW,2016-11-01
3,FW2016,RK2321,GORRA LACOSTE PARA CABALLERO,1,5635,5.0,15.65,25.0,china,2209051,"INVERSIONES MONTIJO, S.A.",EN SISTEMA,ASIA,RK,china,2209051,FW,2016-11-01
4,FW2016,RK2321,GORRA LACOSTE PARA CABALLERO,31,5597,6.0,15.65,25.0,china,2313076,"RIPOSTO,S.A.",EN SISTEMA,ASIA,RK,china,2313076,FW,2016-11-01


In [5]:
# Agrupando por temporada, cliente y familia
seasonal_data = data.groupby(['Nueva_Coleccion', 'Cta_Cliente_final', 'familia']).agg({'Tot_Piezas':'sum', 'Precio_Venta':'mean'}).reset_index()

In [13]:
def series_to_supervised(df, n_steps):
    X, y = [], []
    # Iterar por cada combinación única de 'Cta_Cliente_final' y 'familia'
    for cliente, cliente_df in df.groupby(['Cta_Cliente_final', 'familia']):
        # Asegurarse de que los datos estén ordenados por 'Nueva_Coleccion'
        cliente_df = cliente_df.sort_values('Nueva_Coleccion')
        # Extraer solo las columnas de interés
        values = cliente_df[['Tot_Piezas', 'Precio_Venta']].values
        for i in range(len(values) - n_steps):
            # Crear secuencias de X e y
            X.append(values[i:(i + n_steps), :])
            y.append(values[i + n_steps, 0])  # Asumiendo que Tot_Piezas es la primera columna
    return np.array(X), np.array(y)

# Definir n_steps
n_steps = 8  # Ejemplo, ajusta según necesidad
# Preparar los datos
X, y = series_to_supervised(seasonal_data, n_steps)

print(f"X shape: {X.shape}")  # Debería ser (n_samples, n_steps, n_features)
print(f"y shape: {y.shape}")  # Debería ser (n_samples,)

X shape: (1002, 8, 2)
y shape: (1002,)


In [14]:
seasonal_data.head()

Unnamed: 0,Nueva_Coleccion,Cta_Cliente_final,familia,Tot_Piezas,Precio_Venta
0,2016-05-01,2000006,AH,30.0,75.0
1,2016-05-01,2000006,CF,22.0,62.0
2,2016-05-01,2000006,CH,24.0,67.0
3,2016-05-01,2000006,CJ,8.0,31.0
4,2016-05-01,2000006,EF,8.0,73.0


In [15]:
# Aplanar los datos de entrada para escalar
n_input = X.shape[1] * X.shape[2]  # n_steps * n_features
X_flattened = X.reshape((X.shape[0], n_input))

# Escalar los datos
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_flattened)

# Volver a dar forma a los datos escalados a la forma original
X_scaled = X_scaled.reshape((X.shape[0], X.shape[1], X.shape[2]))

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

In [16]:
model = Sequential([
    LSTM(50, activation='relu', input_shape=(n_steps, 2)),  # 2 por Tot_Piezas y Precio_Venta
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [17]:
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100


Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x7f69ec2fa170>

In [18]:
# Hacer predicciones
y_pred = model.predict(X_test)

# Calcular métricas de evaluación
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"RMSE: {rmse}")
print(f"R^2: {r2}")

MAE: 145.12513728462048
MSE: 179584.35520735275
RMSE: 423.7739435210154
R^2: 0.5349373170189781
