In [1]:
!pip install numpy==1.23.5
!pip install --upgrade mxnet==1.6.0
!pip install gluonts
!pip install lightning

Collecting lightning
  Using cached lightning-2.5.0.post0-py3-none-any.whl.metadata (40 kB)
Collecting lightning-utilities<2.0,>=0.10.0 (from lightning)
  Using cached lightning_utilities-0.14.0-py3-none-any.whl.metadata (5.6 kB)
Collecting torchmetrics<3.0,>=0.7.0 (from lightning)
  Using cached torchmetrics-1.6.2-py3-none-any.whl.metadata (20 kB)
Collecting pytorch-lightning (from lightning)
  Using cached pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<4.0,>=2.1.0->lightning)
  Using cached nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.m

In [2]:
import random

import numpy as np
import mxnet as mx
import torch

# Seed every random number generator the notebook can touch.
# The estimator used below is imported from gluonts.torch, i.e. it trains a
# PyTorch model, so seeding only NumPy and MXNet leaves the training itself
# unseeded. Seeds are set once here, so results are only comparable after
# Restart Kernel -> Run All.
SEED = 7
random.seed(SEED)
np.random.seed(SEED)
mx.random.seed(SEED)
torch.manual_seed(SEED)

In [3]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from gluonts.evaluation.backtest import make_evaluation_predictions
from tqdm.autonotebook import tqdm
from gluonts.torch import DeepAREstimator
from gluonts.mx.trainer import Trainer
import numpy as np
from gluonts.dataset.common import ListDataset
from gluonts.dataset.field_names import FieldName

# Notebook-wide display defaults: 10x8 inch figures without grid lines, and
# pandas frames rendered without truncating columns.
mpl.rcParams.update({'figure.figsize': (10, 8), 'axes.grid': False})
pd.set_option('display.max_columns', None)

  from tqdm.autonotebook import tqdm


In [4]:
# Load the feature table, keep a single cluster, and split the history of one
# SKU at one point of sale into a training part (before 2024-11-01) and a
# November-2024 hold-out (`validation`).
cluster_number = 3
sku = 7894900027013

features = (
    pd.read_parquet("/content/features.parquet")
    .sort_values(["pdv_codigo", "codigo_barras_sku", "fecha_comercial"])
    .reset_index(drop=True)
    .loc[lambda frame: frame["cluster"] == cluster_number]
)

# One series only: the chosen SKU at point of sale 1, up to 2024-11-30.
# (Widen the pdv_codigo condition to include further points of sale.)
is_sku = features["codigo_barras_sku"] == sku
is_pdv = features['pdv_codigo'] == 1
in_window = features['fecha_comercial'] <= '2024-11-30'
series = features[is_sku & is_pdv & in_window].copy()

validation = series[series['fecha_comercial'] >= '2024-11-01']
filtered = series[series['fecha_comercial'] < '2024-11-01']

# Sanity check: last and first date of the training history.
train_dates = filtered['fecha_comercial']
(train_dates.max(), train_dates.min())

(Timestamp('2024-10-31 00:00:00'), Timestamp('2022-12-01 00:00:00'))

# Test

In [15]:
# Backtest: train DeepAR on everything except the last `prediction_length` days
# of `filtered`, forecast those days, and put forecasts next to the actuals.
freq = "D"
prediction_length = 31

# Wide frame: one row per day, one column per point of sale.
df = filtered.pivot(
    index="fecha_comercial",
    columns="pdv_codigo",
    values="cant_vta"
)

# Re-index on a gap-free daily calendar; days without a record become NaN.
date_range = pd.date_range(
    start=filtered['fecha_comercial'].min(),
    end=filtered['fecha_comercial'].max(),
    freq=freq,
)
assert len(date_range) > prediction_length, "history is shorter than the forecast horizon"
df = df.reindex(date_range)

# Prefix the columns with "pdv_codigo_" and expose the calendar as a "date" column.
df.columns = [f"pdv_codigo_{col}" for col in df.columns]
df_input = df.reset_index().rename(columns={"index": "date"})

# Integer id per series, used as the static categorical feature.
ts_code = np.arange(len(df_input.columns[1:]), dtype=int)
ts_code_mapping = dict(zip(df_input.columns[1:], ts_code))

df_values = df_input.iloc[:, 1:].astype(float)

# Arrays are (days, series): training drops the hold-out window, test keeps it.
df_train = df_values.iloc[:-prediction_length, :].values
df_test = df_values.iloc[:, :].values
num_series = df_test.shape[1]

# Dates are derived from the calendar instead of being hard-coded, so the
# dataset start and the forecast labels always match the data actually used.
start = date_range[0]
start_train = start
forecast_dates = date_range[-prediction_length:]
start_test = forecast_dates[0]

estimator = DeepAREstimator(
    freq=freq,  # Frequency of the time series (daily)
    prediction_length=prediction_length,  # Forecast horizon in days
    num_layers=2,  # Number of RNN layers
    hidden_size=32,  # Number of hidden units in each RNN layer
    lr=0.001,  # Learning rate
    weight_decay=1e-08,  # Weight decay for regularization
    dropout_rate=0.1,  # Dropout rate for regularization
    patience=10,  # Patience of the learning-rate scheduler / early stopping
    num_feat_dynamic_real=0,  # Number of dynamic real features
    num_feat_static_cat=1,  # Number of static categorical features
    num_feat_static_real=0,  # Number of static real features
    cardinality=[len(np.unique(ts_code))],  # Number of distinct series ids
    embedding_dimension=None,  # Library default
    scaling=True,  # Scale the target
    default_scale=None,  # Library default
    lags_seq=None,  # Library default lags
    time_features=None,  # Library default time features
    num_parallel_samples=100,  # Sample paths drawn per forecast
    batch_size=32,  # Batch size for training
    num_batches_per_epoch=50,  # Number of batches per epoch
    imputation_method=None,  # Library default handling of missing values
    trainer_kwargs={"max_epochs": 5},  # Lightning trainer configuration
    train_sampler=None,  # Library default
    validation_sampler=None,  # Library default
    nonnegative_pred_samples=False,  # Do not clip samples at zero
)

train_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_train.T, ts_code)
], freq=freq)

test_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_test.T, ts_code)
], freq=freq)

predictor = estimator.train(training_data=train_ds)

forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100,
)

# Both iterators yield one item per series, so the progress total is the number
# of columns of df_test. len(df_test) would be the number of days.
print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=num_series))
print("Obtaining time series predictions ...")
forecasts = list(tqdm(forecast_it, total=num_series))

all_results = []

# One result frame per point of sale.
for i, (tss_series, forecast) in enumerate(zip(tss, forecasts)):
    # Actual values over the hold-out window.
    latest_tss = tss_series.iloc[-prediction_length:].values.flatten()

    # Point forecast: mean over the sampled paths.
    predictions = forecast.mean

    pdv_codigo_name = df_input.columns[i + 1]

    results = pd.DataFrame({
        'date': forecast_dates,
        'cant_vta': latest_tss,
        'cant_vta_pred_deepar': predictions,
        'pdv_codigo': pdv_codigo_name
    })
    all_results.append(results)

# Combine all series; recover the numeric point-of-sale code from the column
# name and attach the SKU. `date` is already datetime64, so no re-parsing.
final_results = (
    pd.concat(all_results, ignore_index=True)
    .rename(columns={'date': 'fecha_comercial'})
    .assign(
        pdv_codigo=lambda res: res['pdv_codigo'].str.extract(r'(\d+)$', expand=False).astype(int),
        codigo_barras_sku=int(sku),
    )
)
final_results


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                         | Out sizes   
--------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 17.6 K | train | [[1, 1], [1, 1], [1, 1123, 4], [1, 1123], [1, 1123], [1, 31, 4]] | [1, 100, 31]
-----------------------------------------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'train_loss' reached 13.33731 (best 13.33731), saving model to '/content/lightning_logs/version_4/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 0, global step 50: 'train_loss' reached 13.33731 (best 13.33731), saving model to '/content/lightning_logs/version_4/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO: Epoch 1, global step 100: 'train_loss' reached 12.95419 (best 12.95419), saving model to '/content/lightning_logs/version_4/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 1, global step 100: 'train_loss' reached 12.95419 (best 12.95419), saving model to '/content/lightning_logs/version_4/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO: Epoch 2, global step 150: 'train_loss' reached 12.54384 (best 12.54384), saving model to '/content/lightning_logs/version_4/checkpoints/epoch=2-step=150.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 2, global s

Obtaining time series conditioning values ...


  0%|          | 0/701 [00:00<?, ?it/s]

Obtaining time series predictions ...


  0%|          | 0/701 [00:00<?, ?it/s]

Unnamed: 0,fecha_comercial,cant_vta,cant_vta_pred_deepar,pdv_codigo,codigo_barras_sku
0,2024-10-01,244000.0,181470.8125,1,7894900027013
1,2024-10-02,226000.0,202354.796875,1,7894900027013
2,2024-10-03,262000.0,220903.515625,1,7894900027013
3,2024-10-04,434000.0,341269.625,1,7894900027013
4,2024-10-05,438000.0,511311.4375,1,7894900027013
5,2024-10-06,476000.0,408624.46875,1,7894900027013
6,2024-10-07,160000.0,129185.242188,1,7894900027013
7,2024-10-08,166000.0,153875.671875,1,7894900027013
8,2024-10-09,152000.0,188540.859375,1,7894900027013
9,2024-10-10,154000.0,238918.953125,1,7894900027013


In [16]:
# Mean of the DeepAR point forecasts over all rows of final_results.
final_results['cant_vta_pred_deepar'].mean()

284990.03

In [9]:
# Scratch notes, previously left as bare no-op numeric expressions. Presumably
# the mean of `cant_vta_pred_deepar` from successive runs of the backtest cell
# (the last value matches the mean displayed above):
#   273285, 303432, 274640.8, 284990.03

# Snapshot the current backtest results so later cells can overwrite final_results.
test_1 = final_results.copy()
test_1

Unnamed: 0,fecha_comercial,cant_vta,cant_vta_pred_deepar,pdv_codigo,codigo_barras_sku
0,2024-10-01,244000.0,174247.9375,1,7894900027013
1,2024-10-02,226000.0,191400.4375,1,7894900027013
2,2024-10-03,262000.0,195414.140625,1,7894900027013
3,2024-10-04,434000.0,343758.75,1,7894900027013
4,2024-10-05,438000.0,507102.59375,1,7894900027013
5,2024-10-06,476000.0,434407.125,1,7894900027013
6,2024-10-07,160000.0,144910.015625,1,7894900027013
7,2024-10-08,166000.0,161071.765625,1,7894900027013
8,2024-10-09,152000.0,198009.234375,1,7894900027013
9,2024-10-10,154000.0,247617.8125,1,7894900027013


To be deleted ("a borrar"): scratch re-run of the forecast with October 2024 held out.

In [17]:
# Same selection as the November split, shifted one month back: training history
# strictly before 2024-10-01, October 2024 kept aside as `validation`.
cluster_number = 3
sku = 7894900027013

features = pd.read_parquet("/content/features.parquet")
features = (
    features
    .sort_values(["pdv_codigo", "codigo_barras_sku", "fecha_comercial"])
    .reset_index(drop=True)
)
features = features.loc[features["cluster"] == cluster_number]

# Narrow down step by step: SKU -> point of sale 1 -> dates up to 2024-10-31.
# (Widen the pdv_codigo condition to include further points of sale.)
sku_rows = features.loc[features["codigo_barras_sku"] == sku].copy()
store_rows = sku_rows.loc[sku_rows['pdv_codigo'] == 1]
windowed = store_rows.loc[store_rows['fecha_comercial'] <= '2024-10-31']

validation = windowed.loc[windowed['fecha_comercial'] >= '2024-10-01']
filtered = windowed.loc[windowed['fecha_comercial'] < '2024-10-01']

# Sanity check: last and first date of the training history.
(filtered['fecha_comercial'].max(), filtered['fecha_comercial'].min())

(Timestamp('2024-09-30 00:00:00'), Timestamp('2022-12-01 00:00:00'))

In [24]:
import random
import sys  # kept from the original cell; other cells may rely on it being imported

import numpy as np
import torch  # backend of gluonts.torch.DeepAREstimator

try:
    import mxnet as mx  # only relevant when an MXNet-based model is used
except ImportError:
    # The previous `'mxnet' in sys.modules` check ran right after an
    # unconditional import, so it was always true. Making the import itself
    # optional is what actually makes the MXNet seeding conditional.
    mx = None

# Seed shared by every random number generator below.
random_seed = 42


def set_global_seed(seed):
    """Seed Python's `random`, NumPy, PyTorch and, when importable, MXNet.

    Call this again right before a training run to make that run repeatable
    within the same kernel session.

    Args:
        seed: Integer seed passed to every generator.
    """
    random.seed(seed)
    np.random.seed(seed)

    if mx is not None:
        mx.random.seed(seed)

    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
        # Trade cuDNN autotuning for deterministic kernels.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


set_global_seed(random_seed)

In [27]:
# Forecast October 2024: the calendar is extended to 2024-10-31 and DeepAR is
# trained on everything before the last `prediction_length` days of it.
freq = "D"
prediction_length = 31

train_data = filtered.copy()

# Wide frame: one row per day, one column per point of sale.
df_wide = train_data.pivot(
    index="fecha_comercial",
    columns="pdv_codigo",
    values="cant_vta"
)

# Gap-free daily calendar up to the end of the forecast window; days without a
# record (including any part of the window missing from `filtered`) become NaN.
date_range = pd.date_range(start=df_wide.index.min(), end='2024-10-31', freq=freq)
assert len(date_range) > prediction_length, "history is shorter than the forecast horizon"
df_wide = df_wide.reindex(date_range)

df_wide.columns = [f"pdv_codigo_{col}" for col in df_wide.columns]
df_input = df_wide.reset_index().rename(columns={"index": "date"})

# Integer id per series, used as the static categorical feature.
ts_code = np.arange(len(df_input.columns[1:]), dtype=int)
ts_code_mapping = dict(zip(df_input.columns[1:], ts_code))

df_values = df_input.iloc[:, 1:].astype(float)

# Arrays are (days, series): training drops the forecast window, test keeps it.
df_train = df_values.iloc[:-prediction_length, :].values
df_test = df_values.iloc[:, :].values
num_series = df_test.shape[1]

# Dates are derived from the calendar instead of being hard-coded, so the
# dataset start and the forecast labels always match the data actually used.
start = date_range[0]
start_train = start
forecast_dates = date_range[-prediction_length:]
start_test = forecast_dates[0]

estimator = DeepAREstimator(
    freq=freq,  # Frequency of the time series (daily)
    prediction_length=prediction_length,  # Forecast horizon in days
    num_layers=2,  # Number of RNN layers
    hidden_size=32,  # Number of hidden units in each RNN layer
    lr=0.001,  # Learning rate
    weight_decay=1e-08,  # Weight decay for regularization
    dropout_rate=0.1,  # Dropout rate for regularization
    patience=10,  # Patience of the learning-rate scheduler / early stopping
    num_feat_dynamic_real=0,  # Number of dynamic real features
    num_feat_static_cat=1,  # Number of static categorical features
    num_feat_static_real=0,  # Number of static real features
    cardinality=[len(np.unique(ts_code))],  # Number of distinct series ids
    embedding_dimension=None,  # Library default
    scaling=True,  # Scale the target
    default_scale=None,  # Library default
    lags_seq=None,  # Library default lags
    time_features=None,  # Library default time features
    num_parallel_samples=100,  # Sample paths drawn per forecast
    batch_size=32,  # Batch size for training
    num_batches_per_epoch=50,  # Number of batches per epoch
    imputation_method=None,  # Library default handling of missing values
    trainer_kwargs={"max_epochs": 5},  # Lightning trainer configuration
    train_sampler=None,  # Library default
    validation_sampler=None,  # Library default
    nonnegative_pred_samples=False,  # Do not clip samples at zero
)

train_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_train.T, ts_code)
], freq=freq)

test_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_test.T, ts_code)
], freq=freq)

predictor = estimator.train(training_data=train_ds)
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100,
)

# Both iterators yield one item per series, so the progress total is the number
# of columns of df_test. len(df_test) would be the number of days.
print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=num_series))
print("Obtaining time series predictions ...")
forecasts = list(tqdm(forecast_it, total=num_series))

all_results = []

# One result frame per point of sale. Only forecasts are kept here: the
# original built a `cant_vta` column from `tss` and dropped it again.
for i, forecast in enumerate(forecasts):
    # Point forecast: mean over the sampled paths.
    predictions = forecast.mean

    pdv_codigo_name = df_input.columns[i + 1]

    results = pd.DataFrame({
        'date': forecast_dates,
        'cant_vta_pred_deepar': predictions,
        'pdv_codigo': pdv_codigo_name
    })
    all_results.append(results)

# Combine all series; recover the numeric point-of-sale code from the column
# name and attach the SKU. `date` is already datetime64, so no re-parsing.
final_results = (
    pd.concat(all_results, ignore_index=True)
    .rename(columns={'date': 'fecha_comercial'})
    .assign(
        pdv_codigo=lambda res: res['pdv_codigo'].str.extract(r'(\d+)$', expand=False).astype(int),
        codigo_barras_sku=int(sku),
    )
)
final_results

INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                         | Out sizes   
--------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 17.6 K | train | [[1, 1], [1, 1], [1, 1123, 4], [1, 1123], [1, 1123], [1, 31, 4]] | [1, 100, 31]
-----------------------------------------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'train_loss' reached 13.40338 (best 13.40338), saving model to '/content/lightning_logs/version_8/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 0, global step 50: 'train_loss' reached 13.40338 (best 13.40338), saving model to '/content/lightning_logs/version_8/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO: Epoch 1, global step 100: 'train_loss' reached 13.09750 (best 13.09750), saving model to '/content/lightning_logs/version_8/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 1, global step 100: 'train_loss' reached 13.09750 (best 13.09750), saving model to '/content/lightning_logs/version_8/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO: Epoch 2, global step 150: 'train_loss' reached 12.66932 (best 12.66932), saving model to '/content/lightning_logs/version_8/checkpoints/epoch=2-step=150.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 2, global s

Obtaining time series conditioning values ...


  0%|          | 0/701 [00:00<?, ?it/s]

Obtaining time series predictions ...


  0%|          | 0/701 [00:00<?, ?it/s]

Unnamed: 0,fecha_comercial,cant_vta_pred_deepar,pdv_codigo,codigo_barras_sku
0,2024-10-01,188531.765625,1,7894900027013
1,2024-10-02,199253.359375,1,7894900027013
2,2024-10-03,232631.71875,1,7894900027013
3,2024-10-04,388093.90625,1,7894900027013
4,2024-10-05,522246.1875,1,7894900027013
5,2024-10-06,450311.875,1,7894900027013
6,2024-10-07,152852.34375,1,7894900027013
7,2024-10-08,181756.625,1,7894900027013
8,2024-10-09,203568.4375,1,7894900027013
9,2024-10-10,229402.234375,1,7894900027013


In [28]:
# Mean of the DeepAR point forecasts over all rows of final_results.
final_results['cant_vta_pred_deepar'].mean()

291928.62

In [16]:
# Display the snapshot of final_results stored earlier in `test_1`.
# NOTE(review): this cell's execution count is out of order, so the values shown
# depend on which run last assigned `test_1` - confirm before relying on them.
test_1

Unnamed: 0,fecha_comercial,cant_vta,cant_vta_pred_deepar,pdv_codigo,codigo_barras_sku
0,2024-10-01,244000.0,177558.140625,1,7894900027013
1,2024-10-02,226000.0,185531.640625,1,7894900027013
2,2024-10-03,262000.0,193008.953125,1,7894900027013
3,2024-10-04,434000.0,295129.90625,1,7894900027013
4,2024-10-05,438000.0,429299.6875,1,7894900027013
5,2024-10-06,476000.0,416794.15625,1,7894900027013
6,2024-10-07,160000.0,141657.625,1,7894900027013
7,2024-10-08,166000.0,154866.0625,1,7894900027013
8,2024-10-09,152000.0,175790.984375,1,7894900027013
9,2024-10-10,154000.0,194133.734375,1,7894900027013


# Validation

In [9]:

# Validation: forecast November 2024 and join the forecasts onto the held-out
# actuals in `validation`.
freq = "D"
prediction_length = 30

train_data = filtered.copy()

# Wide frame: one row per day, one column per point of sale.
df_wide = train_data.pivot(
    index="fecha_comercial",
    columns="pdv_codigo",
    values="cant_vta"
)

# Gap-free daily calendar up to the end of the forecast window; days without a
# record (including any part of the window missing from `filtered`) become NaN.
date_range = pd.date_range(start=df_wide.index.min(), end='2024-11-30', freq=freq)
assert len(date_range) > prediction_length, "history is shorter than the forecast horizon"
df_wide = df_wide.reindex(date_range)

df_wide.columns = [f"pdv_codigo_{col}" for col in df_wide.columns]
df_input = df_wide.reset_index().rename(columns={"index": "date"})

# Integer id per series, used as the static categorical feature.
ts_code = np.arange(len(df_input.columns[1:]), dtype=int)
ts_code_mapping = dict(zip(df_input.columns[1:], ts_code))

df_values = df_input.iloc[:, 1:].astype(float)

# Arrays are (days, series): training drops the forecast window, test keeps it.
df_train = df_values.iloc[:-prediction_length, :].values
df_test = df_values.iloc[:, :].values
num_series = df_test.shape[1]

# Dates are derived from the calendar instead of being hard-coded, so the
# dataset start and the forecast labels always match the data actually used.
start = date_range[0]
start_train = start
forecast_dates = date_range[-prediction_length:]
start_test = forecast_dates[0]

# Fail fast, before training, if `validation` belongs to a different split than
# the forecast window. That happens when `filtered` / `validation` were last
# assigned by a cell using other cut-off dates, and the merge below would then
# silently produce only NaN predictions.
if not validation['fecha_comercial'].isin(forecast_dates).any():
    raise ValueError(
        "`validation` has no dates inside the forecast window "
        f"{forecast_dates[0].date()}..{forecast_dates[-1].date()}; "
        "re-run the cell that builds the matching `filtered` / `validation` split."
    )

estimator = DeepAREstimator(
    freq=freq,  # Frequency of the time series (daily)
    prediction_length=prediction_length,  # Forecast horizon in days
    num_layers=2,  # Number of RNN layers
    hidden_size=32,  # Number of hidden units in each RNN layer
    lr=0.001,  # Learning rate
    weight_decay=1e-08,  # Weight decay for regularization
    dropout_rate=0.1,  # Dropout rate for regularization
    patience=10,  # Patience of the learning-rate scheduler / early stopping
    num_feat_dynamic_real=0,  # Number of dynamic real features
    num_feat_static_cat=1,  # Number of static categorical features
    num_feat_static_real=0,  # Number of static real features
    cardinality=[len(np.unique(ts_code))],  # Number of distinct series ids
    embedding_dimension=None,  # Library default
    scaling=True,  # Scale the target
    default_scale=None,  # Library default
    lags_seq=None,  # Library default lags
    time_features=None,  # Library default time features
    num_parallel_samples=100,  # Sample paths drawn per forecast
    batch_size=32,  # Batch size for training
    num_batches_per_epoch=50,  # Number of batches per epoch
    imputation_method=None,  # Library default handling of missing values
    trainer_kwargs={"max_epochs": 5},  # Lightning trainer configuration
    train_sampler=None,  # Library default
    validation_sampler=None,  # Library default
    nonnegative_pred_samples=False,  # Do not clip samples at zero
)

train_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_train.T, ts_code)
], freq=freq)

test_ds = ListDataset([
    {
        FieldName.TARGET: target,
        FieldName.START: start,
        FieldName.FEAT_STATIC_CAT: [fsc]
    }
    for target, fsc in zip(df_test.T, ts_code)
], freq=freq)

predictor = estimator.train(training_data=train_ds)
forecast_it, ts_it = make_evaluation_predictions(
    dataset=test_ds,
    predictor=predictor,
    num_samples=100,
)

# Both iterators yield one item per series, so the progress total is the number
# of columns of df_test. len(df_test) would be the number of days.
print("Obtaining time series conditioning values ...")
tss = list(tqdm(ts_it, total=num_series))
print("Obtaining time series predictions ...")
forecasts = list(tqdm(forecast_it, total=num_series))

all_results = []

# One result frame per point of sale. Only forecasts are kept here: the actuals
# come from `validation` in the merge below.
for i, forecast in enumerate(forecasts):
    # Point forecast: mean over the sampled paths.
    predictions = forecast.mean

    pdv_codigo_name = df_input.columns[i + 1]

    results = pd.DataFrame({
        'date': forecast_dates,
        'cant_vta_pred_deepar': predictions,
        'pdv_codigo': pdv_codigo_name
    })
    all_results.append(results)

# Combine all series; recover the numeric point-of-sale code from the column
# name and attach the SKU. `date` is already datetime64, so no re-parsing.
final_results = (
    pd.concat(all_results, ignore_index=True)
    .rename(columns={'date': 'fecha_comercial'})
    .assign(
        pdv_codigo=lambda res: res['pdv_codigo'].str.extract(r'(\d+)$', expand=False).astype(int),
        codigo_barras_sku=int(sku),
    )
)

# Selecting the four base columns first discards any prediction column left by
# a previous run of this cell, which keeps the cell re-runnable.
validation = validation[['pdv_codigo', 'fecha_comercial', 'codigo_barras_sku','cant_vta']]
validation = validation.merge(final_results, on=['pdv_codigo', 'fecha_comercial', 'codigo_barras_sku'], how='left')

validation


INFO: GPU available: False, used: False
INFO:lightning.pytorch.utilities.rank_zero:GPU available: False, used: False
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
/usr/local/lib/python3.11/dist-packages/lightning/pytorch/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.
INFO: 
  | Name  | Type        | Params | Mode  | In sizes                                                         | Out sizes   
--------------------------------------------------------------------------------------------------------------------------------
0 | model | DeepARModel | 19.5 K | train | [[1, 1], [1, 1], [1, 1122, 4], [1, 1122], [1, 1122], [1, 30, 4]] | [1, 100, 30]
-----------------------------------------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

INFO: Epoch 0, global step 50: 'train_loss' reached 13.25900 (best 13.25900), saving model to '/content/lightning_logs/version_1/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 0, global step 50: 'train_loss' reached 13.25900 (best 13.25900), saving model to '/content/lightning_logs/version_1/checkpoints/epoch=0-step=50.ckpt' as top 1
INFO: Epoch 1, global step 100: 'train_loss' reached 12.97715 (best 12.97715), saving model to '/content/lightning_logs/version_1/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 1, global step 100: 'train_loss' reached 12.97715 (best 12.97715), saving model to '/content/lightning_logs/version_1/checkpoints/epoch=1-step=100.ckpt' as top 1
INFO: Epoch 2, global step 150: 'train_loss' reached 12.60518 (best 12.60518), saving model to '/content/lightning_logs/version_1/checkpoints/epoch=2-step=150.ckpt' as top 1
INFO:lightning.pytorch.utilities.rank_zero:Epoch 2, global s

Obtaining time series conditioning values ...


  0%|          | 0/731 [00:00<?, ?it/s]

Obtaining time series predictions ...


  0%|          | 0/731 [00:00<?, ?it/s]

Unnamed: 0,pdv_codigo,fecha_comercial,codigo_barras_sku,cant_vta,cant_vta_pred_deepar
0,1,2024-11-01,7894900027013,504000.0,361293.750000
1,1,2024-11-02,7894900027013,572000.0,481802.125000
2,1,2024-11-03,7894900027013,410000.0,404281.531250
3,1,2024-11-04,7894900027013,326000.0,150433.546875
4,1,2024-11-05,7894900027013,184000.0,170887.421875
...,...,...,...,...,...
745,30,2024-11-26,7894900027013,56000.0,157898.484375
746,30,2024-11-27,7894900027013,114000.0,184487.703125
747,30,2024-11-28,7894900027013,92000.0,257885.796875
748,30,2024-11-29,7894900027013,210000.0,359573.593750
