<a href="https://colab.research.google.com/github/AndresMontesDeOca/Laboratorio3/blob/main/TimeSeriesSplit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Temperatura



In [193]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
import matplotlib.pyplot as plt
#############################################################################
def normalize_data(df, normalization="MinMax"):
    """
    Normaliza cada serie de tiempo (columna) de manera individual usando MinMax o Zscore.

    Args:
        df (pd.DataFrame): DataFrame con series de tiempo de distintos productos, cada columna es un producto.
        normalization (str): Tipo de normalización a aplicar. Opciones: "MinMax" o "Zscore". Default es "MinMax".

    Returns:
        normalized_df (pd.DataFrame): DataFrame con las series normalizadas.
        normalization_params (pd.DataFrame): DataFrame con los parámetros necesarios para desnormalizar cada columna.
            - Para "MinMax": valores min y max de cada columna.
            - Para "Zscore": valores mean y std de cada columna.
    """
    normalization_params = pd.DataFrame(columns=["product_id", "min", "max", "mean", "std"])
    normalized_df = pd.DataFrame(index=df.index)

    for column in df.columns:
        if normalization == "MinMax":
            scaler = MinMaxScaler()
            normalized_values = scaler.fit_transform(df[[column]]).flatten()
            new_params = pd.DataFrame({
                "product_id": [column],
                "min": [scaler.data_min_[0]],
                "max": [scaler.data_max_[0]],
                "mean": [None],
                "std": [None]
            })
            normalization_params = pd.concat([normalization_params, new_params], ignore_index=True)
            normalized_df[column] = normalized_values

        elif normalization == "ZScore":
            scaler = StandardScaler()
            normalized_values = scaler.fit_transform(df[[column]]).flatten()
            new_params = pd.DataFrame({
                "product_id": [column],
                "min": [None],
                "max": [None],
                "mean": [scaler.mean_[0]],
                "std": [scaler.scale_[0]]
            })
            normalization_params = pd.concat([normalization_params, new_params], ignore_index=True)
            normalized_df[column] = normalized_values

        else:
            raise ValueError("Invalid normalization method. Choose 'MinMax' or 'ZScore'.")

    return normalized_df, normalization_params
#############################################################################
def denormalize_series(normalized_series, normalization_params, normalization="MinMax"):
    """
    Desnormaliza una serie de tiempo usando los valores almacenados.

    Args:
        normalized_series (pd.Series or pd.DataFrame): Serie o DataFrame con los datos normalizados.
        normalization_params (pd.DataFrame): DataFrame con los parámetros necesarios para desnormalizar cada serie o columna.
            - Para "MinMax": valores min y max de cada serie o columna.
            - Para "Zscore": valores mean y std de cada serie o columna.
        normalization (str): Tipo de normalización a deshacer. Opciones: "MinMax" o "Zscore". Default es "MinMax".

    Returns:
        denormalized_series (pd.Series or pd.DataFrame): Serie o DataFrame con los datos desnormalizados.
    """
    if isinstance(normalized_series, pd.DataFrame):
        denormalized_df = pd.DataFrame(index=normalized_series.index)
        for column in normalized_series.columns:
            params = normalization_params[normalization_params["product_id"] == column]
            if normalization == "MinMax":
                min_value = params["min"].values[0]
                max_value = params["max"].values[0]
                denormalized_values = normalized_series[column] * (max_value - min_value) + min_value
            elif normalization == "ZScore":
                mean_value = params["mean"].values[0]
                std_value = params["std"].values[0]
                denormalized_values = normalized_series[column] * std_value + mean_value
            else:
                raise ValueError("Invalid normalization method. Choose 'MinMax' or 'ZScore'.")
            denormalized_df[column] = denormalized_values
        return denormalized_df
    elif isinstance(normalized_series, pd.Series):
        product_ids = normalized_series.index
        denormalized_values = []
        for product_id in product_ids:
            params = normalization_params[normalization_params["product_id"] == product_id]
            if normalization == "MinMax":
                min_value = params["min"].values[0]
                max_value = params["max"].values[0]
                denormalized_value = normalized_series[product_id] * (max_value - min_value) + min_value
            elif normalization == "ZScore":
                mean_value = params["mean"].values[0]
                std_value = params["std"].values[0]
                denormalized_value = normalized_series[product_id] * std_value + mean_value
            else:
                raise ValueError("Invalid normalization method. Choose 'MinMax' or 'ZScore'.")
            denormalized_values.append(denormalized_value)
        denormalized_series = pd.Series(denormalized_values, index=product_ids, name=normalized_series.name)
        return denormalized_series
    else:
        raise TypeError("normalized_series should be either a pandas Series or DataFrame")
#############################################################################
def split_data(df):
  df_train = df.loc['2017':'2019-05']
  df_valid = df.loc['2019-06':]
  return df_train, df_valid
#############################################################################
def windowed_dataset(sequence, data_split, window_size, horizon, batch_size, shuffle_buffer=1000):
    """Generates dataset windows.

    Args:
      sequence (array-like): Contains the values of the time series.
      data_split (str): Specifies if the dataset is for training or validation/test.
      window_size (int): The number of time steps to include in the feature.
      horizon (int): The number of future time steps to predict.
      batch_size (int): The batch size.
      shuffle_buffer (int): Buffer size to use for the shuffle method.

    Returns:
      tf.data.Dataset: TF Dataset containing time windows.
    """

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(sequence)

    # Window the data but only take those with the specified size
    dataset = dataset.window(window_size + horizon, shift=1, drop_remainder=True)

    # Flatten the windows by putting its elements in a single batch
    dataset = dataset.flat_map(lambda window: window.batch(window_size + horizon))

    # Create tuples with features and labels
    dataset = dataset.map(lambda window: (window[:-horizon], window[-horizon:]))

    if data_split == 'train':
        # Shuffle the training data to improve generalization
        dataset = dataset.shuffle(shuffle_buffer)
    else:
        # Cache the validation/test data for improved performance
        dataset = dataset.cache()

    # Create batches of windows and prefetch for performance
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return dataset
#############################################################################
def compile_model(new_model, loss, optimizer):
    new_model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])
    print(new_model.summary())
    return new_model
#############################################################################
def MyModel(loss, optimizer, window_size, horizon, n_features):
    new_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer((window_size, n_features)),
        # tf.keras.layers.Conv1D(filters=window_size +  horizon, kernel_size=3, activation='relu', padding='causal'),
        # tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences=False)),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(n_features * horizon, activation='relu'),
        tf.keras.layers.Reshape((horizon, n_features)),
    ])
    return compile_model(new_model, loss, optimizer)
#############################################################################
def MyCallbacks(patience):
    """
    Devuelve una lista de callbacks para el entrenamiento del modelo.

    Parameters:
    patience (int): Número de épocas a esperar para ver una mejora en 'val_loss' antes de detener el entrenamiento.

    Returns:
    list: Lista de callbacks de Keras.
    """
    early_stop = EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
    return [early_stop]
#############################################################################
def plot_history(history, start_epoch=0, metrics=None):
    if isinstance(metrics, str):
        metrics = [metrics]

    if metrics is None:
        metrics = [x for x in history.history.keys() if x[:4] != 'val_']

    if len(metrics) == 0:
        print('No metrics to display.')
        return

    # Get the epochs and filter them starting from start_epoch
    x = history.epoch[start_epoch:]

    rows = 1
    cols = len(metrics)
    count = 0

    plt.figure(figsize=(12 * cols, 8))

    for metric in sorted(metrics):
        count += 1
        plt.subplot(rows, cols, count)
        plt.plot(x, history.history[metric][start_epoch:], label='Train')
        val_metric = f'val_{metric}'
        if val_metric in history.history.keys():
            plt.plot(x, history.history[val_metric][start_epoch:], label='Validation')
        plt.title(metric.capitalize())
        plt.legend()
    plt.show()
################################################################

In [133]:
# Code to read csv file into Colaboratory:
# !pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth, drive
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



################################# Datasets ###################################
# # Ventas
id = "158aOjqxaNO8l97yA6VWJkek_15YVLMhs"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('sell-in.txt')
data_ventas = pd.read_csv("sell-in.txt", sep="\t")
data_ventas['periodo'] = pd.to_datetime(data_ventas['periodo'], format='%Y%m')
data_ventas['customer_id'] = data_ventas['customer_id'].astype(str)
data_ventas['product_id'] = data_ventas['product_id'].astype(str)
data = data_ventas.copy()

# # Productos
id = "15JS_k86LS0sgJXma7BOVXWlyNcMwxdhE"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('tb_productos.txt')
data_productos = pd.read_csv("tb_productos.txt", sep="\t")
data_productos['product_id'] = data_productos['product_id'].astype(str)

# # Stocks
id = "15EV-8f_U7onpA1AcTxxXeD-z8yVR4fQu"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('tb_stocks.txt')
data_stocks = pd.read_csv("tb_stocks.txt", sep="\t")
data_stocks['periodo'] = pd.to_datetime(data_stocks['periodo'], format='%Y%m')
data_stocks['product_id'] = data_stocks['product_id'].astype(str)

# # Productos a predecir
id = "15LjADctFVwjzQFJvfJGFTEdgZx9xCoId"
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('productos_a_predecir.txt')
data_productos_a_predecir = pd.read_csv("productos_a_predecir.txt", sep="\t")
data_productos_a_predecir['product_id'] = data_productos_a_predecir['product_id'].astype(str)
data_productos_a_predecir_con_categorias = data_productos_a_predecir.set_index('product_id').join(data_productos.drop_duplicates('product_id').set_index('product_id').sort_index()[['cat1', 'cat2', 'cat3']])


data = data.query('product_id == "20001"').groupby('periodo')['tn'].sum()
data.index.freq = 'MS'
data = data.to_frame()
# data

Unnamed: 0_level_0,tn
periodo,Unnamed: 1_level_1
2017-01-01,934.77222
2017-02-01,798.0162
2017-03-01,1303.35771
2017-04-01,1069.9613
2017-05-01,1502.20132
2017-06-01,1520.06539
2017-07-01,1030.67391
2017-08-01,1267.39462
2017-09-01,1316.94604
2017-10-01,1439.75563


In [206]:
# Normal Split
normalization = 'MinMax'
window_size = 6
horizon = 1
batch_size = 32


data_norm, data_norm_params = normalize_data(data, normalization=normalization)
data_norm_train, data_norm_valid = split_data(data_norm)
data_train_wrangled = windowed_dataset(data_norm_train, 'train', window_size, horizon, batch_size)
data_valid_wrangled = windowed_dataset(data_norm_valid, 'valid', window_size, horizon, batch_size)

model_name = 'TimeSeries'
loss = 'mse'
optimizer = 'adam'
n_features = data.shape[1]
patience = 3
epochs = 20

callbacks = MyCallbacks(patience)
model = MyModel(loss, optimizer, window_size, horizon, n_features)

history = model.fit(
    data_train_wrangled,
    validation_data = data_valid_wrangled,
    epochs=epochs,
    verbose=2,
    callbacks = callbacks)

# Evaluar el modelo en el conjunto de validación
val_loss = model.evaluate(data_valid_wrangled)
print(f'Loss: {val_loss}')


# plot_history(history)

Model: "sequential_38"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_60 (Bidirect  (None, 6, 64)             8704      
 ional)                                                          
                                                                 
 bidirectional_61 (Bidirect  (None, 32)                10368     
 ional)                                                          
                                                                 
 dropout_30 (Dropout)        (None, 32)                0         
                                                                 
 dense_46 (Dense)            (None, 1)                 33        
                                                                 
 reshape_30 (Reshape)        (None, 1, 1)              0         
                                                                 
Total params: 19105 (74.63 KB)
Trainable params: 1910

  saving_api.save_model(


1/1 - 0s - loss: 0.1277 - mae: 0.3230 - val_loss: 0.6146 - val_mae: 0.7666 - 41ms/epoch - 41ms/step
Epoch 3/20
1/1 - 0s - loss: 0.1225 - mae: 0.3186 - val_loss: 0.6144 - val_mae: 0.7663 - 73ms/epoch - 73ms/step
Epoch 4/20
1/1 - 0s - loss: 0.1275 - mae: 0.3221 - val_loss: 0.6114 - val_mae: 0.7638 - 70ms/epoch - 70ms/step
Epoch 5/20
1/1 - 0s - loss: 0.1206 - mae: 0.3133 - val_loss: 0.5858 - val_mae: 0.7473 - 72ms/epoch - 72ms/step
Epoch 6/20
1/1 - 0s - loss: 0.1028 - mae: 0.2875 - val_loss: 0.5474 - val_mae: 0.7221 - 78ms/epoch - 78ms/step
Epoch 7/20
1/1 - 0s - loss: 0.1121 - mae: 0.2980 - val_loss: 0.5072 - val_mae: 0.6947 - 70ms/epoch - 70ms/step
Epoch 8/20
1/1 - 0s - loss: 0.0986 - mae: 0.2678 - val_loss: 0.4663 - val_mae: 0.6657 - 72ms/epoch - 72ms/step
Epoch 9/20
1/1 - 0s - loss: 0.0661 - mae: 0.2214 - val_loss: 0.4255 - val_mae: 0.6352 - 71ms/epoch - 71ms/step
Epoch 10/20
1/1 - 0s - loss: 0.0722 - mae: 0.2184 - val_loss: 0.3853 - val_mae: 0.6036 - 73ms/epoch - 73ms/step
Epoch 11/20

In [187]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf

# ... (rest of the code remains the same)

# TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=n_splits)

# Iterar sobre cada split
for i, (train_index, test_index) in enumerate(tscv.split(data_norm)):
    train_tscv = data_norm.iloc[train_index]
    test_tscv = data_norm.iloc[test_index]

    # ... (rest of the code remains the same)

    # Crear datasets de ventanas
    data_train_wrangled = windowed_dataset(train_tscv.values, 'train', window_size, horizon, batch_size)
    data_valid_wrangled = windowed_dataset(test_tscv.values, 'test', window_size, horizon, batch_size)

    # Check if datasets are empty and adjust if necessary
    if len(list(data_train_wrangled)) == 0 or len(list(data_valid_wrangled)) == 0:
        print(f"Warning: Empty dataset encountered for split {i+1}. Skipping this split.")
        continue  # Skip to the next split

    # ... (rest of the code remains the same)

1

In [205]:
# TimeSeries Split
normalization = 'MinMax'
window_size = 3
horizon = 1
batch_size = 32
n_splits = 5

data_norm, data_norm_params = normalize_data(data, normalization=normalization)

from sklearn.model_selection import TimeSeriesSplit

# TimeSeriesSplit: 3 splits para ejemplo
tscv = TimeSeriesSplit(n_splits=n_splits)

# Almacenar las pérdidas para cada split
split_losses = []

# Iterar sobre cada split
for i, (train_index, test_index) in enumerate(tscv.split(data_norm)):
    train_tscv = data.iloc[train_index]
    test_tscv = data.iloc[test_index]
    # print(i)

    # Crear datasets de ventanas
    # data_norm_train, data_norm_valid = split_data(data_norm)
    data_train_wrangled = windowed_dataset(train_tscv.values, 'train', window_size, horizon, batch_size)
    data_valid_wrangled = windowed_dataset(test_tscv.values, 'test', window_size, horizon, batch_size)

    # Check if datasets are empty and adjust if necessary
    if len(list(data_train_wrangled)) == 0 or len(list(data_valid_wrangled)) == 0:
      print(f"Warning: Empty dataset encountered for split {i+1}. Skipping this split.")
      continue  # Skip to the next split

    model_name = 'TimeSeries'
    loss = 'mse'
    optimizer = 'adam'
    n_features = data.shape[1]
    patience = 3
    epochs = 20

    callbacks = MyCallbacks(patience)
    model = MyModel(loss, optimizer, window_size, horizon, n_features)

    history = model.fit(
        data_train_wrangled,
        validation_data = data_valid_wrangled,
        epochs=epochs,
        verbose=2,
        callbacks = callbacks)

    # Evaluar el modelo en el conjunto de validación
    val_loss = model.evaluate(data_valid_wrangled)
    print(f'Split {i+1} - Loss: {val_loss}')
    split_losses.append(val_loss)

# Promedio de las pérdidas en todos los splits
avg_loss = np.mean(split_losses)
print(f'Average Loss across all splits: {avg_loss}')


Model: "sequential_33"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_50 (Bidirect  (None, 3, 64)             8704      
 ional)                                                          
                                                                 
 bidirectional_51 (Bidirect  (None, 32)                10368     
 ional)                                                          
                                                                 
 dropout_25 (Dropout)        (None, 32)                0         
                                                                 
 dense_41 (Dense)            (None, 1)                 33        
                                                                 
 reshape_25 (Reshape)        (None, 1, 1)              0         
                                                                 
Total params: 19105 (74.63 KB)
Trainable params: 1910

  saving_api.save_model(


1/1 - 0s - loss: 0.3920 - mae: 0.6226 - val_loss: 0.2644 - val_mae: 0.5123 - 70ms/epoch - 70ms/step
Epoch 4/20
1/1 - 0s - loss: 0.3817 - mae: 0.6099 - val_loss: 0.2487 - val_mae: 0.4967 - 74ms/epoch - 74ms/step
Epoch 5/20
1/1 - 0s - loss: 0.3594 - mae: 0.5898 - val_loss: 0.2330 - val_mae: 0.4807 - 65ms/epoch - 65ms/step
Epoch 6/20
1/1 - 0s - loss: 0.3422 - mae: 0.5784 - val_loss: 0.2176 - val_mae: 0.4644 - 67ms/epoch - 67ms/step
Epoch 7/20
1/1 - 0s - loss: 0.3277 - mae: 0.5649 - val_loss: 0.2023 - val_mae: 0.4477 - 68ms/epoch - 68ms/step
Epoch 8/20
1/1 - 0s - loss: 0.3175 - mae: 0.5592 - val_loss: 0.1874 - val_mae: 0.4307 - 67ms/epoch - 67ms/step
Epoch 9/20
1/1 - 0s - loss: 0.2981 - mae: 0.5322 - val_loss: 0.1728 - val_mae: 0.4134 - 71ms/epoch - 71ms/step
Epoch 10/20
1/1 - 0s - loss: 0.2563 - mae: 0.4971 - val_loss: 0.1583 - val_mae: 0.3956 - 70ms/epoch - 70ms/step
Epoch 11/20
1/1 - 0s - loss: 0.2830 - mae: 0.5254 - val_loss: 0.1443 - val_mae: 0.3776 - 81ms/epoch - 81ms/step
Epoch 12/2

In [198]:
import tensorflow as tf

#############################################################################
def compile_model(new_model, loss, optimizer):
    new_model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])
    print(new_model.summary())
    return new_model
#############################################################################
def MyModel(loss, optimizer, window_size, horizon, n_features):
    new_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer((window_size, n_features)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences=False)),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(n_features * horizon, activation='relu'),
        tf.keras.layers.Reshape((horizon, n_features)),
    ])
    return compile_model(new_model, loss, optimizer)

#############################################################################
def windowed_dataset(sequence, data_split, window_size, horizon, batch_size, shuffle_buffer=1000):
    """Generates dataset windows.

    Args:
      sequence (array-like): Contains the values of the time series.
      data_split (str): Specifies if the dataset is for training or validation/test.
      window_size (int): The number of time steps to include in the feature.
      horizon (int): The number of future time steps to predict.
      batch_size (int): The batch size.
      shuffle_buffer (int): Buffer size to use for the shuffle method.

    Returns:
      tf.data.Dataset: TF Dataset containing time windows.
    """

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(sequence)

    # Window the data but only take those with the specified size
    dataset = dataset.window(window_size + horizon, shift=1, drop_remainder=True)

    # Flatten the windows by putting its elements in a single batch
    dataset = dataset.flat_map(lambda window: window.batch(window_size + horizon))

    # Create tuples with features and labels
    dataset = dataset.map(lambda window: (window[:-horizon], window[-horizon:]))

    if data_split == 'train':
        # Shuffle the training data to improve generalization
        dataset = dataset.shuffle(shuffle_buffer)
    else:
        # Cache the validation/test data for improved performance
        dataset = dataset.cache()

    # Create batches of windows and prefetch for performance
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return dataset


In [201]:
import pandas as pd
import numpy as np
from sklearn.model_selection import TimeSeriesSplit
import tensorflow as tf

# Función de normalización (supongamos ya definida)
def normalize_data(df, normalization="MinMax"):
    from sklearn.preprocessing import MinMaxScaler, StandardScaler

    if normalization == "MinMax":
        scaler = MinMaxScaler()
    elif normalization == "Standard":
        scaler = StandardScaler()
    else:
        raise ValueError("Normalization method not supported.")

    data_norm = pd.DataFrame(scaler.fit_transform(df), columns=df.columns, index=df.index)
    return data_norm, scaler

# Función de callbacks (supuesta ya definida)
def MyCallbacks(patience):
    return [
        tf.keras.callbacks.EarlyStopping(patience=patience, restore_best_weights=True),
        tf.keras.callbacks.ModelCheckpoint(filepath='model_checkpoint.h5', save_best_only=True)
    ]

#############################################################################
def compile_model(new_model, loss, optimizer):
    new_model.compile(optimizer=optimizer, loss=loss, metrics=['mae'])
    print(new_model.summary())
    return new_model

#############################################################################
def MyModel(loss, optimizer, window_size, horizon, n_features):
    new_model = tf.keras.Sequential([
        tf.keras.layers.InputLayer((window_size, n_features)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32, return_sequences=True)),
        tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences=False)),
        tf.keras.layers.Dropout(0.4),
        tf.keras.layers.Dense(n_features * horizon, activation='relu'),
        tf.keras.layers.Reshape((horizon, n_features)),
    ])
    return compile_model(new_model, loss, optimizer)

#############################################################################
def windowed_dataset(sequence, data_split, window_size, horizon, batch_size, shuffle_buffer=1000):
    """Generates dataset windows.

    Args:
      sequence (array-like): Contains the values of the time series.
      data_split (str): Specifies if the dataset is for training or validation/test.
      window_size (int): The number of time steps to include in the feature.
      horizon (int): The number of future time steps to predict.
      batch_size (int): The batch size.
      shuffle_buffer (int): Buffer size to use for the shuffle method.

    Returns:
      tf.data.Dataset: TF Dataset containing time windows.
    """

    # Generate a TF Dataset from the series values
    dataset = tf.data.Dataset.from_tensor_slices(sequence)

    # Window the data but only take those with the specified size
    dataset = dataset.window(window_size + horizon, shift=1, drop_remainder=True)

    # Flatten the windows by putting its elements in a single batch
    dataset = dataset.flat_map(lambda window: window.batch(window_size + horizon))

    # Create tuples with features and labels
    dataset = dataset.map(lambda window: (window[:-horizon], window[-horizon:]))

    if data_split == 'train':
        # Shuffle the training data to improve generalization
        dataset = dataset.shuffle(shuffle_buffer)
    else:
        # Cache the validation/test data for improved performance
        dataset = dataset.cache()

    # Create batches of windows and prefetch for performance
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    return dataset

# Crear datos ficticios con estacionalidad
np.random.seed(0)
date_range = pd.date_range(start='1/1/2018', periods=36, freq='M')
trend = np.linspace(0, 1, 36)
seasonal = 0.5 * np.sin(np.linspace(0, 3 * np.pi, 36))
random = 0.1 * np.random.randn(36)
data = pd.DataFrame(trend + seasonal + random, index=date_range, columns=['value'])

# Normalización de los datos
normalization = 'MinMax'
window_size = 6
horizon = 1
batch_size = 32
n_splits = 5
data_norm, data_norm_params = normalize_data(data, normalization=normalization)

# TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=n_splits)

# Iterar sobre cada split
for i, (train_index, test_index) in enumerate(tscv.split(data_norm)):
    train_tscv = data_norm.iloc[train_index]
    test_tscv = data_norm.iloc[test_index]

    print(f'Split {i+1}')
    print(f'Train indices: {train_index}')
    print(f'Test indices: {test_index}')
    print(f'Train data shape: {train_tscv.shape}')
    print(f'Test data shape: {test_tscv.shape}')

    # Crear datasets de ventanas
    data_train_wrangled = windowed_dataset(train_tscv.values, 'train', window_size, horizon, batch_size)
    data_valid_wrangled = windowed_dataset(test_tscv.values, 'test', window_size, horizon, batch_size)

    # Verificar que los datasets no estén vacíos
    print(f'Train dataset size: {len(list(data_train_wrangled))}')
    print(f'Validation dataset size: {len(list(data_valid_wrangled))}')

    # Configuración del modelo
    model_name = 'TimeSeries'
    loss = 'mse'
    optimizer = 'adam'
    n_features = data_norm.shape[1]
    patience = 3
    epochs = 20

    callbacks = MyCallbacks(patience)
    model = MyModel(loss, optimizer, window_size, horizon, n_features)

    # Entrenar el modelo
    history = model.fit(
        data_train_wrangled,
        validation_data=data_valid_wrangled,
        epochs=epochs,
        verbose=2,
        callbacks=callbacks
    )

    # Evaluar el modelo en el conjunto de validación
    val_loss = model.evaluate(data_valid_wrangled)
    print(f'Split {i+1} - Loss: {val_loss}')


Split 1
Train indices: [0 1 2 3 4 5]
Test indices: [ 6  7  8  9 10 11]
Train data shape: (6, 1)
Test data shape: (6, 1)
Train dataset size: 0
Validation dataset size: 0
Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 bidirectional_36 (Bidirect  (None, 6, 64)             8704      
 ional)                                                          
                                                                 
 bidirectional_37 (Bidirect  (None, 32)                10368     
 ional)                                                          
                                                                 
 dropout_18 (Dropout)        (None, 32)                0         
                                                                 
 dense_34 (Dense)            (None, 1)                 33        
                                                                 
 reshape_18 (Res

ValueError: Unexpected result of `train_function` (Empty logs). This could be due to issues in input pipeline that resulted in an empty dataset. Otherwise, please use `Model.compile(..., run_eagerly=True)`, or `tf.config.run_functions_eagerly(True)` for more information of where went wrong, or file a issue/bug to `tf.keras`.

In [None]:
# Seleccionar los últimos x meses de data_train
#   data_for_prediction = data_train[-window_size:]
#   # Convierte los datos a un formato compatible con la función window_dataset
#   data_for_prediction = data_for_prediction.values.reshape((1, window_size, n_features))
#   predictions = model.predict(data_for_prediction)

#   # # Convertir las predicciones a un DataFrame para desnormalizar
#   predictions_df = pd.DataFrame(predictions[0], columns=data_train.columns)

#   # # Desnormalizar las predicciones
#   predictions_denorm_cat1 = denormalize_series(predictions_df, data_norm_params, normalization=normalization).iloc[1]

#   # Voy sumando las predicciones de cada categoria
#   predictions_acum = predictions_acum.add(predictions_denorm_cat1, fill_value=0)