# Timeseries Forecasting on Transaction Data

## 1. Installing Dependencies

In [None]:
# # Pip for evaluation metrics
# !pip install datasetsforecast
# !pip install sktime
# !pip install entropyhub

In [None]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Some functions for plotting and stuff
import ts_utils as ts_utils

## 2. Data Preparation

In [None]:
# Size tag of the dataset file to read
data_size = 'norm'

# Date tag of the dataset file to read
data_date = '2110' # '2110' = 21st of October

# Read the wide-format CSV into a DataFrame (takes around 2 minutes)
dataset = pd.read_csv(f"~/Thesis/data/eod_balances_{data_date}_{data_size}.csv")

# Bare expression: displays the frame when it is the last statement of a notebook cell
dataset

### 2.1 In-sample and Out-sample split

In [None]:
# Number of time series: every column except the leading date column
num_timeseries = len(dataset.columns) - 1

# Fraction of the series that goes into the in-sample set
train_test_split = 0.8

# NOTE: despite its name, this is the number of IN-sample series (the first
# `train_test_split` share of the series columns). The name is kept unchanged
# so that any other cell reading it keeps working.
num_out_of_sample = int(train_test_split * num_timeseries)

# In-sample dataframe: the date column plus the first `num_out_of_sample` series
in_sample_data = dataset.iloc[:, : num_out_of_sample + 1] # Training and testing

# Out-sample dataframe: the date column plus the remaining `n` series.
# The remaining columns are sliced from the front instead of with `[-n:]`,
# because `[-0:]` would select every column (date included) when n == 0.
n = num_timeseries - num_out_of_sample
columns_to_keep = dataset.columns[[0]].tolist() + dataset.columns[num_out_of_sample + 1:].tolist()
out_sample_data = dataset[columns_to_keep]

## 3. In-sample analysis

### 3.1 Train/Test splitting and plotting

In [None]:
# Reshape the wide in-sample frame to long format (one row per series and date),
# rename the date column to 'ds' and parse it as datetime
Y_df = (
    in_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
    .assign(ds=lambda long_df: pd.to_datetime(long_df['ds']))
)

In [None]:
# Forecast window length (fh) and total hold-out span (12 windows of fh dates)
fh = 30
horizon = 12 * fh

# Sorted list of the distinct dates present in the data
unique_dates = sorted(Y_df['ds'].unique())

# Cutoff: the last date before the final `horizon` dates of the dataset
cutoff_date = unique_dates[-(horizon + 1)]

# Training data: every row dated on or before the cutoff
Y_train_df = Y_df.loc[Y_df['ds'] <= cutoff_date]

In [None]:
# One (input, test) pair is collected per evaluation window
input_dfs, test_dfs = [], []

# Six windows of fh dates each; consecutive windows start 2 * fh dates apart
for i in range(6):
    # Boundaries of the current test window, counted back from the last date
    test_start_date = unique_dates[-(horizon - 2 * fh * i)]
    test_end_date = unique_dates[-(horizon - 2 * fh * i - fh)]

    # Input: everything up to and including the window start
    input_df = Y_df.loc[Y_df['ds'] <= test_start_date]

    # Test: the dates after the window start, up to and including the window end
    test_df = Y_df.loc[(Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)]

    input_dfs.append(input_df)
    test_dfs.append(test_df)

# Named handles for the 6 input periods
(Y_input_df_0, Y_input_df_1, Y_input_df_2,
 Y_input_df_3, Y_input_df_4, Y_input_df_5) = input_dfs

# Named handles for the 6 test periods
(Y_test_df_0, Y_test_df_1, Y_test_df_2,
 Y_test_df_3, Y_test_df_4, Y_test_df_5) = test_dfs

In [None]:
# Time series to plot (a 'unique_id' value, i.e. a column name of the wide dataset)
unique_id = '6'

# Plot the first input/test pair for that series
ts_utils.plot_train_test_split(Y_input_df_0, Y_test_df_0, unique_id)

### 3.2 Retrieve Predictions

In [None]:
# Model names: each one is both a sub-directory of predictions/ and a prediction column
models = [
    'Naive',
    'ARIMA',
    'ETS',
    'NHITS',
    'PatchTST',
    'TimesNet',
    'DeepAR',
    'Chronos-small',
    'Chronos-large',
    'Chronos-FT',
]

# Evaluation period names, in chronological order
periods = [
    'period01',
    'period02',
    'period03',
    'period04',
    'period05',
    'period06',
]

In [None]:
# Test dataframes, one per period, in period order
Y_test_dfs = [
    Y_test_df_0,
    Y_test_df_1,
    Y_test_df_2,
    Y_test_df_3,
    Y_test_df_4,
    Y_test_df_5,
]

In [None]:
# Prediction dataframes keyed by period name (filled by the loading loop)
Y_pred_dfs = dict()

In [None]:
# Merging the predictions
def merging_preds(Y_pred_df, model_preds, model_name):
    """Left-join one model's forecasts onto the running prediction frame.

    Parameters
    ----------
    Y_pred_df : DataFrame with 'unique_id' and 'ds' columns; every row is kept.
    model_preds : DataFrame with 'unique_id', 'ds' and a column named `model_name`.
        It is not modified; any other columns are ignored.
    model_name : name of the forecast column to bring over.

    Returns
    -------
    DataFrame
        `Y_pred_df` with the `model_name` column appended; rows without a
        matching forecast get NaN in that column.
    """
    # Normalise the join keys on a trimmed copy, so the caller's frame keeps
    # its original dtypes ('unique_id' as string, 'ds' as datetime here only)
    forecasts = model_preds[['unique_id', 'ds', model_name]].copy()
    forecasts['unique_id'] = forecasts['unique_id'].astype('string')
    forecasts['ds'] = pd.to_datetime(forecasts['ds'])

    # Merge predictions on 'unique_id' and 'ds'
    return Y_pred_df.merge(forecasts, on=['unique_id', 'ds'], how='left')

In [None]:
# Build one prediction frame per period: the actuals plus one column per model
for i, period in enumerate(periods):
    print(f"Processing {period}...")

    # Start from this period's test data with normalised join keys
    # ('unique_id' as string, 'ds' as datetime); assign() returns a new frame
    Y_pred_df = Y_test_dfs[i].assign(
        unique_id=lambda frame: frame['unique_id'].astype('string'),
        ds=lambda frame: pd.to_datetime(frame['ds']),
    )

    # Attach every model's forecasts, read from its CSV for this period
    for model in models:
        model_preds = pd.read_csv(f"predictions/{model}/insample/{period}/model_preds_{data_date}_{data_size}.csv")
        Y_pred_df = merging_preds(Y_pred_df, model_preds, model)

    # Index by series id and give the Chronos columns their display names
    Y_pred_df = (
        Y_pred_df
        .set_index('unique_id')
        .rename(columns={"Chronos-small": "Chronos (small)", "Chronos-large": "Chronos (large)", "Chronos-FT": "Chronos (FT)"})
    )

    # Keep the finished frame under its period name
    Y_pred_dfs[period] = Y_pred_df

In [None]:
# Input dataframes, one per period, in period order
Y_input_dfs = [
    Y_input_df_0,
    Y_input_df_1,
    Y_input_df_2,
    Y_input_df_3,
    Y_input_df_4,
    Y_input_df_5,
]

# Helper: rows of one series from a long-format frame
def _series_for_id(frame, unique_id):
    """Return the rows of `frame` for `unique_id` with a datetime 'ds' column, or None if absent."""
    # Index by 'unique_id' unless the frame already is (set_index returns a new frame)
    indexed = frame if frame.index.name == 'unique_id' else frame.set_index('unique_id')
    if unique_id not in indexed.index:
        return None
    # A list indexer always yields a DataFrame, even when a single row matches
    rows = indexed.loc[[unique_id]].copy()
    rows['ds'] = pd.to_datetime(rows['ds'])
    return rows


# Function to plot predictions for a given unique_id
def plot_model_predictions(unique_id, Y_input_dfs, Y_pred_dfs, model_list, history_days=100,
                           grid_shape='3x2', save_path='models/figures/period_plot.png'):
    """Plot history, actuals and model forecasts of one series, one subplot per period.

    Parameters
    ----------
    unique_id : label of the series to plot.
    Y_input_dfs : sequence of long-format frames ('unique_id', 'ds', 'y'), one per
        period, in the same order as the keys of `Y_pred_dfs`.
    Y_pred_dfs : dict mapping a period name to a frame holding 'ds', 'y' and one
        column per model; 'unique_id' may be a column or the index. The periods
        are taken from this dict's keys, in insertion order.
    model_list : names of the prediction columns to draw.
    history_days : days of history shown before the last date of the series in
        the first input frame.
    grid_shape : '3x2' for a 3-by-2 grid; any other value gives a 6-by-1 grid.
    save_path : file the figure is written to.

    Returns
    -------
    None
        The figure is saved to `save_path` and shown. If the series is missing
        from the first input frame, or no period has data for it, a message is
        printed and nothing is drawn.
    """
    # Grid layout: anything other than '3x2' falls back to 6x1
    nrows, ncols = (3, 2) if grid_shape == '3x2' else (6, 1)

    # Periods come from the predictions passed in, not from a notebook global
    period_names = list(Y_pred_dfs)

    # Define colors for the different regions
    colors = {
        'train': '#a6bddb',
        'input': '#fd8d3c',
        'test': '#feb24c',
    }

    # Distinct model colors, assigned to the models in list order
    colors_list = ['purple', 'orange', 'cyan', 'magenta', 'brown', 'red', 'green', 'olive', 'navy', 'teal']
    model_colors = dict(zip(model_list, colors_list))

    # The cutoff (end of train data) is the last date of the series in the first input frame
    first_input_ts = _series_for_id(Y_input_dfs[0], unique_id)
    if first_input_ts is None:
        print(f"unique_id '{unique_id}' not found in the first Y_input_df")
        return
    cutoff_date = first_input_ts['ds'].max()

    # Calculate start date for historical data
    history_start_date = cutoff_date - pd.Timedelta(days=history_days)

    # Single pass over the periods: extract the series once and collect the
    # dates/values that determine the shared axis limits
    period_series = {}
    all_dates = []
    all_values = []
    for i, period in enumerate(period_names):
        input_ts = _series_for_id(Y_input_dfs[i], unique_id)
        if input_ts is None:
            print(f"unique_id '{unique_id}' not found in Y_input_df for period '{period}'")
            continue

        pred_ts = _series_for_id(Y_pred_dfs[period], unique_id)
        if pred_ts is None:
            print(f"unique_id '{unique_id}' not found in Y_pred_df for period '{period}'")
            continue

        # The test window is spanned by the prediction rows
        test_start_date = pred_ts['ds'].min()
        test_end_date = pred_ts['ds'].max()

        # Limit historical data to [history_start_date, test_start_date]
        in_window = (input_ts['ds'] >= history_start_date) & (input_ts['ds'] <= test_start_date)
        input_ts = input_ts[in_window].sort_values('ds')
        pred_ts = pred_ts.sort_values('ds')
        period_series[period] = (input_ts, pred_ts, test_start_date, test_end_date)

        all_dates.extend(input_ts['ds'].tolist())
        all_values.extend(input_ts['y'].tolist())
        all_dates.extend(pred_ts['ds'].tolist())
        all_values.extend(pred_ts['y'].tolist())
        for model in model_list:
            if model in pred_ts.columns:
                all_values.extend(pred_ts[model].tolist())

    # Missing values (e.g. forecasts absent after a left merge) must not reach
    # the axis limits: min()/max() over NaN is order-dependent and NaN limits are invalid
    finite_values = [value for value in all_values if pd.notna(value)]
    if not all_dates or not finite_values:
        print(f"No plottable data found for unique_id '{unique_id}'")
        return

    # Overall x and y limits with 5% padding on each side
    x_min, x_max = min(all_dates), max(all_dates)
    y_min, y_max = min(finite_values), max(finite_values)
    x_padding = (x_max - x_min) * 0.05
    y_padding = (y_max - y_min) * 0.05
    x_min_padded = x_min - x_padding
    x_max_padded = x_max + x_padding
    y_min_padded = y_min - y_padding
    y_max_padded = y_max + y_padding

    # Create the grid of subplots; squeeze=False keeps axs two-dimensional for any shape
    fig, axs = plt.subplots(nrows, ncols, figsize=(18, 5 * nrows), squeeze=False)
    axs = axs.flatten()  # Flatten to easily index subplots

    for i, period in enumerate(period_names):
        if period not in period_series:
            # Already reported above; the subplot stays empty
            continue
        ax = axs[i]
        input_ts, pred_ts, test_start_date, test_end_date = period_series[period]

        # Combine historical and prediction data for continuous plotting
        combined_ts = pd.concat([input_ts, pred_ts], ignore_index=True)
        combined_ts = combined_ts.sort_values('ds')

        # Plot the historical data (up to and including the first test date)
        historical_data = combined_ts[combined_ts['ds'] <= test_start_date]
        ax.plot(historical_data['ds'], historical_data['y'], color='blue', linewidth=1)

        # Plot the actual data in the test period
        actual_test_data = combined_ts[(combined_ts['ds'] >= test_start_date) & (combined_ts['ds'] <= test_end_date)]
        ax.plot(actual_test_data['ds'], actual_test_data['y'], color='blue', linewidth=1, linestyle='--')

        # Add train data fill (diagonal lines) up to the cutoff_date
        train_data = historical_data[historical_data['ds'] <= cutoff_date]
        ax.fill_between(train_data['ds'], y_min, y_max,
                        facecolor='none', edgecolor=colors['train'], hatch='//', linewidth=0, alpha=0.5, label='Train Data')

        # Fill for the input data: spans the whole displayed history
        input_data = historical_data
        ax.fill_between(input_data['ds'], y_min, y_max,
                        facecolor=colors['input'], alpha=0.1, label='Input Data')

        # Fill between for test data
        ax.fill_between(actual_test_data['ds'], y_min, y_max,
                        facecolor=colors['test'], alpha=0.2, label='Test Data')

        # Plot the model predictions
        for model in model_list:
            if model in pred_ts.columns:
                color = model_colors.get(model, 'black')
                ax.plot(pred_ts['ds'], pred_ts[model], label=model, linewidth=0.9, color=color)
            else:
                print(f"Model '{model}' not found in predictions for period '{period}'")

        # Add vertical line to indicate the cutoff date (same for all plots)
        ax.axvline(cutoff_date, color='black', linestyle='dashdot', linewidth=0.75)

        # Add vertical gray dashed lines at the start and end of the test period
        ax.axvline(test_start_date, color='gray', linestyle='--', linewidth=0.75)
        ax.axvline(test_end_date, color='gray', linestyle='--', linewidth=0.75)

        # Same padded limits on every subplot so the periods are comparable
        ax.set_xlim([x_min_padded, x_max_padded])
        ax.set_ylim([y_min_padded, y_max_padded])

        # Set title
        ax.set_title(f'Predictions for Period {i+1}')

        # Remove x-axis labels and ticks for all plots except the bottom row
        if i < (nrows - 1) * ncols:
            ax.set_xlabel('')
            ax.set_xticklabels([])
            ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        else:
            ax.set_xlabel('Date')

        # Remove y-axis labels and ticks for plots on the right side
        if ncols > 1 and (i % ncols) == (ncols - 1):
            ax.set_ylabel('')
            ax.set_yticklabels([])
            ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
        else:
            ax.set_ylabel('Value')

        # Keep only the region and model entries in the legend
        handles, labels = ax.get_legend_handles_labels()
        desired_labels = ['Train Data', 'Input Data', 'Test Data'] + model_list
        handles_labels = [(h, l) for h, l in zip(handles, labels) if l in desired_labels]

        if handles_labels:
            handles, labels = zip(*handles_labels)
            # Going through a dict removes duplicate labels
            by_label = dict(zip(labels, handles))
            if period == 'period06':
                ax.legend(by_label.values(), by_label.keys(), loc='lower left')
            else:
                ax.legend(by_label.values(), by_label.keys(), loc='upper right')
        else:
            ax.legend().set_visible(False)

    # Remove the subplots beyond the number of periods
    for unused_ax in axs[len(period_names):]:
        fig.delaxes(unused_ax)

    # Adjust layout, save and display
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)

    plt.show()

# Series and model(s) to show in the per-period plot
unique_id = '6'
model_list = ['NHITS']

# Draw the figure; grid_shape may be '6x1' or '3x2'
plot_model_predictions(unique_id, Y_input_dfs, Y_pred_dfs, model_list, history_days=100, grid_shape='6x1')

## 4. In-Sample Evaluation

### 4.1 Metrics for the in-sample predictions

In [None]:
# Evaluation results per period, keyed by period name
evaluations = {}

# One evaluation run per period (6 in total)
for i in range(6):
    print(f"Calculating metrics for period{i+1:02d}...")

    # Period name in the 'periodNN' form used for the dictionary keys and directories
    period_name = f'period{i+1:02d}'

    # Predictions and matching actuals for this period
    Y_pred_df, Y_test_df = Y_pred_dfs[period_name], Y_test_dfs[i]

    # The helper returns one metrics frame per evaluation horizon
    eval_1_day, eval_7_days, eval_14_days, eval_30_days = ts_utils.perform_evaluation(
        Y_train_df, Y_test_df, Y_pred_df
    )

    print(f"Saving metrics for {period_name}..")

    # Write each horizon's metrics to its own CSV file
    eval_1_day.to_csv(f"metrics/insample/{period_name}/metrics_1_day_{data_date}_{data_size}.csv", index=False)
    eval_7_days.to_csv(f"metrics/insample/{period_name}/metrics_7_day_{data_date}_{data_size}.csv", index=False)
    eval_14_days.to_csv(f"metrics/insample/{period_name}/metrics_14_day_{data_date}_{data_size}.csv", index=False)
    eval_30_days.to_csv(f"metrics/insample/{period_name}/metrics_30_day_{data_date}_{data_size}.csv", index=False)

    # Keep the frames in memory as well
    evaluations[period_name] = dict(
        eval_1_day=eval_1_day,
        eval_7_days=eval_7_days,
        eval_14_days=eval_14_days,
        eval_30_days=eval_30_days,
    )

## 5. Out-sample analysis

### 5.1 Train/Test splitting and plotting

In [None]:
# Reshape the wide out-sample frame to long format (one row per series and date),
# rename the date column to 'ds' and parse it as datetime
Y_df = (
    out_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
    .assign(ds=lambda long_df: pd.to_datetime(long_df['ds']))
)

In [None]:
# Forecast window length (fh) and total hold-out span (12 windows of fh dates)
fh = 30
horizon = 12 * fh

# Sorted list of the distinct dates present in the data
unique_dates = sorted(Y_df['ds'].unique())

# Cutoff: the last date before the final `horizon` dates of the dataset
cutoff_date = unique_dates[-(horizon + 1)]

# Training data: every row dated on or before the cutoff
Y_train_df = Y_df.loc[Y_df['ds'] <= cutoff_date]

In [None]:
# One (input, test) pair is collected per evaluation window
input_dfs, test_dfs = [], []

# Six windows of fh dates each; consecutive windows start 2 * fh dates apart
for i in range(6):
    # Boundaries of the current test window, counted back from the last date
    test_start_date = unique_dates[-(horizon - 2 * fh * i)]
    test_end_date = unique_dates[-(horizon - 2 * fh * i - fh)]

    # Input: everything up to and including the window start
    input_df = Y_df.loc[Y_df['ds'] <= test_start_date]

    # Test: the dates after the window start, up to and including the window end
    test_df = Y_df.loc[(Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)]

    input_dfs.append(input_df)
    test_dfs.append(test_df)

# Named handles for the 6 input periods
(Y_input_df_0, Y_input_df_1, Y_input_df_2,
 Y_input_df_3, Y_input_df_4, Y_input_df_5) = input_dfs

# Named handles for the 6 test periods
(Y_test_df_0, Y_test_df_1, Y_test_df_2,
 Y_test_df_3, Y_test_df_4, Y_test_df_5) = test_dfs

In [None]:
# Time series to plot: the id in the first row of the first input frame.
# `.iloc[0]` selects by position; a plain `[0]` would look up the row
# *labelled* 0, which only works while that label survives the date filtering.
unique_id = Y_input_df_0['unique_id'].iloc[0]

# Plot the first input/test pair for that series
ts_utils.plot_train_test_split(Y_input_df_0, Y_test_df_0, unique_id)

### 5.2 Retrieve Predictions

In [None]:
# Model names: each one is both a sub-directory of predictions/ and a prediction column
models = [
    'Naive',
    'ARIMA',
    'ETS',
    'NHITS',
    'PatchTST',
    'TimesNet',
    'DeepAR',
    'Chronos-small',
    'Chronos-large',
    'Chronos-FT',
]

# Evaluation period names, in chronological order
periods = [
    'period01',
    'period02',
    'period03',
    'period04',
    'period05',
    'period06',
]

# Test dataframes, one per period, in period order
Y_test_dfs = [
    Y_test_df_0,
    Y_test_df_1,
    Y_test_df_2,
    Y_test_df_3,
    Y_test_df_4,
    Y_test_df_5,
]

# Prediction dataframes keyed by period name (filled by the loading loop)
Y_pred_dfs = dict()

In [None]:
# Merging the predictions
def merging_preds(Y_pred_df, model_preds, model_name):
    """Left-join one model's forecasts onto the running prediction frame.

    Parameters
    ----------
    Y_pred_df : DataFrame with 'unique_id' and 'ds' columns; every row is kept.
    model_preds : DataFrame with 'unique_id', 'ds' and a column named `model_name`.
        It is not modified; any other columns are ignored.
    model_name : name of the forecast column to bring over.

    Returns
    -------
    DataFrame
        `Y_pred_df` with the `model_name` column appended; rows without a
        matching forecast get NaN in that column.
    """
    # Normalise the join keys on a trimmed copy, so the caller's frame keeps
    # its original dtypes ('unique_id' as string, 'ds' as datetime here only)
    forecasts = model_preds[['unique_id', 'ds', model_name]].copy()
    forecasts['unique_id'] = forecasts['unique_id'].astype('string')
    forecasts['ds'] = pd.to_datetime(forecasts['ds'])

    # Merge predictions on 'unique_id' and 'ds'
    return Y_pred_df.merge(forecasts, on=['unique_id', 'ds'], how='left')

In [None]:
# Build one prediction frame per period: the actuals plus one column per model
for i, period in enumerate(periods):
    print(f"Processing {period}...")

    # Start from this period's test data with normalised join keys
    # ('unique_id' as string, 'ds' as datetime); assign() returns a new frame
    Y_pred_df = Y_test_dfs[i].assign(
        unique_id=lambda frame: frame['unique_id'].astype('string'),
        ds=lambda frame: pd.to_datetime(frame['ds']),
    )

    # Attach every model's forecasts, read from its CSV for this period
    for model in models:
        model_preds = pd.read_csv(f"predictions/{model}/outsample/{period}/model_preds_{data_date}_{data_size}.csv")
        Y_pred_df = merging_preds(Y_pred_df, model_preds, model)

    # Index by series id and give the Chronos columns their display names
    Y_pred_df = (
        Y_pred_df
        .set_index('unique_id')
        .rename(columns={"Chronos-small": "Chronos (small)", "Chronos-large": "Chronos (large)", "Chronos-FT": "Chronos (FT)"})
    )

    # Keep the finished frame under its period name
    Y_pred_dfs[period] = Y_pred_df

In [None]:
# Input dataframes, one per period, in period order
Y_input_dfs = [
    Y_input_df_0,
    Y_input_df_1,
    Y_input_df_2,
    Y_input_df_3,
    Y_input_df_4,
    Y_input_df_5,
]

# Helper: rows of one series from a long-format frame
def _series_for_id(frame, unique_id):
    """Return the rows of `frame` for `unique_id` with a datetime 'ds' column, or None if absent."""
    # Index by 'unique_id' unless the frame already is (set_index returns a new frame)
    indexed = frame if frame.index.name == 'unique_id' else frame.set_index('unique_id')
    if unique_id not in indexed.index:
        return None
    # A list indexer always yields a DataFrame, even when a single row matches
    rows = indexed.loc[[unique_id]].copy()
    rows['ds'] = pd.to_datetime(rows['ds'])
    return rows


# Function to plot predictions for a given unique_id
def plot_model_predictions(unique_id, Y_input_dfs, Y_pred_dfs, model_list, history_days=100,
                           grid_shape='3x2', save_path='models/figures/period_plot.png'):
    """Plot history, actuals and model forecasts of one series, one subplot per period.

    Parameters
    ----------
    unique_id : label of the series to plot.
    Y_input_dfs : sequence of long-format frames ('unique_id', 'ds', 'y'), one per
        period, in the same order as the keys of `Y_pred_dfs`.
    Y_pred_dfs : dict mapping a period name to a frame holding 'ds', 'y' and one
        column per model; 'unique_id' may be a column or the index. The periods
        are taken from this dict's keys, in insertion order.
    model_list : names of the prediction columns to draw.
    history_days : days of history shown before the last date of the series in
        the first input frame.
    grid_shape : '3x2' for a 3-by-2 grid; any other value gives a 6-by-1 grid.
    save_path : file the figure is written to.

    Returns
    -------
    None
        The figure is saved to `save_path` and shown. If the series is missing
        from the first input frame, or no period has data for it, a message is
        printed and nothing is drawn.
    """
    # Grid layout: anything other than '3x2' falls back to 6x1
    nrows, ncols = (3, 2) if grid_shape == '3x2' else (6, 1)

    # Periods come from the predictions passed in, not from a notebook global
    period_names = list(Y_pred_dfs)

    # Define colors for the different regions
    colors = {
        'train': '#a6bddb',
        'input': '#fd8d3c',
        'test': '#feb24c',
    }

    # Distinct model colors, assigned to the models in list order
    colors_list = ['purple', 'orange', 'cyan', 'magenta', 'brown', 'red', 'green', 'olive', 'navy', 'teal']
    model_colors = dict(zip(model_list, colors_list))

    # The cutoff (end of train data) is the last date of the series in the first input frame
    first_input_ts = _series_for_id(Y_input_dfs[0], unique_id)
    if first_input_ts is None:
        print(f"unique_id '{unique_id}' not found in the first Y_input_df")
        return
    cutoff_date = first_input_ts['ds'].max()

    # Calculate start date for historical data
    history_start_date = cutoff_date - pd.Timedelta(days=history_days)

    # Single pass over the periods: extract the series once and collect the
    # dates/values that determine the shared axis limits
    period_series = {}
    all_dates = []
    all_values = []
    for i, period in enumerate(period_names):
        input_ts = _series_for_id(Y_input_dfs[i], unique_id)
        if input_ts is None:
            print(f"unique_id '{unique_id}' not found in Y_input_df for period '{period}'")
            continue

        pred_ts = _series_for_id(Y_pred_dfs[period], unique_id)
        if pred_ts is None:
            print(f"unique_id '{unique_id}' not found in Y_pred_df for period '{period}'")
            continue

        # The test window is spanned by the prediction rows
        test_start_date = pred_ts['ds'].min()
        test_end_date = pred_ts['ds'].max()

        # Limit historical data to [history_start_date, test_start_date]
        in_window = (input_ts['ds'] >= history_start_date) & (input_ts['ds'] <= test_start_date)
        input_ts = input_ts[in_window].sort_values('ds')
        pred_ts = pred_ts.sort_values('ds')
        period_series[period] = (input_ts, pred_ts, test_start_date, test_end_date)

        all_dates.extend(input_ts['ds'].tolist())
        all_values.extend(input_ts['y'].tolist())
        all_dates.extend(pred_ts['ds'].tolist())
        all_values.extend(pred_ts['y'].tolist())
        for model in model_list:
            if model in pred_ts.columns:
                all_values.extend(pred_ts[model].tolist())

    # Missing values (e.g. forecasts absent after a left merge) must not reach
    # the axis limits: min()/max() over NaN is order-dependent and NaN limits are invalid
    finite_values = [value for value in all_values if pd.notna(value)]
    if not all_dates or not finite_values:
        print(f"No plottable data found for unique_id '{unique_id}'")
        return

    # Overall x and y limits with 5% padding on each side
    x_min, x_max = min(all_dates), max(all_dates)
    y_min, y_max = min(finite_values), max(finite_values)
    x_padding = (x_max - x_min) * 0.05
    y_padding = (y_max - y_min) * 0.05
    x_min_padded = x_min - x_padding
    x_max_padded = x_max + x_padding
    y_min_padded = y_min - y_padding
    y_max_padded = y_max + y_padding

    # Create the grid of subplots; squeeze=False keeps axs two-dimensional for any shape
    fig, axs = plt.subplots(nrows, ncols, figsize=(18, 5 * nrows), squeeze=False)
    axs = axs.flatten()  # Flatten to easily index subplots

    for i, period in enumerate(period_names):
        if period not in period_series:
            # Already reported above; the subplot stays empty
            continue
        ax = axs[i]
        input_ts, pred_ts, test_start_date, test_end_date = period_series[period]

        # Combine historical and prediction data for continuous plotting
        combined_ts = pd.concat([input_ts, pred_ts], ignore_index=True)
        combined_ts = combined_ts.sort_values('ds')

        # Plot the historical data (up to and including the first test date)
        historical_data = combined_ts[combined_ts['ds'] <= test_start_date]
        ax.plot(historical_data['ds'], historical_data['y'], color='blue', linewidth=1)

        # Plot the actual data in the test period
        actual_test_data = combined_ts[(combined_ts['ds'] >= test_start_date) & (combined_ts['ds'] <= test_end_date)]
        ax.plot(actual_test_data['ds'], actual_test_data['y'], color='blue', linewidth=1, linestyle='--')

        # Add train data fill (diagonal lines) up to the cutoff_date
        train_data = historical_data[historical_data['ds'] <= cutoff_date]
        ax.fill_between(train_data['ds'], y_min, y_max,
                        facecolor='none', edgecolor=colors['train'], hatch='//', linewidth=0, alpha=0.5, label='Train Data')

        # Fill for the input data: spans the whole displayed history
        input_data = historical_data
        ax.fill_between(input_data['ds'], y_min, y_max,
                        facecolor=colors['input'], alpha=0.1, label='Input Data')

        # Fill between for test data
        ax.fill_between(actual_test_data['ds'], y_min, y_max,
                        facecolor=colors['test'], alpha=0.2, label='Test Data')

        # Plot the model predictions
        for model in model_list:
            if model in pred_ts.columns:
                color = model_colors.get(model, 'black')
                ax.plot(pred_ts['ds'], pred_ts[model], label=model, linewidth=0.9, color=color)
            else:
                print(f"Model '{model}' not found in predictions for period '{period}'")

        # Add vertical line to indicate the cutoff date (same for all plots)
        ax.axvline(cutoff_date, color='black', linestyle='dashdot', linewidth=0.75)

        # Add vertical gray dashed lines at the start and end of the test period
        ax.axvline(test_start_date, color='gray', linestyle='--', linewidth=0.75)
        ax.axvline(test_end_date, color='gray', linestyle='--', linewidth=0.75)

        # Same padded limits on every subplot so the periods are comparable
        ax.set_xlim([x_min_padded, x_max_padded])
        ax.set_ylim([y_min_padded, y_max_padded])

        # Set title
        ax.set_title(f'Predictions for Period {i+1}')

        # Remove x-axis labels and ticks for all plots except the bottom row
        if i < (nrows - 1) * ncols:
            ax.set_xlabel('')
            ax.set_xticklabels([])
            ax.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        else:
            ax.set_xlabel('Date')

        # Remove y-axis labels and ticks for plots on the right side
        if ncols > 1 and (i % ncols) == (ncols - 1):
            ax.set_ylabel('')
            ax.set_yticklabels([])
            ax.tick_params(axis='y', which='both', left=False, right=False, labelleft=False)
        else:
            ax.set_ylabel('Value')

        # Keep only the region and model entries in the legend
        handles, labels = ax.get_legend_handles_labels()
        desired_labels = ['Train Data', 'Input Data', 'Test Data'] + model_list
        handles_labels = [(h, l) for h, l in zip(handles, labels) if l in desired_labels]

        if handles_labels:
            handles, labels = zip(*handles_labels)
            # Going through a dict removes duplicate labels
            by_label = dict(zip(labels, handles))
            if period == 'period06':
                ax.legend(by_label.values(), by_label.keys(), loc='lower left')
            else:
                ax.legend(by_label.values(), by_label.keys(), loc='upper right')
        else:
            ax.legend().set_visible(False)

    # Remove the subplots beyond the number of periods
    for unused_ax in axs[len(period_names):]:
        fig.delaxes(unused_ax)

    # Adjust layout, save and display
    plt.tight_layout()
    plt.savefig(save_path, dpi=300)

    plt.show()

# Series to plot: the id in the first row of the first input frame.
# `.iloc[0]` selects by position; a plain `[0]` would look up the row
# *labelled* 0, which only works while that label survives the date filtering.
unique_id = Y_input_df_0['unique_id'].iloc[0]
model_list = ['PatchTST']

# Draw the figure; grid_shape may be '6x1' or '3x2'
plot_model_predictions(unique_id, Y_input_dfs, Y_pred_dfs, model_list, history_days=100, grid_shape='6x1')

## 6. Out-Sample Evaluation

### 6.1 Metrics for the out-sample predictions

In [None]:
# Evaluation results per period, keyed by period name
evaluations = {}

# One evaluation run per period (6 in total)
for i in range(6):
    print(f"Calculating metrics for period{i+1:02d}...")

    # Period name in the 'periodNN' form used for the dictionary keys and directories
    period_name = f'period{i+1:02d}'

    # Predictions and matching actuals for this period
    Y_pred_df, Y_test_df = Y_pred_dfs[period_name], Y_test_dfs[i]

    # The helper returns one metrics frame per evaluation horizon
    eval_1_day, eval_7_days, eval_14_days, eval_30_days = ts_utils.perform_evaluation(
        Y_train_df, Y_test_df, Y_pred_df
    )

    print(f"Saving metrics for {period_name}..")

    # Write each horizon's metrics to its own CSV file
    eval_1_day.to_csv(f"metrics/outsample/{period_name}/metrics_1_day_{data_date}_{data_size}.csv", index=False)
    eval_7_days.to_csv(f"metrics/outsample/{period_name}/metrics_7_day_{data_date}_{data_size}.csv", index=False)
    eval_14_days.to_csv(f"metrics/outsample/{period_name}/metrics_14_day_{data_date}_{data_size}.csv", index=False)
    eval_30_days.to_csv(f"metrics/outsample/{period_name}/metrics_30_day_{data_date}_{data_size}.csv", index=False)

    # Keep the frames in memory as well
    evaluations[period_name] = dict(
        eval_1_day=eval_1_day,
        eval_7_days=eval_7_days,
        eval_14_days=eval_14_days,
        eval_30_days=eval_30_days,
    )