# Timeseries Forecasting on Transaction Data

## 1. Installing Dependencies

In [None]:
# # Pip for evaluation metrics
# !pip install datasetsforecast
# !pip install sktime
# !pip install EntropyHub

In [None]:
# Basics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from tqdm import tqdm

# Some functions for plotting and stuff
import ts_utils as ts_utils

## 2. Data Preparation

In [None]:
# Size variant of the data to read; it is interpolated into the CSV file name below
data_size = 'full'

# Date tag of the data to read; it is also interpolated into the CSV file name
data_date = '2110' # '2110' = 21st of October

# Read the data (takes around 2 minutes)
dataset = pd.read_csv(f"~/Thesis/data/eod_balances_{data_date}_{data_size}.csv")

# Bare expression so the notebook renders the dataframe as the cell output
dataset

### 2.1 In-sample and Out-sample split

In [None]:
# Calculate total amount of timeseries: every column except the first one, which
# is used as the key column below (presumably 'date' -- see the melt step later on)
num_timeseries = len(dataset.columns) - 1

# Fraction of the series that goes into the in-sample set
train_test_split = 0.8

# NOTE: despite its name, this is the number of *in-sample* series (the first
# `train_test_split` share of the columns); the remainder becomes the out-sample set
num_out_of_sample = int(train_test_split * num_timeseries)

# Create in-sample dataframe: key column + the first `num_out_of_sample` series
in_sample_data = dataset.iloc[:, : num_out_of_sample + 1] # Training and testing

# Create out-sample dataframe: key column + all remaining series
n = num_timeseries - num_out_of_sample
# Slice from the front instead of using `columns[-n:]`: for n == 0 the negative
# slice `[-0:]` would select *every* column rather than none.
columns_to_keep = dataset.columns[[0]].tolist() + dataset.columns[num_out_of_sample + 1:].tolist()
out_sample_data = dataset[columns_to_keep]

## 3. In-sample analysis

### 3.1 Train/Test splitting and plotting

In [None]:
# Change the data to the long format: one row per (date, series) pair, with the
# original column name stored in 'unique_id' and the cell value stored in 'y'
Y_df = in_sample_data.melt(id_vars=['date'], var_name='unique_id', value_name='y')
# Rename the date column to 'ds', the name used by the rest of the notebook
Y_df = Y_df.rename(columns={'date':'ds'})

# Convert date column to datetime type
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

In [None]:
# Forecast window length (in dates) and the total evaluation span of 12 such windows
fh = 30
horizon = 12 * fh

# All distinct dates of the in-sample data, in ascending order
unique_dates = sorted(Y_df['ds'].unique())

# The cutoff is the date sitting `horizon + 1` positions from the end
cutoff_date = unique_dates[-horizon - 1]

# Training data: every row dated on or before the cutoff
Y_train_df = Y_df.loc[Y_df['ds'] <= cutoff_date]

In [None]:
# Containers for the six evaluation windows
input_dfs = []
test_dfs = []

# Build six (input, test) pairs; consecutive windows start 2 * fh dates apart
for i in range(6):
    # Last date that still belongs to the input data, and last date of the test window
    test_start_date = unique_dates[2 * i * fh - horizon]
    test_end_date = unique_dates[(2 * i + 1) * fh - horizon]

    # Input data: everything up to and including the start date
    input_df = Y_df.loc[Y_df['ds'].le(test_start_date)]
    input_dfs.append(input_df)

    # Test data: dates strictly after the start date, up to and including the end date
    test_df = Y_df.loc[Y_df['ds'].gt(test_start_date) & Y_df['ds'].le(test_end_date)]
    test_dfs.append(test_df)

# Expose the six input periods under individual names
(Y_input_df_0, Y_input_df_1, Y_input_df_2,
 Y_input_df_3, Y_input_df_4, Y_input_df_5) = input_dfs

# Expose the six test periods under individual names
(Y_test_df_0, Y_test_df_1, Y_test_df_2,
 Y_test_df_3, Y_test_df_4, Y_test_df_5) = test_dfs

In [None]:
# Time series to plot, identified by its 'unique_id' value
unique_id = '17'

# Plot the input and test dataframes of the first evaluation window for that series
ts_utils.plot_train_test_split(Y_input_df_0, Y_test_df_0, unique_id)

### 3.2 Retrieve Predictions

In [None]:
# Forecasting models whose stored predictions are loaded, and the period folder names
models = [
    'Naive', 'ARIMA', 'ETS',
    'NHITS', 'PatchTST', 'TimesNet', 'DeepAR',
    'Chronos-small', 'Chronos-large', 'Chronos-FT',
]
periods = [f'period{number:02d}' for number in range(1, 7)]

# Test dataframes in period order (index 0 belongs to 'period01')
Y_test_dfs = [
    Y_test_df_0, Y_test_df_1, Y_test_df_2,
    Y_test_df_3, Y_test_df_4, Y_test_df_5,
]

# Will map each period name to its merged prediction dataframe
Y_pred_dfs = {}

In [None]:
# Adjusted merging_preds function
def merging_preds(Y_pred_df, model_preds, model_name):
    """Left-join one model's forecast column onto the running prediction frame.

    Parameters
    ----------
    Y_pred_df : pandas.DataFrame
        Frame to extend; it is joined on its 'unique_id' and 'ds' columns.
    model_preds : pandas.DataFrame
        Forecasts with the columns 'unique_id', 'ds' and ``model_name``.
        This frame is not modified.
    model_name : str
        Name of the forecast column to bring over.

    Returns
    -------
    pandas.DataFrame
        ``Y_pred_df`` with the ``model_name`` column appended; rows without a
        matching forecast get a missing value (left join).
    """
    forecast_col = str(model_name)

    # Work on a copy of just the needed columns so the caller's frame keeps its
    # original dtypes (the previous version cast the key columns in place).
    preds = model_preds[['unique_id', 'ds', forecast_col]].copy()

    # Ensure 'unique_id' is string and 'ds' is datetime so the join keys are comparable
    preds['unique_id'] = preds['unique_id'].astype('string')
    preds['ds'] = pd.to_datetime(preds['ds'])

    # Merge predictions on 'unique_id' and 'ds'
    return Y_pred_df.merge(preds, on=['unique_id', 'ds'], how='left')

In [None]:
# Loop over periods to get predictions: for every period, start from a copy of its
# test dataframe and left-merge one forecast column per model onto it
for i, period in enumerate(periods):
    print(f"Processing {period}...")
    
    # Get the test dataframe for this period (copied so the original test set is untouched)
    Y_pred_df = Y_test_dfs[i].copy()
    
    # Ensure 'unique_id' is string and 'ds' is datetime (same dtypes merging_preds applies
    # to the prediction files, so the merge keys match)
    Y_pred_df['unique_id'] = Y_pred_df['unique_id'].astype('string')
    Y_pred_df['ds'] = pd.to_datetime(Y_pred_df['ds'])
    
    # Loop over models to merge predictions
    for model in models:
        # Read the prediction csv stored for this model and period
        model_preds = pd.read_csv(f"predictions/{model}/insample/{period}/model_preds_{data_date}_{data_size}.csv")
        
        # Merge the predictions into Y_pred_df (adds a column named after the model)
        Y_pred_df = merging_preds(Y_pred_df, model_preds, model)

    # Set 'unique_id' as index if needed
    Y_pred_df = Y_pred_df.set_index('unique_id')
    
    # Rename the small/large Chronos columns to their display names.
    # NOTE: 'Chronos-FT' is not part of this mapping and keeps its original name.
    Y_pred_df = Y_pred_df.rename(columns={"Chronos-small": "Chronos (small)", "Chronos-large": "Chronos (large)"})
    
    # Store the dataframe in the dictionary, keyed by period name
    Y_pred_dfs[period] = Y_pred_df

## 4. Evaluation

### 4.1 Visually plot forecasts

In [None]:
# Specify models to plot (names as they appear after the column rename in Y_pred_dfs)
model_names = ['ARIMA', 'PatchTST', 'NHITS', 'Chronos (large)']

# Specify accounts to plot
unique_ids = ['1', '2', '3', '4', '5']

# Plot the predictions of the first period; reset_index() turns 'unique_id' back into a column
ts_utils.plot_multiple_model_forecasts(Y_train_df, Y_test_df_0, Y_pred_dfs['period01'].reset_index(), model_names, unique_ids)

In [None]:
# Specify models to plot. The names must match the columns of Y_pred_dfs['period01'],
# where 'Chronos-small' has been renamed to 'Chronos (small)'; 'Chronos-FT' was not
# renamed and therefore keeps its hyphenated name.
model_names = ['ARIMA', 'PatchTST', 'Chronos (small)', 'Chronos-FT']

# Specify accounts to plot: three ids drawn at random from 0..19 (with replacement,
# so the same account can be drawn more than once)
unique_ids = [str(i) for i in np.random.randint(0, 20, size=3)]

# Plot the predictions of the first period; reset_index() turns 'unique_id' back into a column
ts_utils.plot_multiple_model_forecasts(Y_train_df, Y_test_df_0, Y_pred_dfs['period01'].reset_index(), model_names, unique_ids)

In [None]:
# Specify models to plot (only the large Chronos model here)
model_names = ['Chronos (large)']

# Specify accounts to plot
unique_ids = ['28', '27', '26', '25', '24', '23']

# Plot the predictions of the first period; reset_index() turns 'unique_id' back into a column
ts_utils.plot_multiple_model_forecasts(Y_train_df, Y_test_df_0, Y_pred_dfs['period01'].reset_index(), model_names, unique_ids)

### 4.1 Retrieve the metrics

In [None]:
# Period folder names ('period01' ... 'period06') and the horizon tags used in file names
periods = [f'period{i+1:02d}' for i in range(6)]  # ['period01', 'period02', ..., 'period06']
horizons = ['1_day', '7_day', '14_day', '30_day']

# metrics[period][horizon] -> metrics dataframe read from disk
metrics = {}

# NOTE: the inner loop variable rebinds the notebook-level name `horizon`
for period_name in periods:
    metrics[period_name] = {}
    for horizon in horizons:
        # Location of the stored metrics for this period and horizon
        filename = f"metrics/insample/{period_name}/metrics_{horizon}_{data_date}_{data_size}.csv"
        # Load the file with 'unique_id' cast to the pandas string dtype
        df = pd.read_csv(filename).astype({'unique_id': 'string'})
        metrics[period_name][horizon] = df

# Summary per period, as returned by ts_utils.summarize_metrics()
metric_results = {}

for period_name in periods:
    # The four horizon dataframes of this period, shortest horizon first
    df1, df2, df3, df4 = (
        metrics[period_name][key] for key in ('1_day', '7_day', '14_day', '30_day')
    )

    result = ts_utils.summarize_metrics(df1, df2, df3, df4)
    metric_results[period_name] = result

In [None]:
# Display the summary of one period; available keys are
# metric_results['period01'], metric_results['period02'], ..., metric_results['period06']
metric_results['period02']

### 4.1.1 Filtering the MAPEs

In [None]:
# Define the filter_high_mape function with detailed print statements
def filter_high_mape(eval_df, Y_test_df, mape_threshold=500, approx_0_threshold=100, debug=False,
                     model='PatchTST'):
    """Drop series whose extreme MAPE coincides with near-zero test values.

    A series is removed from ``eval_df`` only when BOTH conditions hold:

    1. its 'mape' row has a value above ``mape_threshold`` in the ``model`` column, and
    2. at least one of its test observations satisfies ``|y| <= approx_0_threshold``.

    Parameters
    ----------
    eval_df : pandas.DataFrame
        Metrics with the columns 'unique_id', 'metric' and one column per model.
    Y_test_df : pandas.DataFrame
        Test observations with the columns 'unique_id' and 'y'.
    mape_threshold : float, default 500
        MAPE value above which a series counts as exceeding.
    approx_0_threshold : float, default 100
        Absolute value up to which an observation counts as close to zero.
    debug : bool, default False
        Print the intermediate id sets and row counts.
    model : str, default 'PatchTST'
        Model column whose MAPE is compared against ``mape_threshold``.

    Returns
    -------
    pandas.DataFrame
        ``eval_df`` without any row (of any metric) of the removed series.
    """
    # Step 1: Identify unique_ids where the reference model's MAPE exceeds the threshold
    is_mape_row = eval_df['metric'] == 'mape'
    exceeds_threshold = eval_df[model] > mape_threshold
    exceeding_error_ids = eval_df.loc[is_mape_row & exceeds_threshold, 'unique_id'].unique()

    if debug:
        print(f"    Unique IDs with MAPE > {mape_threshold}: {len(exceeding_error_ids)}")
        print(f"    Unique IDs exceeding MAPE threshold: {list(exceeding_error_ids)}")

    # Step 2: Identify unique_ids where y is close to zero (bounds are inclusive)
    near_zero = Y_test_df['y'].between(-approx_0_threshold, approx_0_threshold)
    filtered_ids = Y_test_df.loc[near_zero, 'unique_id'].unique()

    if debug:
        print(f"    Unique IDs with y close to zero (|y| <= {approx_0_threshold}): {len(filtered_ids)}")
        print(f"    Unique IDs with y close to zero: {list(filtered_ids)}")

    # Step 3: Only ids meeting both conditions are removed
    final_unique_ids = set(exceeding_error_ids) & set(filtered_ids)

    if debug:
        print(f"    Unique IDs to remove (intersection): {len(final_unique_ids)}")
        print(f"    Unique IDs to remove: {list(final_unique_ids)}")

    # Step 4: Remove these unique_ids from eval_df (all metric rows of each id)
    filtered_eval_df = eval_df[~eval_df['unique_id'].isin(final_unique_ids)]

    if debug:
        print(f"    Filtered eval_df has {len(filtered_eval_df)} rows (original had {len(eval_df)} rows)")

    return filtered_eval_df

# Define periods and horizons (same values as in the metrics-reading cell)
periods = [f'period{i+1:02d}' for i in range(6)]  # ['period01', 'period02', ..., 'period06']
horizons = ['1_day', '7_day', '14_day', '30_day']

# Initialize nested dictionaries to store original and filtered dataframes.
# NOTE: this resets `metrics` to an empty dict; it is filled again by the filtering loop.
metrics = {}
filtered_metrics = {}

# Create a mapping of period names to their corresponding Y_test_df.
# NOTE: `Y_test_dfs` was a list in the prediction-retrieval cells; from here on it is a
# dict keyed by period name, so those cells cannot be re-run without redefining it.
Y_test_dfs = {f'period{i+1:02d}': test_dfs[i] for i in range(6)}

In [None]:
# Loop over periods and horizons to read the dataframes and apply filtering.
# Fills metrics[period][horizon] (as read) and filtered_metrics[period][horizon] (after filtering).
for period_name in periods:
    print(f"Processing {period_name}...")
    metrics[period_name] = {}
    filtered_metrics[period_name] = {}
    
    # Get the corresponding Y_test_df for the current period.
    # NOTE: `Y_test_df` stays bound to the last period's test set after the loop.
    Y_test_df = Y_test_dfs[period_name]
    
    for horizon in horizons:
        # Construct the filename
        filename = f"metrics/insample/{period_name}/metrics_{horizon}_{data_date}_{data_size}.csv"
        
        # Read the CSV file
        df = pd.read_csv(filename)
        
        # Ensure 'unique_id' is of type string
        df['unique_id'] = df['unique_id'].astype('string')
        # Store the original dataframe
        metrics[period_name][horizon] = df
        
        # Apply the filter_high_mape function with its default thresholds
        filtered_df = filter_high_mape(df, Y_test_df)
        
        # Store the filtered dataframe
        filtered_metrics[period_name][horizon] = filtered_df

In [None]:
# Summary of the filtered metrics per period, as returned by ts_utils.summarize_metrics()
metric_results_filtered = {}

for period_name in periods:
    # The four filtered horizon dataframes of this period, shortest horizon first
    df1, df2, df3, df4 = (
        filtered_metrics[period_name][key] for key in ('1_day', '7_day', '14_day', '30_day')
    )

    result = ts_utils.summarize_metrics(df1, df2, df3, df4)

    metric_results_filtered[period_name] = result
    print(f"Finished summarizing metrics for {period_name}")

In [None]:
# Display the filtered summary of the first period
metric_results_filtered['period01']

### 4.1.2 Average and standard deviation over periods

In [None]:
# Step 1: Collect the dataframes into a list (one per period, in period order)
print("Collecting dataframes from 'metric_results_filtered'...")
dfs = []
for period_name in periods:
    df_styler = metric_results_filtered[period_name]
    df = df_styler.data  # Extract the DataFrame from the Styler object
    print(f"  Collected dataframe for {period_name} with shape {df.shape}")
    dfs.append(df)

# Step 2: Concatenate the dataframes along a new axis with keys as period names
# (the period name becomes the outermost index level, called 'period')
print("\nConcatenating dataframes and adding 'period' as a new index level...")
combined_df = pd.concat(dfs, keys=periods, names=['period'])
print(f"Combined dataframe shape: {combined_df.shape}")
print("Combined dataframe index levels:", combined_df.index.names)
print("Combined dataframe columns:", combined_df.columns.tolist())

# Step 3: Compute the mean and standard deviation across periods for each metric and horizon
# (grouping by the 'metric' and 'horizon' index levels collapses the 'period' level)
print("\nComputing mean and standard deviation across periods...")
mean_filtered_metrics = combined_df.groupby(['metric', 'horizon']).mean()
stds_metrics = combined_df.groupby(['metric', 'horizon']).std()

print(f"Filtered metrics (mean) dataframe shape: {mean_filtered_metrics.shape}")
print(f"Standard deviations dataframe shape: {stds_metrics.shape}")

In [None]:
# Apply styling to the averaged metrics when displaying:
# a bottom border on every table row ...
line_separator = [{'selector': 'tr', 'props': [('border-bottom', '1px solid black')]}]
# ... and the smallest value of each row (axis=1, i.e. across models) highlighted in green
styled_filtered_metrics = mean_filtered_metrics.style.highlight_min(color='palegreen', axis=1)
styled_filtered_metrics = styled_filtered_metrics.set_table_styles(line_separator, overwrite=False)
display(styled_filtered_metrics)

In [None]:
# Show also the standard deviations of the metrics, with the same row separator
# (`line_separator` is defined in the previous cell; no min-highlighting here)
styled_stds_metrics = stds_metrics.style.set_table_styles(line_separator, overwrite=False)
display(styled_stds_metrics)

### 4.2 Plot the metric distributions

In [None]:
# Loop over each horizon: keep only the series present in all periods after filtering,
# then compute the per-series mean and std of every metric/model across the periods.
# Results are stored in the notebook-level names eval_<horizon>_filtered[_std].
# NOTE: this cell rebinds `unique_ids`, `dfs` and `combined_df` used by earlier cells.
for horizon in horizons:
    print(f"\nProcessing horizon: {horizon}")
    
    # Collect the set of unique_ids present in each period for the current horizon
    unique_ids_per_period = []
    for period in periods:
        df = filtered_metrics[period][horizon]
        unique_ids = set(df['unique_id'])
        print(f"  Period {period} has {len(unique_ids)} unique_ids for horizon {horizon}")
        unique_ids_per_period.append(unique_ids)
    
    # Find unique_ids present in all periods
    common_unique_ids = set.intersection(*unique_ids_per_period)
    print(f"  {len(common_unique_ids)} unique_ids are present in all periods for horizon {horizon}")
    print(f"  Unique IDs present in all periods: {sorted(common_unique_ids)}")
    
    # Remove unique_ids not present in all periods from each period's dataframe
    dfs = []
    for period in periods:
        df = filtered_metrics[period][horizon]
        # Filter to keep only common_unique_ids (copy so the stored dataframe is not modified)
        df_filtered = df[df['unique_id'].isin(common_unique_ids)].copy()
        # Add period identifier
        df_filtered['period'] = period
        dfs.append(df_filtered)
        print(f"    After filtering, period {period} has {len(df_filtered)} rows for horizon {horizon}")
    
    # Concatenate dataframes from all periods for the current horizon
    combined_df = pd.concat(dfs, ignore_index=True)
    print(f"  Combined dataframe for horizon {horizon} has {len(combined_df)} rows")
    
    # Reshape the dataframe to compute mean and std for each unique_id, metric, and model
    # Identify model columns (exclude 'unique_id', 'metric', and 'period')
    id_vars = ['unique_id', 'metric', 'period']
    model_columns = [col for col in combined_df.columns if col not in id_vars]
    print(f"  Models considered: {model_columns}")
    
    # Melt the dataframe to have models in one column
    melted_df = pd.melt(combined_df, id_vars=id_vars, value_vars=model_columns,
                        var_name='model', value_name='value')
    print(f"  Melted dataframe has {len(melted_df)} rows")
    
    # Group by 'unique_id', 'metric', 'model' and compute mean and std over periods
    grouped = melted_df.groupby(['unique_id', 'metric', 'model'])['value'].agg(['mean', 'std']).reset_index()
    print(f"  Computed mean and std for {len(grouped)} groups")
    
    # Pivot the grouped dataframe to get models as columns
    # For mean values
    mean_df = grouped.pivot_table(index=['unique_id', 'metric'], columns='model', values='mean').reset_index()
    # For standard deviation values
    std_df = grouped.pivot_table(index=['unique_id', 'metric'], columns='model', values='std').reset_index()
    
    # Store the dataframes with appropriate variable names
    # (note the naming: 'eval_1_day_...' is singular, the other horizons use '..._days_...')
    if horizon == '1_day':
        eval_1_day_filtered = mean_df
        eval_1_day_filtered_std = std_df
    elif horizon == '7_day':
        eval_7_days_filtered = mean_df
        eval_7_days_filtered_std = std_df
    elif horizon == '14_day':
        eval_14_days_filtered = mean_df
        eval_14_days_filtered_std = std_df
    elif horizon == '30_day':
        eval_30_days_filtered = mean_df
        eval_30_days_filtered_std = std_df
    
    print(f"  Stored mean and std dataframes for horizon {horizon}")

print("\nProcessing complete.")

In [None]:
# The models whose metric distribution should be plotted
models_to_plot = ['ARIMA', 'PatchTST', 'Chronos (large)']

# The metric whose distribution should be plotted
metric = 'mape'

# Horizons to include in the plots.
# NOTE: this rebinds `horizons`, which earlier cells use for the file-name tags
# ('1_day', '7_day', ...); re-run the cell that defines those before re-reading metrics.
horizons = ['7 days', '14 days', '30 days']

# The evaluation dataframes (per-series means across periods, one per horizon)
eval_dfs = ts_utils.define_eval_dfs(eval_1_day_filtered, eval_7_days_filtered, 
                                    eval_14_days_filtered, eval_30_days_filtered, horizons)

# Plot the distributions.
# NOTE(review): `Y_test_df` is whatever the filtering loop bound last (the final period's
# test set in a top-to-bottom run) -- confirm this is the intended test set.
ts_utils.plot_metric_distribution(eval_dfs, models_to_plot, metric, Y_test_df, horizons)

### 4.3 Plotting metrics over time

In [None]:
def plot_metrics_academic_style(mean_df, std_df, models=None):
    """Plot mean metric values with standard-deviation error bars per forecasting horizon.

    One subplot is drawn per metric; every selected model is one line with error bars.
    The figure is saved as 'metrics_over_time.png' (300 dpi) and then shown.

    Parameters
    ----------
    mean_df, std_df : pandas.DataFrame or Styler
        Mean and standard deviation of the metrics, indexed by the levels 'metric' and
        'horizon', with one column per model. Styler objects are unwrapped via ``.data``.
        Horizon labels must be keys of the label-to-days mapping defined below.
    models : sequence of str, optional
        Model columns to plot. Defaults to ``['Naive', 'Chronos (large)']``, the
        selection this function previously hard-coded.

    Notes
    -----
    Updates the global matplotlib font size through ``plt.rcParams``.
    """
    import matplotlib.pyplot as plt

    # Ensure dataframes are correctly formatted
    if isinstance(mean_df, pd.io.formats.style.Styler):
        mean_df = mean_df.data
    if isinstance(std_df, pd.io.formats.style.Styler):
        std_df = std_df.data

    # Get list of metrics
    metrics = mean_df.index.get_level_values('metric').unique()

    # Mapping from horizon label to number of days (used as x coordinates)
    horizon_mapping = {'01 day':1, '07 days':7, '14 days':14, '30 days':30}

    # Models to draw; keep the previous selection as the default
    models = ['Naive', 'Chronos (large)'] if models is None else list(models)

    # Set up plot styles
    plt.rcParams.update({'font.size': 12})  # Increase font size for readability
    markers = ['o', 's', '^', 'D', 'v', 'P', '*', 'X']  # Unique markers for up to 8 models
    linestyles = ['-', '--', '-.', ':']  # Different line styles
    colors = ['black', 'blue', 'green', 'red', 'purple', 'brown', 'orange', 'gray']

    # Create subplots: two columns and at least two rows (12 x 10 for up to four
    # metrics, as before); extra rows are added when there are more than four metrics
    n_cols = 2
    n_rows = max(2, -(-len(metrics) // n_cols))  # ceiling division
    fig, axs = plt.subplots(n_rows, n_cols, figsize=(12, 5 * n_rows))
    axs = axs.flatten()

    for ax, metric in zip(axs, metrics):
        # Extract data for the current metric (rows are indexed by horizon label)
        metric_mean = mean_df.loc[metric]
        metric_std = std_df.loc[metric]

        # Convert this metric's horizon labels to numerical days
        horizon_labels = metric_mean.index.get_level_values('horizon')
        horizons = [horizon_mapping[h] for h in horizon_labels]

        # For each model, plot the mean and standard deviation over horizons
        for j, model in enumerate(models):
            # Get mean and std values for this model
            mean_values = metric_mean[model].values
            std_values = metric_std[model].values

            # Plot the mean values with error bars
            ax.errorbar(horizons, mean_values, yerr=std_values, label=model,
                        color=colors[j % len(colors)],
                        marker=markers[j % len(markers)],
                        linestyle=linestyles[j % len(linestyles)],
                        linewidth=1.5, markersize=6, capsize=3, elinewidth=1)

        # Set labels and title
        ax.set_title(f'{metric.upper()} over Forecasting Horizons', fontsize=14)
        ax.set_xlabel('Forecasting Horizon (days)', fontsize=12)
        ax.set_ylabel(metric.upper(), fontsize=12)
        ax.set_xticks(horizons)
        ax.set_xticklabels([str(h) for h in horizons])
        ax.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Hide grid cells that did not receive a metric
    for unused_ax in axs[len(metrics):]:
        unused_ax.set_visible(False)

    # Adjust layout to make space for the legend
    plt.tight_layout(rect=[0, 0, 1, 0.93])

    # Create a single legend for all subplots (every subplot draws the same models)
    handles, labels = axs[0].get_legend_handles_labels()
    fig.legend(handles, labels, loc='upper center', ncol=max(1, len(models)),
               bbox_to_anchor=(0.5, 1.02), fontsize=12)

    # Save the figure with high resolution
    plt.savefig('metrics_over_time.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Plot the across-period means with their standard deviations as error bars
plot_metrics_academic_style(mean_filtered_metrics, stds_metrics)

In [None]:
# Plot the across-period mean metrics with the ts_utils helper
ts_utils.plot_metrics_over_time(mean_filtered_metrics)

### 4.4 Plotting model metrics against baselines

In [None]:
# Model to compare against baselines
model = 'Chronos (large)'

# Metric to plot (can be mae, mape, rmse or rmsse)
metric = 'mape'

# Baselines to use (has to be 3)
baselines = ['PatchTST', 'NHITS', 'TimesNet']

# Function for plotting, applied to the 30-day evaluation dataframe
# (the helper is named *_rmsse but receives the metric chosen above)
ts_utils.compare_single_model_rmsse(eval_30_days_filtered, model, metric, 30, baselines, outlier_percentage=5)

In [None]:
# Model to compare against baselines
model = 'Chronos (large)'

# Metric to plot (can be mae, mape, rmse or rmsse)
metric = 'mape'

# Baselines to use (has to be 3)
baselines = ['Naive', 'ARIMA', 'ETS']

# Function for plotting, applied to the 30-day evaluation dataframe
# (same call as the previous cell, but with outlier_percentage=10 instead of 5)
ts_utils.compare_single_model_rmsse(eval_30_days_filtered, model, metric, 30, baselines, outlier_percentage=10)

### 4.5 Plotting clustered performances

In [None]:
# Calculate the groups: 8 groups using the 'ApproxEntropy' grouping method
group_df = ts_utils.GroupSeries(in_sample_data, num_groups=8, group_method='ApproxEntropy')

# Merge the groups with the 4 dataframes (left join on 'unique_id', so every
# evaluation row is kept even when no group is available for its series)
eval_1_day_grouped = eval_1_day_filtered.merge(group_df, on='unique_id', how='left')
eval_7_days_grouped = eval_7_days_filtered.merge(group_df, on='unique_id', how='left')
eval_14_days_grouped = eval_14_days_filtered.merge(group_df, on='unique_id', how='left')
eval_30_days_grouped = eval_30_days_filtered.merge(group_df, on='unique_id', how='left')

# Display the merged 1-day dataframe as the cell output
eval_1_day_grouped

In [None]:
# Plot the groups, using the in-sample data and the grouped 30-day evaluation dataframe
ts_utils.PlotGroups(in_sample_data, eval_30_days_grouped)

In [None]:
# List of dataframes and corresponding forecasting horizons (same order in both lists)
dataframes = [eval_1_day_grouped, eval_7_days_grouped, eval_14_days_grouped, eval_30_days_grouped]
# NOTE: rebinds `horizons` to integer day counts
horizons = [1, 7, 14, 30]

# Models we want to plot.
# NOTE: rebinds `models`, which the prediction-retrieval cells use for file paths.
models = ['Naive', 'TimesNet', 'PatchTST', 'NHITS', 'Chronos (small)', 'Chronos (large)']

ts_utils.PlotGroupPerformance(dataframes, models, horizons)

### 4.6 In-sample vs out-sample

In [None]:
# Change the out-sample data to the long format: one row per (date, series) pair, with
# the original column name stored in 'unique_id' and the cell value stored in 'y'
Y_df_out = out_sample_data.melt(id_vars=['date'], var_name='unique_id', value_name='y')
# Rename the date column to 'ds', matching the in-sample frame
Y_df_out = Y_df_out.rename(columns={'date':'ds'})

# Convert date column to datetime type
Y_df_out['ds'] = pd.to_datetime(Y_df_out['ds'])

In [None]:
# Forecast window length (in dates) and the total evaluation span of 12 such windows
fh = 30
horizon = 12 * fh

# All distinct dates of the out-sample data, in ascending order
unique_dates = sorted(Y_df_out['ds'].unique())

# The cutoff is the date sitting `horizon + 1` positions from the end
cutoff_date = unique_dates[-horizon - 1]

# Training data: every row dated on or before the cutoff
Y_train_df_out = Y_df_out.loc[Y_df_out['ds'] <= cutoff_date]

In [None]:
# Containers for the six out-sample evaluation windows
input_dfs_out = []
test_dfs_out = []

# Build six (input, test) pairs; consecutive windows start 2 * fh dates apart
for i in range(6):
    # Last date that still belongs to the input data, and last date of the test window
    test_start_date = unique_dates[2 * i * fh - horizon]
    test_end_date = unique_dates[(2 * i + 1) * fh - horizon]

    # Input data: everything up to and including the start date
    input_df = Y_df_out.loc[Y_df_out['ds'].le(test_start_date)]
    input_dfs_out.append(input_df)

    # Test data: dates strictly after the start date, up to and including the end date
    test_df = Y_df_out.loc[Y_df_out['ds'].gt(test_start_date) & Y_df_out['ds'].le(test_end_date)]
    test_dfs_out.append(test_df)

# Expose the six input periods under individual names
(Y_input_df_0_out, Y_input_df_1_out, Y_input_df_2_out,
 Y_input_df_3_out, Y_input_df_4_out, Y_input_df_5_out) = input_dfs_out

# Expose the six test periods under individual names
(Y_test_df_0_out, Y_test_df_1_out, Y_test_df_2_out,
 Y_test_df_3_out, Y_test_df_4_out, Y_test_df_5_out) = test_dfs_out

In [None]:
# Period folder names ('period01' ... 'period06') and the horizon tags used in file names
periods = [f'period{i+1:02d}' for i in range(6)]  # ['period01', 'period02', ..., 'period06']
horizons = ['1_day', '7_day', '14_day', '30_day']

# metrics_out[period][horizon] -> out-sample metrics dataframe read from disk
metrics_out = {}

# NOTE: the inner loop variable rebinds the notebook-level name `horizon`
for period_name in periods:
    metrics_out[period_name] = {}
    for horizon in horizons:
        # Location of the stored out-sample metrics for this period and horizon
        filename = f"metrics/outsample/{period_name}/metrics_{horizon}_{data_date}_{data_size}.csv"
        # Load the file with 'unique_id' cast to the pandas string dtype
        df = pd.read_csv(filename).astype({'unique_id': 'string'})
        metrics_out[period_name][horizon] = df

# Summary per period, as returned by ts_utils.summarize_metrics()
metric_results_out = {}

for period_name in periods:
    # The four horizon dataframes of this period, shortest horizon first
    df1, df2, df3, df4 = (
        metrics_out[period_name][key] for key in ('1_day', '7_day', '14_day', '30_day')
    )

    result = ts_utils.summarize_metrics(df1, df2, df3, df4)
    metric_results_out[period_name] = result

# Show the summary of the first period as the cell output
metric_results_out['period01']

In [None]:
# Initialize nested dictionaries to store original and filtered dataframes.
# NOTE: this resets `metrics_out` to an empty dict; it is filled again by the filtering loop.
metrics_out = {}
filtered_metrics_out = {}

# Create a mapping of period names to their corresponding out-sample Y_test_df
Y_test_dfs_out = {f'period{i+1:02d}': test_dfs_out[i] for i in range(6)}

In [None]:
# Loop over periods and horizons to read the dataframes and apply filtering.
# Fills metrics_out[period][horizon] (as read) and filtered_metrics_out[period][horizon].
for period_name in periods:
    print(f"Processing {period_name}...")
    metrics_out[period_name] = {}
    filtered_metrics_out[period_name] = {}
    
    # Get the corresponding Y_test_df for the current period.
    # NOTE: this rebinds the notebook-level `Y_test_df` to an out-sample test set.
    Y_test_df = Y_test_dfs_out[period_name]
    
    for horizon in horizons:
        # Construct the filename
        filename = f"metrics/outsample/{period_name}/metrics_{horizon}_{data_date}_{data_size}.csv"
        
        # Read the CSV file
        df = pd.read_csv(filename)
        
        # Ensure 'unique_id' is of type string
        df['unique_id'] = df['unique_id'].astype('string')

        # Store the original dataframe
        metrics_out[period_name][horizon] = df
        
        # Apply the filter_high_mape function with its default thresholds
        filtered_df = filter_high_mape(df, Y_test_df)
        
        # Store the filtered dataframe
        filtered_metrics_out[period_name][horizon] = filtered_df

In [None]:
# Summary of the filtered out-sample metrics per period, as returned by ts_utils.summarize_metrics()
metric_results_filtered_out = {}

for period_name in periods:
    # The four filtered horizon dataframes of this period, shortest horizon first
    df1, df2, df3, df4 = (
        filtered_metrics_out[period_name][key] for key in ('1_day', '7_day', '14_day', '30_day')
    )

    result = ts_utils.summarize_metrics(df1, df2, df3, df4)

    metric_results_filtered_out[period_name] = result
    print(f"Finished summarizing metrics for {period_name}")

In [None]:
# Step 1: Collect the out-sample dataframes into a list (one per period, in period order)
# (the progress message now names the dictionary that is actually read)
print("Collecting dataframes from 'metric_results_filtered_out'...")
dfs = []
for period_name in periods:
    df_styler = metric_results_filtered_out[period_name]
    df = df_styler.data  # Extract the DataFrame from the Styler object
    print(f"  Collected dataframe for {period_name} with shape {df.shape}")
    dfs.append(df)

# Step 2: Concatenate the dataframes along a new axis with keys as period names
# (the period name becomes the outermost index level, called 'period')
print("\nConcatenating dataframes and adding 'period' as a new index level...")
combined_df = pd.concat(dfs, keys=periods, names=['period'])
print(f"Combined dataframe shape: {combined_df.shape}")
print("Combined dataframe index levels:", combined_df.index.names)
print("Combined dataframe columns:", combined_df.columns.tolist())

# Step 3: Compute the mean and standard deviation across periods for each metric and horizon
# (grouping by the 'metric' and 'horizon' index levels collapses the 'period' level)
print("\nComputing mean and standard deviation across periods...")
mean_filtered_metrics_out = combined_df.groupby(['metric', 'horizon']).mean()
stds_metrics_out = combined_df.groupby(['metric', 'horizon']).std()

print(f"Filtered metrics (mean) dataframe shape: {mean_filtered_metrics_out.shape}")
print(f"Standard deviations dataframe shape: {stds_metrics_out.shape}")

In [None]:
# Apply styling to the averaged out-sample metrics when displaying:
# a bottom border on every table row ...
line_separator = [{'selector': 'tr', 'props': [('border-bottom', '1px solid black')]}]
# ... and the smallest value of each row (axis=1, i.e. across models) highlighted in green
styled_filtered_metrics_out = mean_filtered_metrics_out.style.highlight_min(color='palegreen', axis=1)
styled_filtered_metrics_out = styled_filtered_metrics_out.set_table_styles(line_separator, overwrite=False)
display(styled_filtered_metrics_out)

In [None]:
# Show also the standard deviations of the out-sample metrics, with the same row
# separator (`line_separator` is defined in the previous cell; no min-highlighting here)
styled_stds_metrics_out = stds_metrics_out.style.set_table_styles(line_separator, overwrite=False)
display(styled_stds_metrics_out)

In [None]:
# Define model categories: maps a model's display name to one of the three
# category labels that are used as keys of `colors` below
model_categories = {
    'Naive': 'Baseline Model',
    'ARIMA': 'Baseline Model',
    'ETS': 'Baseline Model',
    'ES_bu': 'Baseline Model',
    'PatchTST': 'Deep Model',
    'NHITS': 'Deep Model',
    'DeepAR': 'Deep Model',
    'TimesNet': 'Deep Model',
    'ESRNN': 'Deep Model',
    'Chronos (small)': 'Pre-trained Model',
    'Chronos (large)': 'Pre-trained Model',
    'Chronos (FT)': 'Pre-trained Model',
    'Chronos': 'Pre-trained Model',
    'TimesFM': 'Pre-trained Model',
    'TimesFM (FT)': 'Pre-trained Model'
}

# Define lighter colors for each category (hex codes, keyed by category label)
colors = {
    'Deep Model': '#FFA07A',  # light salmon
    'Pre-trained Model': '#9370DB',  # medium purple
    'Baseline Model': '#ADD8E6'  # light blue
}

In [None]:
# Compare in-sample and out-sample MAPE with the ts_utils implementation, which is
# called here with four arguments (no standard-deviation frames).
# NOTE(review): the notebook also defines its own Plotting_MAPE that additionally takes
# two std dataframes -- confirm which of the two versions is meant to be used.
ts_utils.Plotting_MAPE(styled_filtered_metrics, styled_filtered_metrics_out, model_categories, colors)

In [None]:
def _unwrap_styler(obj):
    """Return the DataFrame wrapped by a pandas Styler; pass other objects through."""
    try:
        # Import the class directly instead of reaching through
        # ``pd.io.formats.style``: that attribute is only available once the
        # submodule has been imported somewhere.
        from pandas.io.formats.style import Styler
    except ImportError:
        # If the Styler module cannot be imported, no Styler can have been
        # passed in, so there is nothing to unwrap.
        return obj
    return obj.data if isinstance(obj, Styler) else obj


def _sorted_metric_with_std(mean_df, std_df, metric, horizon):
    """Return the (metric, horizon) row sorted ascending and its aligned std row."""
    scores = mean_df.loc[(metric, horizon)].sort_values(ascending=True)
    # A sample std taken over a single observation is NaN; treat it as
    # "no error bar" so it cannot poison the axis limits or label positions.
    spread = std_df.loc[(metric, horizon)][scores.index].fillna(0)
    return scores, spread


def Plotting_MAPE(df1, df2, std_df1, std_df2, model_categories, colors,
                  metric='mape', horizon='30 days'):
    """Draw in-sample and out-sample scores per model as two horizontal bar charts.

    Each panel shows one bar per model, sorted from the lowest to the highest
    score, coloured by model category, with the standard deviation as an
    error bar and the score printed next to the bar.

    Parameters
    ----------
    df1, df2 : DataFrame or Styler
        Mean scores for the left ("In-sample") and right ("Out-sample")
        panel. Rows are looked up with ``.loc[(metric, horizon)]`` and the
        columns are the model names.
    std_df1, std_df2 : DataFrame or Styler
        Standard deviations with the same layout as ``df1`` / ``df2``.
        NaN entries are drawn as a zero-length error bar.
    model_categories : dict
        Maps a model name to its category name.
    colors : dict
        Maps a category name to a matplotlib colour; also defines the legend.
    metric : str, optional
        First level of the row key; its upper-cased form is used in the
        axis labels. Defaults to ``'mape'``.
    horizon : str, optional
        Second level of the row key. Defaults to ``'30 days'``.

    Raises
    ------
    KeyError
        If the (metric, horizon) row is missing, a model has no std column,
        a model is absent from ``model_categories`` or its category is absent
        from ``colors``.
    """
    df1, df2, std_df1, std_df2 = (
        _unwrap_styler(obj) for obj in (df1, df2, std_df1, std_df2)
    )

    metric_label = metric.upper()
    panels = [
        (f'In-sample {metric_label}',
         *_sorted_metric_with_std(df1, std_df1, metric, horizon)),
        (f'Out-sample {metric_label}',
         *_sorted_metric_with_std(df2, std_df2, metric, horizon)),
    ]

    # Scale the figure with the number of models so the bars stay readable
    num_models = len(df1.columns)
    fig, axes = plt.subplots(ncols=2, figsize=(num_models * 14/8, num_models * 5/8))

    # Error bar config
    error_config = {'ecolor': 'black', 'elinewidth': 1, 'capsize': 3}

    for ax, (xlabel, scores, spread) in zip(axes, panels):
        models = scores.index
        colors_list = [colors[model_categories[model]] for model in models]

        values = scores.to_numpy(dtype=float)
        stds = spread.to_numpy(dtype=float)
        bar_ends = values + stds  # right end of every error bar

        # Plot horizontal bars with error bars
        ax.barh(models, values, xerr=stds, color=colors_list,
                error_kw=error_config)

        # Set labels and invert y-axis so the best (lowest) model is on top
        ax.set_xlabel(xlabel, fontsize=14, fontweight='bold')
        ax.invert_yaxis()

        # Annotate every bar with its score, just right of the error bar.
        # The offset is computed once and ignores non-finite scores.
        finite_values = values[np.isfinite(values)]
        pad = 0.02 * finite_values.max() if finite_values.size else 0.0
        for i, (v, end) in enumerate(zip(values, bar_ends)):
            if np.isfinite(end):
                ax.text(end + pad, i, f'{v:.2f}',
                        color='black', va='center', fontsize=10)

        # Leave 20% head-room for error bars and annotations; keep the
        # automatic limits when no finite, positive extent is available.
        finite_ends = bar_ends[np.isfinite(bar_ends)]
        if finite_ends.size and finite_ends.max() > 0:
            max_x = finite_ends.max()
            ax.set_xlim(0, max_x + 0.2 * max_x)

    # Adjust the plot layout
    plt.subplots_adjust(left=0.3, right=0.95, top=0.85, bottom=0.1)

    # Adding legend for model categories
    handles = [plt.Rectangle((0, 0), 1, 1, color=color) for color in colors.values()]
    labels = list(colors)
    fig.legend(handles, labels, loc='upper center', ncol=len(labels))

    # Reserve the top 5% of the figure for the legend
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

In [None]:
# Draw the comparison with the notebook-local function, passing the
# standard-deviation tables that it uses for the error bars
Plotting_MAPE(
    df1=styled_filtered_metrics,
    df2=styled_filtered_metrics_out,
    std_df1=styled_stds_metrics,
    std_df2=styled_stds_metrics_out,
    model_categories=model_categories,
    colors=colors,
)

In [None]:
from matplotlib.lines import Line2D

def plot_entropy_vs_metric(df, model_names, metric='mape'):
    """Scatter approximate entropy against a metric, with one trendline per model.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``'metric'`` column, an ``'ApproxEntropy'`` column and
        one column per entry of ``model_names`` holding that model's score.
    model_names : sequence of str
        Models to plot; each gets its own colour, points and dashed
        least-squares line.
    metric : str, optional
        Value of the ``'metric'`` column to keep; its upper-cased form is used
        in the axis label and title. Defaults to ``'mape'``.

    Rows whose entropy or score is NaN or infinite are left out, and no
    trendline is drawn for a model with fewer than two distinct entropy values.
    """
    model_names = list(model_names)
    metric_label = metric.upper()

    # Keep only the rows of the requested metric and the columns we need
    df_selected = df[df['metric'] == metric][['ApproxEntropy'] + model_names]

    # Melt the DataFrame to long format: one row per (entropy, model, score)
    df_melted = df_selected.melt(
        id_vars=['ApproxEntropy'],
        value_vars=model_names,
        var_name='Model',
        value_name='MetricValue'
    )

    # Remove NaNs and infinities: they cannot be plotted and would break the
    # least-squares fit below
    df_melted = (
        df_melted
        .replace([np.inf, -np.inf], np.nan)
        .dropna(subset=['ApproxEntropy', 'MetricValue'])
    )

    # Set up the plot
    plt.figure(figsize=(10, 6))

    # tab10 holds 10 colours; wrap around so longer model lists still get one
    palette = plt.cm.tab10.colors
    color_dict = {
        model: palette[i % len(palette)]
        for i, model in enumerate(model_names)
    }

    # For custom legend handles
    legend_elements = []

    # Plot scatter points and a trendline for each model
    for model in model_names:
        model_data = df_melted[df_melted['Model'] == model]
        x = model_data['ApproxEntropy']
        y = model_data['MetricValue']
        color = color_dict[model]

        plt.scatter(x, y, color=color, alpha=0.7)

        # A straight-line fit needs at least two distinct x positions
        if x.nunique() > 1:
            slope, intercept = np.polyfit(x, y, 1)
            x_vals = np.array([x.min(), x.max()])
            y_vals = intercept + slope * x_vals
            plt.plot(
                x_vals,
                y_vals,
                color=color,
                linestyle='--',
                linewidth=2
            )

        # One legend entry per model showing both the marker and the line style
        legend_elements.append(Line2D(
            [0], [0],
            color=color,
            marker='o',
            linestyle='--',
            markersize=6,
            linewidth=2,
            label=model
        ))

    plt.xlabel('Approximate Entropy')
    plt.ylabel(metric_label)
    plt.title(f'Approximate Entropy vs. {metric_label} for Different Models')
    plt.grid(True)
    plt.tight_layout()

    # Add the legend
    plt.legend(handles=legend_elements, loc='best', title='Models')

    plt.show()

In [None]:
# Models whose MAPE is plotted against approximate entropy
model_list = ['Naive', 'Chronos (large)']
plot_entropy_vs_metric(df=eval_30_days_grouped, model_names=model_list)