# Timeseries Forecasting (Chronos-large model)

## 1. Installing dependencies

In [None]:
# !pip install autogluon.timeseries 

In [None]:
import pandas as pd

# Some functions for plotting and stuff
import utils as ts_utils

# Foundation models
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

## 2. Data Preparation

In [None]:
# Date tag used in the file name, written as DDMM ('1806' = 18th of June)
data_date = '2110'

# Size tag used in the file name
data_size = 'norm'

# Read the balances CSV for the chosen date and size (takes around 2 minutes)
csv_path = f"~/Thesis/data/eod_balances_{data_date}_{data_size}.csv"
dataset = pd.read_csv(csv_path)

# Show the loaded frame as the cell output
dataset

In [None]:
# Create the timer; later cells record named timestamps on it around training
# and inference and read the elapsed times back for the model statistics
timer = ts_utils.Timer()

### 2.1 In-sample and Out-sample split

In [None]:
# Total number of time series: every column except the first one, which is
# kept as the identifier column in both splits (assumed to be 'date', as the
# later melt uses id_vars=['date'])
num_timeseries = len(dataset.columns) - 1

# Fraction of the series that goes into the in-sample set
train_test_split = 0.8

# NOTE: despite its name this is the number of IN-sample series (the first
# `train_test_split` share of the columns); the name is kept unchanged so any
# other cell referring to it keeps working
num_out_of_sample = int(train_test_split * num_timeseries)

# In-sample dataframe: first column plus the first `num_out_of_sample` series
in_sample_data = dataset.iloc[:, : num_out_of_sample + 1] # Training and testing

# Number of series left for the out-of-sample set
n = num_timeseries - num_out_of_sample

# Out-of-sample dataframe: first column plus every series after the in-sample
# ones. The slice is anchored at the front rather than written as
# `columns[-n:]`, because with n == 0 the expression `[-0:]` selects ALL
# columns instead of none. For n >= 1 both forms pick the same columns.
columns_to_keep = dataset.columns[[0]].tolist() + dataset.columns[num_out_of_sample + 1:].tolist()
out_sample_data = dataset[columns_to_keep]

## 3. In-sample Analysis

### 3.1 Train/Test splitting and plotting

In [None]:
# Reshape wide -> long: one row per (date, series) pair, with the former
# column name in 'unique_id' and the cell value in 'y'; 'date' becomes 'ds'
Y_df = (
    in_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
)

# Parse the 'ds' column into datetime values
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

In [None]:
# Length of one forecast window (30 dates) and of the whole hold-out span
# (12 such windows)
fh = 30
horizon = 12 * fh

# Every distinct date in the data, in ascending order
unique_dates = sorted(Y_df['ds'].unique())

# Cutoff: the date just before the final `horizon` dates of the dataset
cutoff_date = unique_dates[-(horizon + 1)]

# Training data: every row dated on or before the cutoff
is_before_cutoff = Y_df['ds'] <= cutoff_date
Y_train_df = Y_df[is_before_cutoff]

In [None]:
# Build the 6 (input, test) pairs; consecutive test windows start 2 * fh
# dates apart, so every other fh-sized window is evaluated
input_dfs, test_dfs = [], []

for i in range(6):
    # Shift of this window relative to the first one
    shift = i * 2 * fh

    # Window boundaries, counted backwards from the last date
    test_start_date = unique_dates[-(horizon - shift)]
    test_end_date = unique_dates[-(horizon - shift - fh)]

    # Input data: all rows up to and including the window start
    input_df = Y_df[Y_df['ds'] <= test_start_date]
    input_dfs.append(input_df)

    # Test data: rows after the window start, up to and including the end
    in_test_window = (Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)
    test_df = Y_df[in_test_window]
    test_dfs.append(test_df)

### 3.1.1 Rename columns for Chronos-specific data handling

In [None]:
# Rename the long-format columns to the names used for the Chronos data:
# ds -> timestamp, unique_id -> item_id, y -> target
Y_train_df_chronos = Y_train_df.rename(columns={"ds": "timestamp", "unique_id": "item_id", "y": "target"})

In [None]:
def _to_chronos_frame(long_df):
    """Rename ds/unique_id/y to timestamp/item_id/target and wrap the result
    in a TimeSeriesDataFrame."""
    renamed = long_df.rename(columns={"ds": "timestamp", "unique_id": "item_id", "y": "target"})
    return TimeSeriesDataFrame(renamed)


# The 6 input periods
Y_input_df_0 = _to_chronos_frame(input_dfs[0])
Y_input_df_1 = _to_chronos_frame(input_dfs[1])
Y_input_df_2 = _to_chronos_frame(input_dfs[2])
Y_input_df_3 = _to_chronos_frame(input_dfs[3])
Y_input_df_4 = _to_chronos_frame(input_dfs[4])
Y_input_df_5 = _to_chronos_frame(input_dfs[5])

# The 6 matching test periods
Y_test_df_0 = _to_chronos_frame(test_dfs[0])
Y_test_df_1 = _to_chronos_frame(test_dfs[1])
Y_test_df_2 = _to_chronos_frame(test_dfs[2])
Y_test_df_3 = _to_chronos_frame(test_dfs[3])
Y_test_df_4 = _to_chronos_frame(test_dfs[4])
Y_test_df_5 = _to_chronos_frame(test_dfs[5])

In [None]:
# Identifier of the time series to plot
unique_id = '6'

# Plot the input and test data of the first period for that series
ts_utils.plot_train_test_split(input_dfs[0], test_dfs[0], unique_id)

### 3.2 Training (no weights are updated here: Chronos is used as a pretrained, zero-shot model)

In [None]:
# Forecast 30 steps ahead; this overwrites the 12 * fh value that `horizon`
# held while the data was being split
horizon = 30

timer.record_timestamp("start_train")

# Set up the Chronos predictor: it forecasts the "target" column, is scored
# with MASE and stores its artefacts under the given path
predictor_insample = TimeSeriesPredictor(
    target="target",
    eval_metric="MASE",
    prediction_length=horizon,
    path="chronos-model-runs/chronos-large-model",
)

# Fit with the Chronos-large preset and a time limit of 60
predictor_insample.fit(
    Y_train_df_chronos,
    time_limit=60,
    presets='chronos_large',
)

timer.record_timestamp("end_train")

### 3.3 Predicting (in this case it means nothing)

In [None]:
# Function that renames the columns correctly
def rename_confidence_intervals(df):
    """Return a new frame with the quantile columns relabelled.

    The median column '0.5' is removed when present. The quantile columns
    '0.1'..'0.4' are renamed to 'Chronos-large-lo-90'..'Chronos-large-lo-60'
    and '0.6'..'0.9' to 'Chronos-large-hi-60'..'Chronos-large-hi-90'.
    Columns that are not listed keep their names, and the frame passed in
    is left unmodified.

    NOTE(review): the 0.1/0.9 quantile pair spans an 80% interval but is
    labelled "90" (and likewise for the other pairs) -- confirm that these
    labels are what the consumers of the saved predictions expect.
    """
    quantiles = ('0.1', '0.2', '0.3', '0.4', '0.6', '0.7', '0.8', '0.9')
    labels = (
        'Chronos-large-lo-90',
        'Chronos-large-lo-80',
        'Chronos-large-lo-70',
        'Chronos-large-lo-60',
        'Chronos-large-hi-60',
        'Chronos-large-hi-70',
        'Chronos-large-hi-80',
        'Chronos-large-hi-90',
    )

    # errors='ignore' turns the drop into a no-op when there is no '0.5' column
    without_median = df.drop(columns=['0.5'], errors='ignore')
    return without_median.rename(columns=dict(zip(quantiles, labels)))

In [None]:
timer.record_timestamp("start_inference")

# Forecast the first in-sample period; only this call sits between the two
# inference timestamps
chronos_model_insample_preds_0 = predictor_insample.predict(Y_input_df_0)

timer.record_timestamp("end_inference")

# Move the index into columns, switch to the unique_id/ds naming and relabel
# the quantile columns
chronos_model_insample_preds_0 = (
    chronos_model_insample_preds_0
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the first period
chronos_model_insample_preds_0.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period01/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the second in-sample period, move the index into columns, switch
# to the unique_id/ds naming and relabel the quantile columns
chronos_model_insample_preds_1 = (
    predictor_insample.predict(Y_input_df_1)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the second period
chronos_model_insample_preds_1.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period02/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the third in-sample period, move the index into columns, switch
# to the unique_id/ds naming and relabel the quantile columns
chronos_model_insample_preds_2 = (
    predictor_insample.predict(Y_input_df_2)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the third period
chronos_model_insample_preds_2.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period03/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the fourth in-sample period, move the index into columns, switch
# to the unique_id/ds naming and relabel the quantile columns
chronos_model_insample_preds_3 = (
    predictor_insample.predict(Y_input_df_3)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the fourth period
chronos_model_insample_preds_3.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period04/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the fifth in-sample period, move the index into columns, switch
# to the unique_id/ds naming and relabel the quantile columns
chronos_model_insample_preds_4 = (
    predictor_insample.predict(Y_input_df_4)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the fifth period
chronos_model_insample_preds_4.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period05/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the sixth in-sample period, move the index into columns, switch
# to the unique_id/ds naming and relabel the quantile columns
chronos_model_insample_preds_5 = (
    predictor_insample.predict(Y_input_df_5)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the sixth period
chronos_model_insample_preds_5.to_csv(
    f'~/Thesis/predictions/Chronos-large/insample/period06/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

## 4. Out-of-sample Analysis

### 4.1 Train/Test splitting and plotting

In [None]:
# Reshape the out-of-sample data wide -> long: one row per (date, series)
# pair, with the former column name in 'unique_id' and the cell value in 'y';
# 'date' becomes 'ds'
Y_df = (
    out_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
)

# Parse the 'ds' column into datetime values
Y_df['ds'] = pd.to_datetime(Y_df['ds'])

In [None]:
# Length of one forecast window (30 dates) and of the whole hold-out span
# (12 such windows); this resets `horizon` to 12 * fh for the splitting below
fh = 30
horizon = 12 * fh

# Every distinct date in the out-of-sample data, in ascending order
unique_dates = sorted(Y_df['ds'].unique())

# Cutoff: the date just before the final `horizon` dates of the dataset
cutoff_date = unique_dates[-(horizon + 1)]

# Training data: every row dated on or before the cutoff
is_before_cutoff = Y_df['ds'] <= cutoff_date
Y_train_df = Y_df[is_before_cutoff]

In [None]:
# Build the 6 out-of-sample (input, test) pairs; consecutive test windows
# start 2 * fh dates apart, so every other fh-sized window is evaluated
input_dfs, test_dfs = [], []

for i in range(6):
    # Shift of this window relative to the first one
    shift = i * 2 * fh

    # Window boundaries, counted backwards from the last date
    test_start_date = unique_dates[-(horizon - shift)]
    test_end_date = unique_dates[-(horizon - shift - fh)]

    # Input data: all rows up to and including the window start
    input_df = Y_df[Y_df['ds'] <= test_start_date]
    input_dfs.append(input_df)

    # Test data: rows after the window start, up to and including the end
    in_test_window = (Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)
    test_df = Y_df[in_test_window]
    test_dfs.append(test_df)

### 4.1.1 Rename columns for chronos specific data handling

In [None]:
# Rename the long-format columns to the names used for the Chronos data:
# ds -> timestamp, unique_id -> item_id, y -> target.
# NOTE(review): no later cell visible here reads this frame; the out-of-sample
# predictions below reuse `predictor_insample` -- confirm it is still needed.
Y_train_df_chronos = Y_train_df.rename(columns={"ds": "timestamp", "unique_id": "item_id", "y": "target"})

In [None]:
def _to_chronos_frame(long_df):
    """Rename ds/unique_id/y to timestamp/item_id/target and wrap the result
    in a TimeSeriesDataFrame."""
    renamed = long_df.rename(columns={"ds": "timestamp", "unique_id": "item_id", "y": "target"})
    return TimeSeriesDataFrame(renamed)


# The 6 out-of-sample input periods
Y_input_df_0 = _to_chronos_frame(input_dfs[0])
Y_input_df_1 = _to_chronos_frame(input_dfs[1])
Y_input_df_2 = _to_chronos_frame(input_dfs[2])
Y_input_df_3 = _to_chronos_frame(input_dfs[3])
Y_input_df_4 = _to_chronos_frame(input_dfs[4])
Y_input_df_5 = _to_chronos_frame(input_dfs[5])

# The 6 matching out-of-sample test periods
Y_test_df_0 = _to_chronos_frame(test_dfs[0])
Y_test_df_1 = _to_chronos_frame(test_dfs[1])
Y_test_df_2 = _to_chronos_frame(test_dfs[2])
Y_test_df_3 = _to_chronos_frame(test_dfs[3])
Y_test_df_4 = _to_chronos_frame(test_dfs[4])
Y_test_df_5 = _to_chronos_frame(test_dfs[5])

### 4.2 Predicting (reusing the in-sample predictor; no new training)

In [None]:
# Forecast the first out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_0 = (
    predictor_insample.predict(Y_input_df_0)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the first period
chronos_model_outsample_preds_0.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period01/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the second out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_1 = (
    predictor_insample.predict(Y_input_df_1)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the second period
chronos_model_outsample_preds_1.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period02/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the third out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_2 = (
    predictor_insample.predict(Y_input_df_2)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the third period
chronos_model_outsample_preds_2.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period03/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the fourth out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_3 = (
    predictor_insample.predict(Y_input_df_3)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the fourth period
chronos_model_outsample_preds_3.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period04/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the fifth out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_4 = (
    predictor_insample.predict(Y_input_df_4)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the fifth period
chronos_model_outsample_preds_4.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period05/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

In [None]:
# Forecast the sixth out-of-sample period with the predictor from the
# in-sample section, move the index into columns, switch to the unique_id/ds
# naming and relabel the quantile columns
chronos_model_outsample_preds_5 = (
    predictor_insample.predict(Y_input_df_5)
    .reset_index()
    .rename(columns={'item_id': 'unique_id', 'timestamp': 'ds', 'mean': 'Chronos-large'})
    .pipe(rename_confidence_intervals)
)

# Save the predictions of the sixth period
chronos_model_outsample_preds_5.to_csv(
    f'~/Thesis/predictions/Chronos-large/outsample/period06/model_preds_{data_date}_{data_size}.csv',
    index=False,
)

### Model statistics

In [None]:
# Label of the model and the file the statistics are written to
model_name = "Chronos-large"
file_path = "model_statistics.txt"

# NOTE(review): hard-coded count that overwrites the `num_timeseries` value
# computed from the dataset columns earlier in the notebook -- confirm 277 is
# the intended number for the data that was actually loaded
num_timeseries = 277

# Elapsed times between the timestamps recorded around fit() and around the
# first in-sample predict() call
train_time = timer.elapsed_time("start_train", "end_train")
inference_time = timer.elapsed_time("start_inference", "end_inference")

ts_utils.write_statistics(model_name, num_timeseries, train_time, inference_time, file_path)