# Timeseries Forecasting (TimesNet model)

## 1. Installing dependencies

In [None]:
# pip install neuralforecast 

In [None]:
# pip install sktime

In [None]:
# pip install datasetsforecast

In [None]:
# Basics
import pandas as pd
import numpy as np

# Some functions for plotting and stuff
import utils as ts_utils

# Neural models
from neuralforecast import NeuralForecast
from neuralforecast.models import TimesNet
from neuralforecast.losses.pytorch import DistributionLoss
from neuralforecast.losses.pytorch import MQLoss

## 2. Data Preparation

In [None]:
# Dataset variant to load; the same value later selects the training budget
# and is embedded in the names of the prediction files
data_size = 'norm'

# Date tag embedded in the file name, written as DDMM
data_date = '2110' # '1806' = 18th of June

# Location of the wide-format CSV for the chosen date tag and size
dataset_path = f"~/Thesis/data/eod_balances_{data_date}_{data_size}.csv"

# Load the file (takes around 2 minutes)
dataset = pd.read_csv(dataset_path)

# Show the loaded frame as the cell output
dataset

In [None]:
# Create the timer shared by the whole notebook: later cells record named
# timestamps on it (start/end of training and of inference) and read the
# elapsed times back when the model statistics are written
timer = ts_utils.Timer()

### 2.1 In-sample and Out-sample split

In [None]:
# Calculate total amount of timeseries: every column except the leading
# date column (column 0) holds one series
num_timeseries = len(dataset.columns) - 1

# Fraction of the series that goes to the in-sample set
train_test_split = 0.8

# NOTE: despite its name, this is the number of *in-sample* series
# (the first 80% of the series columns). The name is kept so that any other
# cell relying on it keeps working.
num_out_of_sample = int(train_test_split * num_timeseries)

# In-sample dataframe: date column + the first `num_out_of_sample` series
in_sample_data = dataset.iloc[:, : num_out_of_sample + 1] # Training and testing

# Out-sample dataframe: date column + the remaining `n` series.
# The columns are selected by explicit position instead of `columns[-n:]`,
# because a negative slice with n == 0 would select *every* column rather
# than none.
n = num_timeseries - num_out_of_sample
out_sample_positions = [0] + list(range(num_out_of_sample + 1, num_timeseries + 1))
columns_to_keep = dataset.columns[out_sample_positions].tolist()
out_sample_data = dataset[columns_to_keep]

## 3. In-sample Analysis

### 3.1 Train/Test splitting and plotting

In [None]:
# Reshape the in-sample data from wide (one column per series) to long format
# with the columns ds / unique_id / y, parsing ds as datetimes along the way
Y_df = (
    in_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
# Forecast horizon of a single period (30 days) and the total hold-out
# length (12 such periods)
fh = 30
horizon = 12 * fh

# Every distinct date in the data, in ascending order
unique_dates = sorted(Y_df['ds'].unique())

# The last training date sits just before the final `horizon` dates
cutoff_date = unique_dates[-horizon - 1]

# Training data: everything up to and including the cutoff date
is_training_row = Y_df['ds'] <= cutoff_date
Y_train_df = Y_df.loc[is_training_row]

In [None]:
# Containers for the six input windows and their matching test windows
input_dfs, test_dfs = [], []

# The windows advance in strides of 2 * fh dates through the hold-out span
for i in range(6):
    # Distance (in dates, counted from the end) of the last input date
    offset = horizon - i * 2 * fh
    test_start_date = unique_dates[-offset]
    test_end_date = unique_dates[-(offset - fh)]

    # Input data: every row dated on or before test_start_date
    is_input_row = Y_df['ds'] <= test_start_date
    input_df = Y_df.loc[is_input_row]
    input_dfs.append(input_df)

    # Test data: the fh dates strictly after test_start_date, up to and
    # including test_end_date
    is_test_row = (Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)
    test_df = Y_df.loc[is_test_row]
    test_dfs.append(test_df)

# Expose the six input periods under individual names
(
    Y_input_df_0,
    Y_input_df_1,
    Y_input_df_2,
    Y_input_df_3,
    Y_input_df_4,
    Y_input_df_5,
) = input_dfs

# Expose the six test periods under individual names
(
    Y_test_df_0,
    Y_test_df_1,
    Y_test_df_2,
    Y_test_df_3,
    Y_test_df_4,
    Y_test_df_5,
) = test_dfs

In [None]:
# Timeserie to plot: a unique_id value, i.e. the name of one series column
# of the wide dataset (assumes a column named '6' exists — TODO confirm)
unique_id = '6'

# Plot the first input window against its test period for that series
ts_utils.plot_train_test_split(Y_input_df_0, Y_test_df_0, unique_id)

### 3.2 Training and predicting

In [None]:
# Decide max_steps: training budget (number of training steps) per dataset size
max_steps_by_data_size = {
    'tiny': 500,   # checked
    'full': 3500,  # 3500 is checked == 500 epochs
    'norm': 3500,
}

# Fail loudly on an unsupported size instead of leaving max_steps undefined
# (or silently reusing a stale value from an earlier run of this cell)
if data_size not in max_steps_by_data_size:
    raise ValueError(
        f"Unknown data_size {data_size!r}; expected one of {sorted(max_steps_by_data_size)}."
    )
max_steps = max_steps_by_data_size[data_size]

print(f"The maximum steps we take during training will be {max_steps}.")

#### 3.2.1 Train the model

In [None]:
# Set forecasting horizon (number of daily steps predicted per call)
fh = 30

# Set our confidence levels (in percent) for the MQLoss prediction intervals
levels = [60, 70, 80, 90]

timer.record_timestamp("start_train")

timesnet_model_insample = NeuralForecast(
    models=[
        TimesNet(
            h=fh,
            input_size=15 * fh,  # look-back window of 15 horizons
            loss=MQLoss(level=levels),
            scaler_type='standard',
            max_steps=max_steps,
            # val_check_steps is a number of steps, so pass an int:
            # integer division avoids the float that `/` produces, and
            # the floor of 1 keeps it valid for very small budgets
            val_check_steps=max(1, max_steps // 10),
        )
    ],
    freq='D',
)

# Fit the model on the training portion of the in-sample series
timesnet_model_insample.fit(df=Y_train_df)

timer.record_timestamp("end_train")

#### 3.2.2 Retrieve predictions

In [None]:
# Only this first predict call is timed; it is the inference time reported
# in the model statistics
timer.record_timestamp("start_inference")

# Forecast the first period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_0 = (
    timesnet_model_insample
    .predict(df=Y_input_df_0)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

timer.record_timestamp("end_inference")

# Write the first-period predictions to disk, index included
timesnet_model_insample_preds_0.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period01/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the second period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_1 = (
    timesnet_model_insample
    .predict(df=Y_input_df_1)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the second-period predictions to disk, index included
timesnet_model_insample_preds_1.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period02/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the third period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_2 = (
    timesnet_model_insample
    .predict(df=Y_input_df_2)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the third-period predictions to disk, index included
timesnet_model_insample_preds_2.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period03/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the fourth period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_3 = (
    timesnet_model_insample
    .predict(df=Y_input_df_3)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the fourth-period predictions to disk, index included
timesnet_model_insample_preds_3.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period04/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the fifth period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_4 = (
    timesnet_model_insample
    .predict(df=Y_input_df_4)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the fifth-period predictions to disk, index included
timesnet_model_insample_preds_4.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period05/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the sixth period from its input window and expose the
# 'TimesNet-median' column under the plain name 'TimesNet'
timesnet_model_insample_preds_5 = (
    timesnet_model_insample
    .predict(df=Y_input_df_5)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the sixth-period predictions to disk, index included
timesnet_model_insample_preds_5.to_csv(
    f'~/Thesis/predictions/TimesNet/insample/period06/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

## 4. Out-of-sample Analysis

### 4.1 Data Handling

In [None]:
# Reshape the out-of-sample data from wide to long format (ds / unique_id / y)
# and parse ds as datetimes. This rebinds Y_df, which held the in-sample
# long frame up to this point.
Y_df = (
    out_sample_data
    .melt(id_vars=['date'], var_name='unique_id', value_name='y')
    .rename(columns={'date': 'ds'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

In [None]:
# Forecast horizon of a single period (30 days) and the total hold-out
# length (12 such periods)
fh = 30
horizon = 12 * fh

# Every distinct date of the out-of-sample data, in ascending order
unique_dates = sorted(Y_df['ds'].unique())

# The cutoff date sits just before the final `horizon` dates
cutoff_date = unique_dates[-horizon - 1]

# Rows up to and including the cutoff date. This rebinds Y_train_df to the
# out-of-sample series; below it is only used for plotting.
is_before_cutoff = Y_df['ds'] <= cutoff_date
Y_train_df = Y_df.loc[is_before_cutoff]

In [None]:
# Containers for the six out-of-sample input windows and their test windows
input_dfs, test_dfs = [], []

# The windows advance in strides of 2 * fh dates through the hold-out span
for i in range(6):
    # Distance (in dates, counted from the end) of the last input date
    offset = horizon - i * 2 * fh
    test_start_date = unique_dates[-offset]
    test_end_date = unique_dates[-(offset - fh)]

    # Input data: every row dated on or before test_start_date
    is_input_row = Y_df['ds'] <= test_start_date
    input_df = Y_df.loc[is_input_row]
    input_dfs.append(input_df)

    # Test data: the fh dates strictly after test_start_date, up to and
    # including test_end_date
    is_test_row = (Y_df['ds'] > test_start_date) & (Y_df['ds'] <= test_end_date)
    test_df = Y_df.loc[is_test_row]
    test_dfs.append(test_df)

# Rebind the six input-period names to the out-of-sample windows
(
    Y_input_df_0,
    Y_input_df_1,
    Y_input_df_2,
    Y_input_df_3,
    Y_input_df_4,
    Y_input_df_5,
) = input_dfs

# Rebind the six test-period names to the out-of-sample windows
(
    Y_test_df_0,
    Y_test_df_1,
    Y_test_df_2,
    Y_test_df_3,
    Y_test_df_4,
    Y_test_df_5,
) = test_dfs

In [None]:
# Timeserie to plot: the series of the first row of the out-of-sample frame.
# Positional access (.iloc[0]) is used on purpose: a plain [0] is a *label*
# lookup, and Y_train_df is a filtered frame whose index label 0 is not
# guaranteed to survive the date filter (it would raise KeyError).
unique_id = Y_train_df['unique_id'].iloc[0]

# Plot the pre-cutoff data against the first test period for that series
ts_utils.plot_train_test_split(Y_train_df, Y_test_df_0, unique_id)

### 4.2 Retrieve predictions

In [None]:
# Forecast the first out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_0 = (
    timesnet_model_insample
    .predict(df=Y_input_df_0)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the first-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_0.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period01/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the second out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_1 = (
    timesnet_model_insample
    .predict(df=Y_input_df_1)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the second-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_1.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period02/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the third out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_2 = (
    timesnet_model_insample
    .predict(df=Y_input_df_2)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the third-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_2.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period03/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the fourth out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_3 = (
    timesnet_model_insample
    .predict(df=Y_input_df_3)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the fourth-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_3.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period04/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the fifth out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_4 = (
    timesnet_model_insample
    .predict(df=Y_input_df_4)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the fifth-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_4.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period05/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

In [None]:
# Forecast the sixth out-of-sample period with the model that was fitted on
# the in-sample series (no retraining); expose 'TimesNet-median' as 'TimesNet'
timesnet_model_outsample_preds_5 = (
    timesnet_model_insample
    .predict(df=Y_input_df_5)
    .rename(columns={'TimesNet-median': 'TimesNet'})
)

# Write the sixth-period out-of-sample predictions to disk, index included
timesnet_model_outsample_preds_5.to_csv(
    f'~/Thesis/predictions/TimesNet/outsample/period06/model_preds_{data_date}_{data_size}.csv',
    index=True,
)

### Model Statistics

In [None]:
# Identification of this run and the file that receives the statistics
model_name = "TimesNet"
file_path = "model_statistics.txt"

# NOTE(review): hard-coded count that overrides the num_timeseries computed
# from the dataset earlier in the notebook; it does not follow data_size.
# Confirm which count (all series vs. in-sample series) is meant here.
num_timeseries = 277

# Durations between the timestamps recorded around model fitting and around
# the first in-sample predict call
train_time = timer.elapsed_time("start_train", "end_train")
inference_time = timer.elapsed_time("start_inference", "end_inference")

# Hand everything to the shared statistics writer
ts_utils.write_statistics(
    model_name,
    num_timeseries,
    train_time,
    inference_time,
    file_path,
)