## Time-Series Forecasting

This notebook covers the experimentation for choosing the forecasting model architecture to be used in the proposed design.

In [1]:
#Necessary imports

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import joblib
import re

from darts import TimeSeries
from darts.dataprocessing.transformers import Scaler
from darts.metrics import mape, rmse
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.models import TFTModel


  from .autonotebook import tqdm as notebook_tqdm



## Train

In [2]:
# Ratings table: read the zipped CSV and convert the epoch-second `timestamp`
# column to pandas datetimes in a single chained expression.
df = pd.read_csv('data/master_data.zip', compression="zip").assign(
    timestamp=lambda ratings: pd.to_datetime(ratings["timestamp"], unit='s')
)

# Movie lookup table; only its keys are used here, as the ordered array of
# movie ids to iterate over.
# NOTE(review): joblib.load unpickles the file — only load trusted artefacts.
movie_dict = joblib.load("data/movie_dict.pkl")
all_movies = np.array([*movie_dict.keys()])

# Optional: persist the iteration order of the movie ids.
#joblib.dump(all_movies, "all_movies_forecast_order-array.pkl")

### Generate Time-Series

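- The historical demand of each movie is increased by 1 at every time-step to prevent possible errors in calculations.
- Movies that have fewer than 48 time-steps are excluded from this experiment to ensure a minimum validation split of 25%. (Note: the code cell below currently sets `MINIMUM_MONTHS = 24`; the two values should be reconciled.)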
- For a faster implementation of this experiment, only 100 selected movies are considered as a subset of samples.


Covariates are generated for Month and Year values from the target time-series index.

In [3]:
# Build one monthly rating-count target series, plus year/month covariates,
# for every movie that has enough history. The three "kept" lists below stay
# index-aligned with each other.
global_series = []       # target TimeSeries, one per retained movie
global_covariates = []   # stacked year + month covariates per retained movie
movie_ids = []           # ids of the retained movies

excluded_movie_ids = []  # ids of movies dropped for having too little history
# NOTE(review): the markdown above quotes a 48-step minimum while this cell
# uses 24 — confirm which threshold is intended.
MINIMUM_MONTHS = 24

for i in all_movies:
    # A boolean mask selects this movie's rows directly and stays correct even
    # if `df` carries duplicate index labels.
    ts_movie = df.loc[df["movieId"] == i].set_index("timestamp")
    # Number of ratings per calendar month ('M' = month-end bins; newer pandas
    # releases spell this alias 'ME').
    ts_movie = ts_movie.groupby(pd.Grouper(freq='M'))["userId"].count()
    ts_movie.name = "RatingCounts"
    ts_movie = ts_movie + 1  # For preventing errors in calculations
    ts_movie = pd.DataFrame(ts_movie).reset_index()

    # Too few monthly steps: record the id and move on to the next movie.
    if len(ts_movie) < MINIMUM_MONTHS:
        excluded_movie_ids.append(i)
        continue

    movie_ids.append(i)
    ts = TimeSeries.from_dataframe(ts_movie, "timestamp", "RatingCounts")
    # The complete series is stored; no validation split happens in this cell.
    global_series.append(ts)

    # Year and month of each time-step, stacked into one two-component
    # covariate series and cast to float32.
    covs = datetime_attribute_timeseries(ts, attribute="year", one_hot=False)
    covs = covs.stack(datetime_attribute_timeseries(ts, attribute="month", one_hot=False))
    covs = covs.astype(np.float32)

    global_covariates.append(covs)

# Persist the ids of the skipped movies (written to the working directory).
joblib.dump(excluded_movie_ids, "excluded_movie_ids.pkl")

['excluded_movie_ids.pkl']

In [4]:
# Optional export of the unscaled series as {movie id: series} dictionaries
# (currently disabled).
# NOTE(review): `global_series` / `global_covariates` only contain the movies
# kept in `movie_ids`, so zipping them with `all_movies` misaligns keys and
# values whenever a movie was excluded; pair them with `movie_ids` if this
# cell is ever re-enabled.
# target_ts_dict = dict(zip(list(all_movies), global_series))
# covariate_ts_dict = dict(zip(list(all_movies), global_covariates))

# joblib.dump(target_ts_dict, "target_ts_dict.pkl")
# joblib.dump(covariate_ts_dict, "covariate_ts_dict.pkl")

Both the target values and covariates are scaled.

In [7]:
# Scale the target series and the covariates, then persist three artefacts for
# each: the scaled list, a {movie id: scaled series} dict, and the fitted scaler.
#
# Parallelism is configured on the Scaler itself: `n_jobs` is a constructor
# argument, whereas extra keyword arguments given to `fit_transform` are only
# forwarded to the per-series fit/transform hooks and do not enable parallel
# execution.
# NOTE(review): the artefacts are written to the working directory, while the
# reload cells below read from "data/forecasting/" — confirm how the files are
# meant to get there.

# --- targets ---
target_scaler = Scaler(n_jobs=-1)
global_series_scaled = target_scaler.fit_transform(global_series)
joblib.dump(global_series_scaled, "target_ts_scaled_series.pkl")
target_ts_scaled_dict = dict(zip(movie_ids, global_series_scaled))
joblib.dump(target_ts_scaled_dict, "target_ts_scaled_dict.pkl")
joblib.dump(target_scaler, "target_scaler.pkl")

# --- covariates ---
covariate_scaler = Scaler(n_jobs=-1)
global_covariates_scaled = covariate_scaler.fit_transform(global_covariates)
joblib.dump(global_covariates_scaled, "covariates_ts_scaled_series.pkl")
covariate_ts_scaled_dict = dict(zip(movie_ids, global_covariates_scaled))
joblib.dump(covariate_ts_scaled_dict, "covariate_ts_scaled_dict.pkl")
joblib.dump(covariate_scaler, "covariate_scaler.pkl")

['covariate_scaler.pkl']

In [3]:
def _as_series_list(loaded):
    """Return a loaded artefact as a plain list of series.

    The scaling cell writes the ``*_ts_scaled_series.pkl`` files as lists, but
    a ``{movie id: series}`` dict is accepted as well, in which case its values
    are returned in insertion order.
    """
    if isinstance(loaded, dict):
        return list(loaded.values())
    return list(loaded)


# Reload the scaled artefacts from disk.
global_series_scaled = joblib.load("data/forecasting/target_ts_scaled_series.pkl")
global_covariates_scaled = joblib.load("data/forecasting/covariates_ts_scaled_series.pkl")

# List views of the artefacts, whichever container type was pickled.
global_series_scaled_list = _as_series_list(global_series_scaled)
global_covariates_scaled_list = _as_series_list(global_covariates_scaled)

In [2]:
# Reload the scaled targets and covariates (in that order) from disk so that
# training can start without re-running the scaling cell.
global_series_scaled, global_covariates_scaled = (
    joblib.load("data/forecasting/target_ts_scaled_series.pkl"),
    joblib.load("data/forecasting/covariates_ts_scaled_series.pkl"),
)

In [3]:
# Training configuration.
INPUT_CHUNK_LENGTH = 12   # number of past time-steps fed to the model
OUTPUT_CHUNK_LENGTH = 1   # number of time-steps predicted per forward pass
N_EPOCHS = 20
PREDICTION_LENGTH = 12    # not used in this cell
RANDOM_STATE = 42         # fixed seed so that repeated runs are reproducible

# The cyclic "month" encoder is generated by the model itself as a future
# covariate; the scaled year/month series are supplied as past covariates.
model = TFTModel(
    input_chunk_length=INPUT_CHUNK_LENGTH,
    output_chunk_length=OUTPUT_CHUNK_LENGTH,
    n_epochs=N_EPOCHS,
    add_encoders={"cyclic": {"future": ["month"]}},
    random_state=RANDOM_STATE,
)
model.fit(global_series_scaled, past_covariates=global_covariates_scaled)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

   | Name                              | Type                             | Params
----------------------------------------------------------------------------------------
0  | train_metrics                     | MetricCollection                 | 0     
1  | val_metrics                       | MetricCollection                 | 0     
2  | input_embeddings                  | _MultiEmbedding                  | 0     
3  | static_covariates_vsn             | _VariableSelectionNetwork        | 0     
4  | encoder_vsn                       | _VariableSelectionNetwork        | 3.0 K 
5  | decoder_vsn                       | _VariableSelectionNetwork        | 1.2 K 
6  | static_context_grn                | _GatedResidualNetwork            | 1.1 K 
7  | static_context_hidden_encoder_grn | _GatedResidualNetwork            | 1.1 K 
8  | static_cont

Epoch 19: 100%|██████████| 30228/30228 [30:40<00:00, 16.42it/s, train_loss=0.0814]

`Trainer.fit` stopped: `max_epochs=20` reached.


Epoch 19: 100%|██████████| 30228/30228 [30:40<00:00, 16.42it/s, train_loss=0.0814]


TFTModel(hidden_size=16, lstm_layers=1, num_attention_heads=4, full_attention=False, feed_forward=GatedResidualNetwork, dropout=0.1, hidden_continuous_size=8, categorical_embedding_sizes=None, add_relative_index=False, loss_fn=None, likelihood=None, norm_type=LayerNorm, use_static_covariates=True, input_chunk_length=12, output_chunk_length=1, n_epochs=20, add_encoders={'cyclic': {'future': ['month']}})

In [4]:
# Persist the trained model as "forecasting-model.pkl" (relative to the working directory).
# NOTE(review): the CPU-conversion cell further down reads 'test_model.pkl', a
# different file name — confirm which artefact it is meant to convert.
model.save("forecasting-model.pkl")

In [None]:
# Re-serialise a GPU-trained model so that it can be opened on a CPU-only machine.
# Imports are kept inside the cell so it can be run on its own.
import torch
from darts.models import TFTModel

# Step 1: unpickle the saved object with all tensors remapped onto the CPU,
# then write the remapped object to a new file.
model_loaded = torch.load('test_model.pkl', map_location=torch.device('cpu'))
torch.save(model_loaded, 'cpu-test_model.pkl')

# Step 2: open the CPU copy through the darts loader.
# NOTE(review): darts may keep the network weights in a companion checkpoint
# file next to the saved model — confirm the converted file loads with weights.
model_loaded = TFTModel.load("cpu-test_model.pkl")