<a href="https://colab.research.google.com/github/Bumblebee2397/Time-Series-Forecasting/blob/master/TFT_for_time_series_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Preliminary Setup**

### Installing required packages

In [12]:
# Uses pip3 to install the extra packages into the user site (`--user`).
# No versions are pinned, so the installed releases may differ between runs.
!pip3 install pyunpack wget patool plotly cufflinks --user

# Restarts the IPython kernel so that the freshly installed packages can be imported.
# NOTE: this terminates the running kernel; the cells below have to be executed
# again after the restart.
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)  # True requests a restart instead of a plain shutdown



{'restart': True, 'status': 'ok'}

In [1]:
# Changing the TensorFlow version from 2.x to 1.x via the `%tensorflow_version`
# magic, then printing the version that was actually loaded as a sanity check.
%tensorflow_version 1.x
import tensorflow as tf
print(tf.__version__)

TensorFlow 1.x selected.
1.15.2


### Traversing to the directory containing TFT code (downloaded from GitHub)

In [2]:
# Check the current working directory of the kernel before changing it below.
import os
print(os.getcwd())

/content


In [3]:
# Traverse to the directory containing TFT, presumably so that its modules
# (`script_download_data`, `data_formatters`, `libs`) can be imported by the
# cells that follow.
# NOTE(review): assumes Google Drive is already mounted at /content/drive; no
# mount step is visible in this notebook -- confirm before running on a fresh runtime.
os.chdir('/content/drive/My Drive/TFT for TSF/tft')

### Downloading the traffic dataset

In [4]:
import pandas as pd
from script_download_data import main as download_data

In [5]:
# Download parameters
expt_name = 'traffic'                                  # Name of default experiment
output_folder = os.path.join(os.getcwd(), 'outputs')   # Root folder to save experiment outputs
force_download = False                                 # False: skip the download when the csv is already present; True: always re-download

# Create the output root; `exist_ok=True` keeps the cell safe to re-run.
os.makedirs(output_folder, exist_ok=True)

# Downloads the main csv file if it is not present, or if a refresh was requested
# through `force_download` (the flag was previously defined but never used).
csv_file = os.path.join(output_folder, 'data', 'traffic', 'hourly_data.csv')
if force_download or not os.path.exists(csv_file):
    # Reaching this branch already means a download is wanted, so the helper is
    # told to force it (same arguments as the original call).
    download_data(expt_name, force_download=True, output_folder=output_folder)

# Load the downloaded data; the first csv column holds the row index.
df = pd.read_csv(csv_file, index_col=0)

  mask |= (ar1 == a)


In [6]:
# Have a look at the first 24 rows of the data (the file is hourly, so this is
# presumably one day of readings -- verify against `time_on_day`).
df.head(24)

Unnamed: 0,values,prev_values,next_values,sensor_day,time_on_day,day_of_week,id,categorical_id,hours_from_start,categorical_day_of_week,categorical_time_on_day
0,0.019333,0.020933,0.0202,0,1,4,400000,400000,1.0,4,1
1,0.0202,0.019333,0.02245,0,2,4,400000,400000,2.0,4,2
2,0.02245,0.0202,0.029283,0,3,4,400000,400000,3.0,4,3
3,0.029283,0.02245,0.055483,0,4,4,400000,400000,4.0,4,4
4,0.055483,0.029283,0.073933,0,5,4,400000,400000,5.0,4,5
5,0.073933,0.055483,0.057067,0,6,4,400000,400000,6.0,4,6
6,0.057067,0.073933,0.059383,0,7,4,400000,400000,7.0,4,7
7,0.059383,0.057067,0.06405,0,8,4,400000,400000,8.0,4,8
8,0.06405,0.059383,0.069033,0,9,4,400000,400000,9.0,4,9
9,0.069033,0.06405,0.066433,0,10,4,400000,400000,10.0,4,10


### Data Definitions

In [7]:
from data_formatters.base import GenericDataFormatter, DataTypes, InputTypes

# View available inputs and data types.
# Each enum member is printed on its own line so the names can be copied
# straight into a column definition.
print("Available data types:")
for option in DataTypes:
    print(option)

print()
print("Available input types:")
for option in InputTypes:
    print(option)

Available data types:
DataTypes.REAL_VALUED
DataTypes.CATEGORICAL
DataTypes.DATE

Avaialbe input types:
InputTypes.TARGET
InputTypes.OBSERVED_INPUT
InputTypes.KNOWN_INPUT
InputTypes.STATIC_INPUT
InputTypes.ID
InputTypes.TIME


In [8]:
from libs import utils        # Load TFT helper functions
import sklearn.preprocessing  # Used for data standardization

# Implement formatting functions
class TrafficFormatter(GenericDataFormatter):
    """Defines and formats data for the traffic dataset.

    Real-valued inputs are z-score normalised with a single scaler shared by
    all entities (calibrated on the training split only) and categorical
    inputs are label-encoded.

    Attributes:
      column_definition: Defines input and data type of column used in the
        experiment.
      identifiers: Entity identifiers used in experiments.
    """

    # This defines the types used by each column
    _column_definition = [
      ('id', DataTypes.REAL_VALUED, InputTypes.ID),
      ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.TIME),
      ('values', DataTypes.REAL_VALUED, InputTypes.TARGET),
      ('time_on_day', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
      ('day_of_week', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
      ('hours_from_start', DataTypes.REAL_VALUED, InputTypes.KNOWN_INPUT),
      ('categorical_id', DataTypes.CATEGORICAL, InputTypes.STATIC_INPUT),
    ]

    def __init__(self):
        """Initialises the formatter with no calibrated scalers.

        Declaring the attributes here lets `transform_inputs` and
        `format_predictions` detect an uncalibrated formatter and raise a
        clear ValueError instead of an AttributeError.
        """
        super(TrafficFormatter, self).__init__()
        self.identifiers = None
        self._real_scalers = None
        self._cat_scalers = None
        self._target_scaler = None
        self._num_classes_per_cat_input = None

    def split_data(self, df, valid_boundary=151, test_boundary=166):
        """Splits data frame into training-validation-test data frames.

        The split is made on the `sensor_day` column. This also calibrates the
        scalers on the training split and transforms every split with them.

        Args:
          df: Source data frame to split.
          valid_boundary: First `sensor_day` value of the validation period.
          test_boundary: First `sensor_day` value of the test period.

        Returns:
          Tuple of transformed (train, valid, test) data.
        """

        print('Formatting train-valid-test splits.')

        index = df['sensor_day']
        train = df.loc[index < valid_boundary]
        # The validation and test frames start 7 days before their boundary,
        # presumably to provide the 7*24 steps of encoder history configured
        # in get_fixed_params.
        valid = df.loc[(index >= valid_boundary - 7) & (index < test_boundary)]
        test = df.loc[index >= test_boundary - 7]

        # Scalers are calibrated on the training split only.
        self.set_scalers(train)

        # Materialise a real tuple (as documented) rather than a one-shot
        # generator, so the result can be indexed or iterated more than once.
        return tuple(self.transform_inputs(data) for data in (train, valid, test))

    def set_scalers(self, df):
        """Calibrates scalers using the data supplied.

        Fits one StandardScaler on all real-valued inputs, a separate
        StandardScaler on the target column (used to invert predictions) and
        one LabelEncoder per categorical input.

        Args:
          df: Data to use to calibrate scalers.
        """
        print('Setting scalers with training data...')

        column_definitions = self.get_column_definition()
        id_column = utils.get_single_col_by_input_type(InputTypes.ID,
                                                       column_definitions)
        target_column = utils.get_single_col_by_input_type(InputTypes.TARGET,
                                                           column_definitions)

        # Extract identifiers in case required
        self.identifiers = list(df[id_column].unique())

        # Format real scalers (ID and TIME columns are excluded)
        real_inputs = utils.extract_cols_from_data_type(
            DataTypes.REAL_VALUED, column_definitions,
            {InputTypes.ID, InputTypes.TIME})

        data = df[real_inputs].values
        self._real_scalers = sklearn.preprocessing.StandardScaler().fit(data)
        self._target_scaler = sklearn.preprocessing.StandardScaler().fit(
            df[[target_column]].values)  # used for predictions

        # Format categorical scalers
        categorical_inputs = utils.extract_cols_from_data_type(
            DataTypes.CATEGORICAL, column_definitions,
            {InputTypes.ID, InputTypes.TIME})

        categorical_scalers = {}
        num_classes = []
        for col in categorical_inputs:
            # Set all to str so that we don't have mixed integer/string columns
            srs = df[col].apply(str)
            categorical_scalers[col] = sklearn.preprocessing.LabelEncoder().fit(
              srs.values)
            num_classes.append(srs.nunique())

        # Set categorical scaler outputs
        self._cat_scalers = categorical_scalers
        self._num_classes_per_cat_input = num_classes

    def transform_inputs(self, df):
        """Performs feature transformations.

        Applies the calibrated scalers: z-score normalisation of the
        real-valued inputs and label encoding of the categorical inputs.

        Args:
          df: Data frame to transform.

        Returns:
          Transformed copy of the data frame; `df` itself is not modified.

        Raises:
          ValueError: If the scalers have not been calibrated yet.
        """
        # Both scaler sets are needed below, so a single missing one is an error.
        if self._real_scalers is None or self._cat_scalers is None:
            raise ValueError('Scalers have not been set!')

        output = df.copy()

        column_definitions = self.get_column_definition()

        real_inputs = utils.extract_cols_from_data_type(
            DataTypes.REAL_VALUED, column_definitions,
            {InputTypes.ID, InputTypes.TIME})
        categorical_inputs = utils.extract_cols_from_data_type(
            DataTypes.CATEGORICAL, column_definitions,
            {InputTypes.ID, InputTypes.TIME})

        # Format real inputs
        output[real_inputs] = self._real_scalers.transform(df[real_inputs].values)

        # Format categorical inputs (cast to str, matching how the encoders were fitted)
        for col in categorical_inputs:
            string_df = df[col].apply(str)
            output[col] = self._cat_scalers[col].transform(string_df)

        return output

    def format_predictions(self, predictions):
        """Reverts any normalisation to give predictions in original scale.

        Every column except 'forecast_time' and 'identifier' is passed through
        the inverse of the target scaler.

        Args:
          predictions: Dataframe of model predictions.

        Returns:
          Data frame of unnormalised predictions.

        Raises:
          ValueError: If the target scaler has not been calibrated yet.
        """
        if self._target_scaler is None:
            raise ValueError('Scalers have not been set!')

        output = predictions.copy()

        column_names = predictions.columns

        for col in column_names:
            if col not in {'forecast_time', 'identifier'}:
                # The scaler was fitted on a single-column 2-D array, so give it
                # an (n_samples, 1) array and flatten the result again; recent
                # scikit-learn releases reject 1-D input.
                values = predictions[col].values.reshape(-1, 1)
                output[col] = self._target_scaler.inverse_transform(values).ravel()

        return output

    def get_fixed_params(self):
        """Returns fixed model parameters for experiments."""

        fixed_params = {
            'total_time_steps': 8*24,     # Total window length: encoder steps plus forecast steps
            'num_encoder_steps': 7*24,    # Length of LSTM encoder (ie. # historical inputs)
            'num_epochs': 100,            # Max number of epochs for training
            'early_stopping_patience': 5, # Early stopping threshold for # iterations with no loss improvement
            'multiprocessing_workers': 5  # Number of multi-processing workers
        }

        return fixed_params

### Training and Evaluating the TFT

In [9]:
# Instantiate the formatter and build the transformed train / validation / test frames
data_formatter = TrafficFormatter()
train, valid, test = data_formatter.split_data(df)

# Experiment-level parameters exposed by the formatter
data_params = data_formatter.get_experiment_params()

# Folder to save network weights during training.
model_folder = os.path.join(output_folder, 'saved_models', 'traffic', 'fixed')

# Model parameters for calibration, followed by the weights folder
model_params = dict(
    dropout_rate=0.3,           # Dropout discard rate
    hidden_layer_size=320,      # Internal state size of TFT
    learning_rate=0.001,        # ADAM initial learning rate
    minibatch_size=128,         # Minibatch size for training
    max_gradient_norm=100.,     # Max norm for gradient clipping
    num_heads=4,                # Number of heads for multi-head attention
    stack_size=1,               # Number of stacks (default 1 for interpretability)
    model_folder=model_folder,
)

# Append the formatter's parameters after the model-specific ones
model_params.update(data_params)

Formatting train-valid-test splits.
Setting scalers with training data...


In [10]:
from libs.tft_model import TemporalFusionTransformer

# Specify GPU usage: ask the TFT helper for a TensorFlow session config that
# targets GPU device 0. `tf_config` is passed to the sessions created below.
tf_config = utils.get_default_tensorflow_config(tf_device="gpu", gpu_id=0)

Selecting GPU ID=0


In [11]:
# Start from a clean graph and build the model inside its own graph/session.
tf.compat.v1.reset_default_graph()
with tf.Graph().as_default(), tf.compat.v1.Session(config=tf_config) as sess:

    tf.compat.v1.keras.backend.set_session(sess)

    # Create a TFT model.
    # `use_cudnn=False` means CuDNNLSTM cells are NOT used; pass True to run
    # the LSTM layers with CuDNNLSTM cells on a GPU.
    model = TemporalFusionTransformer(model_params,
                                    use_cudnn=False)

    # Training is disabled in this notebook; the evaluation section loads
    # previously saved weights from `model_folder` instead. Uncomment the
    # lines below to train from scratch.
    #
    # # Sample data into minibatches for training
    # if not model.training_data_cached():
    #     model.cache_batched_data(train, "train", num_samples=50)
    #     model.cache_batched_data(valid, "valid", num_samples=10)
    #
    # # Train and save model
    # model.fit()
    # model.save(model_folder)

Resetting temp folder...
*** TemporalFusionTransformer params ***
# dropout_rate = 0.3
# hidden_layer_size = 320
# learning_rate = 0.001
# minibatch_size = 128
# max_gradient_norm = 100.0
# num_heads = 4
# stack_size = 1
# model_folder = /content/drive/My Drive/TFT for TSF/tft/outputs/saved_models/traffic/fixed
# total_time_steps = 192
# num_encoder_steps = 168
# num_epochs = 100
# early_stopping_patience = 5
# multiprocessing_workers = 5
# column_definition = [('id', <DataTypes.REAL_VALUED: 0>, <InputTypes.ID: 4>), ('hours_from_start', <DataTypes.REAL_VALUED: 0>, <InputTypes.TIME: 5>), ('values', <DataTypes.REAL_VALUED: 0>, <InputTypes.TARGET: 0>), ('time_on_day', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('day_of_week', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('hours_from_start', <DataTypes.REAL_VALUED: 0>, <InputTypes.KNOWN_INPUT: 2>), ('categorical_id', <DataTypes.CATEGORICAL: 1>, <InputTypes.STATIC_INPUT: 3>)]
# input_size = 5
# output_size = 1
# 

In [24]:
# Shape of the slice of `test` (first 1000 rows) that the evaluation cell forecasts on.
test.iloc[0:1000, :].shape

(1000, 11)

## Evaluation and Prediction using the pre-trained model

In [25]:
# Number of leading rows of `test` to forecast on.
NUM_TEST_ROWS = 1000


def extract_numerical_data(data):
    """Strips out forecast time and identifier columns."""
    return data[[
      col for col in data.columns
      if col not in {"forecast_time", "identifier"}
    ]]


# Use the `tf.compat.v1` entry points, consistent with the model-construction
# cell above; the bare `tf.Session` / `tf.reset_default_graph` names only
# exist in TensorFlow 1.x.
tf.compat.v1.reset_default_graph()
with tf.Graph().as_default(), tf.compat.v1.Session(config=tf_config) as sess:

    tf.compat.v1.keras.backend.set_session(sess)

    # Create a new model & load weights
    model = TemporalFusionTransformer(model_params,
                                    use_cudnn=False)
    model.load(model_folder)

    # Make forecasts; `return_targets=True` adds the "targets" entry used below
    output_map = model.predict(test.iloc[0:NUM_TEST_ROWS, :], return_targets=True)

    # Bring targets and forecasts back to the original (unnormalised) scale
    targets = data_formatter.format_predictions(output_map["targets"])

    # Format predictions
    p50_forecast = data_formatter.format_predictions(output_map["p50"])
    p90_forecast = data_formatter.format_predictions(output_map["p90"])

    # Compute Losses: normalised quantile loss at the 0.5 and 0.9 quantiles
    p50_loss = utils.numpy_normalised_quantile_loss(
        extract_numerical_data(targets), extract_numerical_data(p50_forecast),
        0.5)
    p90_loss = utils.numpy_normalised_quantile_loss(
        extract_numerical_data(targets), extract_numerical_data(p90_forecast),
        0.9)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  0.07739288  0.03955202 -0.09887701  0.00948162 -0.21879433 -0.04601607
  0.09287839  0.00809182  0.01805272 -0.06296187 -0.14795761 -0.12241773
  0.05603879 -0.04685853 -0.15165152  0.01709236  0.09165497  0.06416281
 -0.16928847  0.11892383  0.17206778  0.11610553  0.02112488 -0.10520112
  0.05723071 -0.11360922 -0.11735214 -0.1713498  -0.06356564 -0.08091141
 -0.10178725 -0.1810047  -0.01998587  0.07070092  0.03897497 -0.2733673
  0.08302445 -0.0187608  -0.11678708 -0.06044887 -0.04079724 -0.07879058
  0.0324606   0.03260928 -0.23869754 -0.10700552  0.03089817 -0.11465174
 -0.26881588 -0.06532943  0.05993899 -0.11228778 -0.03248532 -0.06626307
  0.00819984 -0.01801739 -0.01816012 -0.15278673 -0.08721187  0.02464575
 -0.06214429  0.07527794  0.1145146  -0.10380276  0.04638393  0.04546439
  0.12673347 -0.21823458 -0.0522074   0.00416751 -0.07782676 -0.17062712
  0.11549252 -0.04223298 -0.11037263 -0.11125893 -0.17521772

In [26]:
# Normalised P50 quantile loss, one value per forecast horizon (t+0 to t+23).
p50_loss

t+0     0.065809
t+1     0.076967
t+2     0.082269
t+3     0.084646
t+4     0.086255
t+5     0.084886
t+6     0.085092
t+7     0.086312
t+8     0.086987
t+9     0.085360
t+10    0.085060
t+11    0.084944
t+12    0.084937
t+13    0.085444
t+14    0.085260
t+15    0.084837
t+16    0.086007
t+17    0.087363
t+18    0.089672
t+19    0.091522
t+20    0.092482
t+21    0.092408
t+22    0.093196
t+23    0.092788
dtype: float64

In [27]:
# Normalised P90 quantile loss, one value per forecast horizon (t+0 to t+23).
p90_loss

t+0     0.032772
t+1     0.036503
t+2     0.044857
t+3     0.041505
t+4     0.040980
t+5     0.046467
t+6     0.048682
t+7     0.048603
t+8     0.050389
t+9     0.049548
t+10    0.048932
t+11    0.048150
t+12    0.047965
t+13    0.048577
t+14    0.050436
t+15    0.052087
t+16    0.054139
t+17    0.057625
t+18    0.061893
t+19    0.062612
t+20    0.062635
t+21    0.062864
t+22    0.063323
t+23    0.064972
dtype: float64