In [1]:
%load_ext autoreload
%autoreload 2
import sys 
sys.path.insert(0, '../src/')
import pandas as pd 
import numpy as np 
from utils import * 
from hts.hierarchy import HierarchyTree 
from datetime import datetime 
import torch 
from proption_model import * 


In [2]:
sales_train_validation = pd.read_csv('../data/sales_train_validation.csv')
sales_train_evaluation = pd.read_csv('../data/sales_train_evaluation.csv') 
calender = pd.read_csv('../data/calendar.csv') 
date_to_d = dict(zip(calender.date, calender.d)) 
d_to_date = dict(zip(calender.d, calender.date)) 

## Parent nodes sales 

In [3]:
parent_sales = sales_train_validation[sales_train_validation.columns[6:]].sum(axis=0).values
print(parent_sales.shape)

(1913,)


## Time related covariates: 

- Wehther it is weekend/not, we observed sales are high overweekend across three categories 

In [4]:
date = [d_to_date[d] for d in sales_train_validation.columns[6:]]

In [5]:
print(f'The statrting date is {date[0]}')
print(f'The statrting date is {date[-1]}')

The statrting date is 2011-01-29
The statrting date is 2016-04-24


In [6]:
d = datetime.strptime(date[2], '%Y-%m-%d')
if d.weekday() > 4:
    print ('Given date is weekend.')
else:
    print ('Given data is weekday.')

Given data is weekday.


In [7]:
d.weekday()

0

In [8]:
weekend_binary = [1 if (datetime.strptime(d, '%Y-%m-%d')).weekday() > 4 else 0 for d in date]
print(len(weekend_binary))

1913


## Proportions

In [9]:
stv = sales_train_validation[sales_train_validation.columns[6:]]
stv = stv.T
print(stv.shape)

(1913, 30490)


In [10]:
len(stv.sum(axis=1).values)

1913

## Time series embeddings 

In [13]:
hie_index = torch.arange(stv.shape[1])

hie_index_2d = hie_index.expand(stv.shape[0], stv.shape[1])

hie_index_3d = hie_index_2d.reshape(
    hie_index_2d.shape[0], hie_index_2d.shape[-1], 1
)

print(hie_index_3d.shape)


torch.Size([1913, 30490, 1])


In [14]:
stv_proportions = np.divide(stv.values, stv.sum(axis=1).values.reshape(-1,1))
print(stv_proportions.sum(axis=1).sum())

stv_proportions_3d = stv_proportions.reshape(stv_proportions.shape[0], stv_proportions.shape[1], 1)
print(stv_proportions_3d.shape)

proportions_tensor = torch.tensor(stv_proportions_3d)
print(proportions_tensor.shape)

1913.0
(1913, 30490, 1)
torch.Size([1913, 30490, 1])


In [15]:

weekend_binary_tensor = torch.tensor(weekend_binary).unsqueeze_(-1).unsqueeze_(-1)
weekend_binary_tensor = weekend_binary_tensor.expand(stv_proportions.shape[0], stv_proportions.shape[1], weekend_binary_tensor.shape[-1])
print(weekend_binary_tensor.shape)
print(weekend_binary_tensor[1,0])
print(weekend_binary_tensor[2,0])


torch.Size([1913, 30490, 1])
tensor([1])
tensor([0])


In [16]:
parent_sales_tensor = torch.tensor(parent_sales).unsqueeze_(-1).unsqueeze_(-1)
parent_sales_tensor = parent_sales_tensor.expand(stv_proportions.shape[0], stv_proportions.shape[1], parent_sales_tensor.shape[-1])
print(parent_sales_tensor.shape)

torch.Size([1913, 30490, 1])


In [17]:
data_3d = torch.cat((proportions_tensor, parent_sales_tensor,weekend_binary_tensor, hie_index_3d), -1)
data_3d = data_3d.double()
print(data_3d.shape)

torch.Size([1913, 30490, 4])


In [18]:
# dimension about the dataset
no_child = proportions_tensor.shape[1]
History = 28
Forward = 28

number_observations = data_3d.shape[0] - (History + Forward) + 1

data_3d_time_batched = torch.empty(
    number_observations, History + Forward, data_3d.shape[1], data_3d.shape[2]
)

for i in range(number_observations):

    data_3d_time_batched[i, :, :, :] = data_3d[i : i + History + Forward, :, :]

print(data_3d_time_batched.shape)
print(data_3d_time_batched[-1,:,:,:].shape)


#if torch.equal(data_3d_time_batched[-1, -1, :, :].double(), data_3d[-1, :, :].double()):

print("data correctly processed to generate time-bacted tensor")

input_tensor = torch.empty(
    number_observations,
    History,
    data_3d_time_batched.shape[-2],
    data_3d_time_batched.shape[-1],
)

## We first use the recursive predicitng mechanism in LSTM, in the future we release more blocks that adapt to teacher-forcing/mixed training
target_tensor = torch.empty(
    number_observations,
    Forward,
    data_3d_time_batched.shape[-2],
    1
    # data_3d_time_batched.shape[-1]
)

print(input_tensor.shape)
print(target_tensor.shape)

print("Entering the training pipeline")

for i in range(data_3d_time_batched.shape[0]):

    input_tensor[i] = data_3d_time_batched[i, :History, :, :]
    target_2d = data_3d_time_batched[i, History:, :, 0]
    target_tensor[i] = target_2d.reshape(
        target_2d.shape[0], target_2d.shape[1], 1
    )

    # print(input_tensor.shape)
    # print(target_tensor.shape)

print(input_tensor.shape)
print(target_tensor.shape)
    # print(target_tensor[-1,0,:,:].sum())

torch.Size([1858, 56, 30490, 4])
torch.Size([56, 30490, 4])
data correctly processed to generate time-bacted tensor
torch.Size([1858, 28, 30490, 4])
torch.Size([1858, 28, 30490, 1])
Entering the training pipeline
torch.Size([1858, 28, 30490, 4])
torch.Size([1858, 28, 30490, 1])


In [19]:
###---------- dimension on the model hypter-parameters from the paper ------------ ######
num_hts_embedd = no_child
hts_embedd_dim = 8
covariate_dim = 2 

lstm_input_dim = 2 + covariate_dim + hts_embedd_dim
lstm_hidden_dim = 48
lstm_num_layer = 1
lstm_output_dim = 64

mha_embedd_dim = lstm_output_dim
num_head = 4
num_attention_layer = 1
mha_output_dim = mha_embedd_dim
residual_output_dim = mha_output_dim
model_ouput_dim = 1

# define the model object
p_model = proportion_model(
    num_hts_embedd,
    hts_embedd_dim,  # ts embedding hyper pars
    lstm_input_dim,
    lstm_hidden_dim,
    lstm_num_layer,
    lstm_output_dim,  # lstm hyper pars
    mha_embedd_dim,
    num_head,
    num_attention_layer,  # mha hyper pars
    mha_output_dim,
    residual_output_dim,  # skip connection hyper pars
    model_ouput_dim,  # output later hyper pars
)

###---------- trainign parameters from the paper ------------ ######

n_epochs = 50
target_len = Forward
batch_size = 4
l_r = 0.00079

# start training
p_model.train(
    input_tensor,
    target_tensor,
    n_epochs,
    target_len,
    batch_size,
    learning_rate=l_r,
)

  0%|          | 0/50 [00:00<?, ?it/s]


RuntimeError: input.size(-1) must be equal to input_size. Expected 12, got 11