## Importing libraries and loading dataset

In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Embedding
import TCN.TCN.tcn as tcn
from TCN.TCN.tcn import TemporalConvNet
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split


In [2]:
df = pd.read_csv('train.csv', parse_dates=['date'], index_col = ['date'])
df.head()

Unnamed: 0_level_0,store,item,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01,1,1,13
2013-01-02,1,1,11
2013-01-03,1,1,14
2013-01-04,1,1,13
2013-01-05,1,1,10


In [3]:
#df = df.sort_values('date',ascending=True)
test = df[df.index.year == 2017]
test.reset_index(level=0, inplace= True)
train = df[df.index.year != 2017]
train.reset_index(level = 0, inplace = True)
train.head()

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10


# Dataset pre-processing

In [4]:
train_data = pd.DataFrame({'year': train['date'].dt.year-2013, 'month': train['date'].dt.month,
                           'day': train['date'].dt.day, 'weekday': train['date'].dt.weekday,
                           'store': train['store'], 'item': train['item'], 'sales': train['sales']},
                          columns =['year', 'month', 'day', 'weekday', 'store', 'item', 'sales'])

test_data = pd.DataFrame({'year': test['date'].dt.year-2013, 'month': test['date'].dt.month,
                           'day': test['date'].dt.day, 'weekday': test['date'].dt.weekday,
                           'store': test['store'], 'item': test['item'], 'sales': test['sales']},
                          columns =['year', 'month', 'day', 'weekday', 'store', 'item', 'sales'])


In [5]:
print(train_data.head())
print(train_data.shape)

   year  month  day  weekday  store  item  sales
0     0      1    1        1      1     1     13
1     0      1    2        2      1     1     11
2     0      1    3        3      1     1     14
3     0      1    4        4      1     1     13
4     0      1    5        5      1     1     10
(730500, 7)


In [6]:
X = np.array(train_data.drop('sales', axis = 1))
y = np.array(train_data['sales'])
X_test = np.array(test_data.drop('sales', axis = 1))
y_test = np.array(test_data['sales'])

In [7]:

def split_data(X_train, y_train, val_ratio = 0.2, val_year = 3, half_yearly = 1, randomly = True):
    
    # Splitting randomly
    if randomly:
        X_tr, y_tr, X_val, y_val = train_test_split(X_train, y_train, test_size = (val_ratio),
                                                          random_state = 6, shuffle = True)
    else:
        if half_yearly == 1:        #if validation data is first 6 months of val_year
            
            X_tr = X_train[(X_train[:,0]!=val_year) | (X_train[:,1]>6)]   #if not val_year or in last 6 months of year
            y_tr = y_train[(X_train[:,0]!=val_year) | (X_train[:,1]>6)]
            
            X_val = X_train[(X_train[:,0]==val_year) & (X_train[:,1]<=6)] #if val_year and first 6 months of year
            y_val = y_train[(X_train[:,0]==val_year) & (X_train[:,1]<=6)]
            
        else:                       #if validation data is last 6 months of val_year
            
            X_tr = X_train[(X_train[:,0]!=val_year) | (X_train[:,1]<=6)]  #if not val_year or in first 6 months of year
            y_tr = y_train[(X_train[:,0]!=val_year) | (X_train[:,1]<=6)]
            
            X_val = X_train[(X_train[:,0]==val_year) & (X_train[:,1]>6)]  #if val_year and last 6 months of year
            y_val = y_train[(X_train[:,0]==val_year) & (X_train[:,1]>6)]
            
        return X_tr, y_tr, X_val, y_val

In [8]:
X_train, y_train, X_val, y_val = split_data(X, y, False, 0.3, 3, 0)
print("Training:", X_train.shape, y_train.shape)
print("Validation:", X_val.shape, y_val.shape)

Training: (730500, 6) (730500,)
Validation: (0, 6) (0,)


# Creating TCN+LSTM hybrid model

In [9]:
# For embeddings, the thumb rule is, num_embeddings = no. of unique valus in category + 1 
# & embedding_dim = min(50,feat_dim(num_embeddings)/2)
dims = [np.unique(X_train[:,i]).size+1 for i in range(X_train.shape[1])]
print(dims)
embedding_dim = [(x, min(50, (x+1)//2)) for x in dims]
print(embedding_dim)

[5, 13, 32, 8, 11, 51]
[(5, 3), (13, 7), (32, 16), (8, 4), (11, 6), (51, 26)]


In [10]:
# Creating class for TCN+LSTM hybrid model
class LSTMTCNwithEmbeddings(nn.Module):
    def __init__(self, embedding_dim,  n_cont, out_size, ker_size, dense_layers,  num_channels = [1], dp = 0.3): #n_cont=no. of cont. feat. in dataframe
        super(LSTMTCNwithEmbeddings,self).__init__()
        self.embeds = nn.ModuleList([nn.Embedding(inp,out) for inp,out in embedding_dim])
        #print(self.embeds)
        self.emb_drop = nn.Dropout(dp)
        
        layer_list = []
        n_emb = sum((out for inp,out in embedding_dim))
        n_in = n_emb + n_cont
        n_hidden = [112,96]
        #n_dense = [64, 32]
        
        self.tcn1 = TemporalConvNet(n_in, [112], kernel_size=ker_size, dropout=dp)
        #self.lstm1 = nn.LSTM(n_in, n_hidden[0], 1, batch_first = True) #(no. of inputs, hidden_size, num_layers)
        self.lstm1 = nn.LSTM(n_hidden[0], n_hidden[1], 1, batch_first = True)
        n_in_dense = n_hidden[1]    
        for i in dense_layers:
            layer_list.append(nn.Linear(n_in_dense, i))
            layer_list.append(nn.ReLU(inplace = True))
            #layer_list.append(nn.BatchNorm1d(i))
            n_in_dense = i
            
        layer_list.append(nn.Dropout(dp))
        layer_list.append(nn.Linear(n_in_dense, out_size))

        self.dense_layers = nn.Sequential(*layer_list)


    def forward(self, X_cat, X_cont):
        embeddings = []
        n_hidden = [112, 96]
        batch_size = 365
        seq_len = 1
        for i, e in enumerate(self.embeds):
            embeddings.append(e(X_cat[:,i]))
        X = torch.cat(embeddings, axis =1)
        X = self.emb_drop(X)
        X = torch.cat([X, torch.unsqueeze(X_cont,1)], 1)
        
        X = X.reshape(1,X.size(1),X.size(0))
        #print(X.size())
        out = self.tcn1(X)
        #print(out.size())
        h_1 = torch.randn(1, out.size(2), n_hidden[1]) #hidden state  ##(num_layers, batch_size, hidden_size)
        c_1 = torch.randn(1, out.size(2), n_hidden[1]) #internal state/cell state
        #print(h_1.size(), c_1.size())
        h_out1, c_out1 = self.lstm1(out.reshape(out.size(2), 1, out.size(1)), (h_1,c_1)) #((batch_size(no. of elements in batch),seq_len, input_shape),(hidden))
        #h_out2, c_out2 = self.lstm2(h_out1, (h_2,c_2))
        #print(h_out1.size())
        f_out = self.dense_layers(h_out1)
        return f_out.reshape(f_out.size(0),-1)


In [11]:
# Initiating TCN+LSTM hybrid model with 1 TCN layer, 1 LSTM layer, 2 Fully connected layers and an output layer 
# having 112,96,64 & 32 nodes respectively

LSTMTCNmodel = LSTMTCNwithEmbeddings(embedding_dim[1:], 1, 1, ker_size=2,dense_layers=[64,32], num_channels=[1], dp=0)
LSTMTCNmodel

LSTMTCNwithEmbeddings(
  (embeds): ModuleList(
    (0): Embedding(13, 7)
    (1): Embedding(32, 16)
    (2): Embedding(8, 4)
    (3): Embedding(11, 6)
    (4): Embedding(51, 26)
  )
  (emb_drop): Dropout(p=0, inplace=False)
  (tcn1): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(60, 112, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp1): Chomp1d()
        (relu1): ReLU()
        (dropout1): Dropout(p=0, inplace=False)
        (conv2): Conv1d(112, 112, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp2): Chomp1d()
        (relu2): ReLU()
        (dropout2): Dropout(p=0, inplace=False)
        (net): Sequential(
          (0): Conv1d(60, 112, kernel_size=(2,), stride=(1,), padding=(1,))
          (1): Chomp1d()
          (2): ReLU()
          (3): Dropout(p=0, inplace=False)
          (4): Conv1d(112, 112, kernel_size=(2,), stride=(1,), padding=(1,))
          (5): Chomp1d()
          (6): ReLU()
          (7): Dropout

In [12]:
# custom loss function (optional use)
def smape(x,y):
    return 100*torch.mean(2*torch.abs(x-y)/(torch.abs(x)+torch.abs(y)))

optim = torch.optim.Adam(LSTMTCNmodel.parameters(), lr = 0.01)
lossfn = F.mse_loss

# Training

In [13]:
def fit(epochs, sets, model, X_train, y_train, lossfn, optimizer):
    ep_count = 0
    for i in range(sets//2):
        for j in range(2):
            X_tr, y_tr, X_val, y_val = split_data(X_train, y_train, val_year=i, half_yearly=j, randomly=False)
            losses = []
            for k in range(epochs):
                k+=1
                y_pred = LSTMTCNmodel(torch.from_numpy(X_tr[:,1:]), torch.from_numpy(X_tr[:,0]))
                y_tr = torch.tensor(y_tr,dtype=torch.float).reshape(-1,1)
                loss = lossfn(y_pred,torch.tensor(y_tr))
                losses.append(loss)
                #if k%2 == 1:
                print("Epoch number {} of validation year 20{} and half {} has MSE loss {}".format(k,13+i,j,loss.item()))
                ep_count+=1
                
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                if k%4 == 0:
                    with torch.no_grad():
                        yhat_val = LSTMTCNmodel(torch.from_numpy(X_val[:,1:]), torch.from_numpy(X_val[:,0]))
                        y_val = torch.tensor(y_val,dtype=torch.float).reshape(-1,1)
                        val_loss = torch.sqrt(lossfn(torch.tensor(y_val), yhat_val))
                    print("Validation loss at epoch {} of year 20{} half {} is {:.4f}".format(k,13+i,j,val_loss.item()))
    
    print("Total number of epochs is", ep_count)

In [14]:
import time

In [15]:
begin = time.time()
fit(10, 8, LSTMTCNmodel, X, y, lossfn, optim)
end = time.time()


  loss = lossfn(y_pred,torch.tensor(y_tr))


Epoch number 1 of validation year 2013 and half 0 has MSE loss 3420.99560546875


  y_tr = torch.tensor(y_tr,dtype=torch.float).reshape(-1,1)


Epoch number 2 of validation year 2013 and half 0 has MSE loss 3399.58056640625
Epoch number 3 of validation year 2013 and half 0 has MSE loss 3326.6044921875
Epoch number 4 of validation year 2013 and half 0 has MSE loss 3177.843994140625


  val_loss = torch.sqrt(lossfn(torch.tensor(y_val), yhat_val))


Validation loss at epoch 4 of year 2013 half 0 is 46.6387
Epoch number 5 of validation year 2013 and half 0 has MSE loss 2917.8310546875
Epoch number 6 of validation year 2013 and half 0 has MSE loss 2608.613525390625
Epoch number 7 of validation year 2013 and half 0 has MSE loss 2237.07763671875
Epoch number 8 of validation year 2013 and half 0 has MSE loss 1805.60302734375


  y_val = torch.tensor(y_val,dtype=torch.float).reshape(-1,1)


Validation loss at epoch 8 of year 2013 half 0 is 29.7601
Epoch number 9 of validation year 2013 and half 0 has MSE loss 1357.64599609375
Epoch number 10 of validation year 2013 and half 0 has MSE loss 975.9345703125
Epoch number 1 of validation year 2013 and half 1 has MSE loss 797.5214233398438
Epoch number 2 of validation year 2013 and half 1 has MSE loss 953.1113891601562
Epoch number 3 of validation year 2013 and half 1 has MSE loss 1278.9234619140625
Epoch number 4 of validation year 2013 and half 1 has MSE loss 1365.5224609375
Validation loss at epoch 4 of year 2013 half 1 is 38.3680
Epoch number 5 of validation year 2013 and half 1 has MSE loss 1214.736572265625
Epoch number 6 of validation year 2013 and half 1 has MSE loss 1000.3665771484375
Epoch number 7 of validation year 2013 and half 1 has MSE loss 847.5330810546875
Epoch number 8 of validation year 2013 and half 1 has MSE loss 796.21142578125
Validation loss at epoch 8 of year 2013 half 1 is 23.5881
Epoch number 9 of val

In [16]:
print("Time taken for training in a Ryzen 5 Hexa core 4600H CPU is {:.2f} minutes".format((end-begin)/60))

Time taken for training in a Ryzen 5 Hexa core 4600H CPU is 19.70 minutes


# Testing

In [17]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
#import torch_metrics as tm

In [18]:
def show_metrics(test, y_pred_final):
    metrics = {'R2_score': r2_score(test, y_pred_final), 'MAE': mean_absolute_error(test, y_pred_final),
               'RMSE': mean_squared_error(test, y_pred_final, squared=False),
               'MAPE': mean_absolute_percentage_error(test, y_pred_final)}
    adj_R2 = 1-(1-metrics['R2_score'])*(len(test)-1)/(len(test)-6-1)      #num of indep var = 6
    metrics['adj_R2'] = adj_R2
    print("R2 score on test set is", metrics['R2_score'])
    print("Mean Absolute Error on test set is", metrics['MAE'])
    print("Root Mean Square error on test set is", metrics['RMSE'])
    print("Mean Absolute Percentage Error on test set is", metrics['MAPE'])
    print("Adjusted R2 score on test set is", adj_R2)
    
    return metrics

In [19]:
predictions = LSTMTCNmodel(torch.from_numpy(X_test[:,1:]), torch.from_numpy(X_test[:,0]))
test_results = show_metrics(y_test, predictions.detach().numpy())

R2 score on test set is -0.0698741653697148
Mean Absolute Error on test set is 25.29713485937249
Root Mean Square error on test set is 32.63635535486093
Mean Absolute Percentage Error on test set is 0.5577620565618376
Adjusted R2 score on test set is -0.06990934066406718


# Saving the model

In [21]:
torch.save(LSTMTCNmodel.state_dict(), 'TCN+LSTM.pth')