## Cloning TCN module for pytorch, importing modules and loading dataset

In [1]:
#! git clone https://github.com/locuslab/TCN.git

In [2]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Embedding
import TCN.TCN.tcn as tcn
from TCN.TCN.tcn import TemporalConvNet
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.model_selection import train_test_split


In [3]:
df = pd.read_csv('train.csv', parse_dates=['date'], index_col = ['date'])
df.head()

Unnamed: 0_level_0,store,item,sales
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-01-01,1,1,13
2013-01-02,1,1,11
2013-01-03,1,1,14
2013-01-04,1,1,13
2013-01-05,1,1,10


In [4]:
#df = df.sort_values('date',ascending=True)
test = df[df.index.year == 2017]
test.reset_index(level=0, inplace= True)
train = df[df.index.year != 2017]
train.reset_index(level = 0, inplace = True)
train.head()

Unnamed: 0,date,store,item,sales
0,2013-01-01,1,1,13
1,2013-01-02,1,1,11
2,2013-01-03,1,1,14
3,2013-01-04,1,1,13
4,2013-01-05,1,1,10


# Dataset pre-processing

In [5]:
train_data = pd.DataFrame({'year': train['date'].dt.year-2013, 'month': train['date'].dt.month,
                           'day': train['date'].dt.day, 'weekday': train['date'].dt.weekday,
                           'store': train['store'], 'item': train['item'], 'sales': train['sales']},
                          columns =['year', 'month', 'day', 'weekday', 'store', 'item', 'sales'])

test_data = pd.DataFrame({'year': test['date'].dt.year-2013, 'month': test['date'].dt.month,
                           'day': test['date'].dt.day, 'weekday': test['date'].dt.weekday,
                           'store': test['store'], 'item': test['item'], 'sales': test['sales']},
                          columns =['year', 'month', 'day', 'weekday', 'store', 'item', 'sales'])


In [6]:
print(train_data.head())
print(train_data.shape)

   year  month  day  weekday  store  item  sales
0     0      1    1        1      1     1     13
1     0      1    2        2      1     1     11
2     0      1    3        3      1     1     14
3     0      1    4        4      1     1     13
4     0      1    5        5      1     1     10
(730500, 7)


In [7]:
X = np.array(train_data.drop('sales', axis = 1))
y = np.array(train_data['sales'])
X_test = np.array(test_data.drop('sales', axis = 1))
y_test = np.array(test_data['sales'])

In [8]:

def split_data(X_train, y_train, val_ratio = 0.2, val_year = 3, half_yearly = 1, randomly = True):
    
    # Splitting randomly
    if randomly:
        X_tr, y_tr, X_val, y_val = train_test_split(X_train, y_train, test_size = (val_ratio),
                                                          random_state = 6, shuffle = True)
    else:
        if half_yearly == 1:        #if validation data is first 6 months of val_year
            
            X_tr = X_train[(X_train[:,0]!=val_year) | (X_train[:,1]>6)]   #if not val_year or in last 6 months of year
            y_tr = y_train[(X_train[:,0]!=val_year) | (X_train[:,1]>6)]
            
            X_val = X_train[(X_train[:,0]==val_year) & (X_train[:,1]<=6)] #if val_year and first 6 months of year
            y_val = y_train[(X_train[:,0]==val_year) & (X_train[:,1]<=6)]
            
        else:                       #if validation data is last 6 months of val_year
            
            X_tr = X_train[(X_train[:,0]!=val_year) | (X_train[:,1]<=6)]  #if not val_year or in first 6 months of year
            y_tr = y_train[(X_train[:,0]!=val_year) | (X_train[:,1]<=6)]
            
            X_val = X_train[(X_train[:,0]==val_year) & (X_train[:,1]>6)]  #if val_year and last 6 months of year
            y_val = y_train[(X_train[:,0]==val_year) & (X_train[:,1]>6)]
            
        return X_tr, y_tr, X_val, y_val

In [9]:
X_train, y_train, X_val, y_val = split_data(X, y, False, 0.3, 3, 0)
print("Training:", X_train.shape, y_train.shape)
print("Validation:", X_val.shape, y_val.shape)

Training: (730500, 6) (730500,)
Validation: (0, 6) (0,)


# Creating Temporal Convolutional Neural Network model

In [10]:
# For embeddings, the thumb rule is, num_embeddings = no. of unique valus in category + 1 
# & embedding_dim = min(50,feat_dim(num_embeddings)/2)
dims = [np.unique(X_train[:,i]).size+1 for i in range(X_train.shape[1])]
print(dims)
embedding_dim = [(x, min(50, (x+1)//2)) for x in dims]
print(embedding_dim)

[5, 13, 32, 8, 11, 51]
[(5, 3), (13, 7), (32, 16), (8, 4), (11, 6), (51, 26)]


In [11]:
# Creating class for TCN
class TCNwithEmbeddings(nn.Module):
    def __init__(self, embedding_dim, n_cont, out_size, ker_size, dense_layers,  num_channels = [1], dp = 0.3):    #n_cont=no. of cont. feat. in dataframe
        super(TCNwithEmbeddings,self).__init__()
        self.embeds = nn.ModuleList([nn.Embedding(inp,out) for inp,out in embedding_dim])
        self.emb_drop = nn.Dropout(dp)
        
        layer_list = []
        n_emb = sum((out for inp,out in embedding_dim))
        n_in = n_emb + n_cont
        self.tcn1 = TemporalConvNet(n_in, [112], kernel_size=ker_size, dropout=dp)
        self.tcn2 = TemporalConvNet(112, [96], kernel_size=ker_size, dropout=dp)
        inp_s = 96
        for i in dense_layers:
            layer_list.append(nn.Linear(inp_s, i))
            layer_list.append(nn.ReLU(inplace = True))
            #layer_list.append(nn.BatchNorm1d(i))
            layer_list.append(nn.Dropout(dp))
            inp_s = i
        layer_list.append(nn.Linear(dense_layers[-1], out_size))
        
        self.layers = nn.Sequential(*layer_list)
        
    def forward(self, X_cat, X_cont):
        embeddings = []
        for i, e in enumerate(self.embeds):
            embeddings.append(e(X_cat[:,i]))
        X = torch.cat(embeddings, axis =1)
        X = self.emb_drop(X)

        X = torch.cat([X, torch.unsqueeze(X_cont,1)], 1)
        X = X.reshape(1,X.size(1),X.size(0))
        out = self.tcn1(X)
        out = self.tcn2(out)
        out = self.layers(out.view(out.size(2),out.size(1)))
        return out


In [12]:
# Initiating TCN model with 2 TCN layers, 2 Fully connected layers and an output layer 
# having 112,96,64 & 32 nodes respectively

TCNmodel = TCNwithEmbeddings(embedding_dim[1:], 1, 1, ker_size=2,dense_layers=[64,32], num_channels=[1], dp=0)
TCNmodel

TCNwithEmbeddings(
  (embeds): ModuleList(
    (0): Embedding(13, 7)
    (1): Embedding(32, 16)
    (2): Embedding(8, 4)
    (3): Embedding(11, 6)
    (4): Embedding(51, 26)
  )
  (emb_drop): Dropout(p=0, inplace=False)
  (tcn1): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(60, 112, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp1): Chomp1d()
        (relu1): ReLU()
        (dropout1): Dropout(p=0, inplace=False)
        (conv2): Conv1d(112, 112, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp2): Chomp1d()
        (relu2): ReLU()
        (dropout2): Dropout(p=0, inplace=False)
        (net): Sequential(
          (0): Conv1d(60, 112, kernel_size=(2,), stride=(1,), padding=(1,))
          (1): Chomp1d()
          (2): ReLU()
          (3): Dropout(p=0, inplace=False)
          (4): Conv1d(112, 112, kernel_size=(2,), stride=(1,), padding=(1,))
          (5): Chomp1d()
          (6): ReLU()
          (7): Dropout(p=0

In [13]:
# custom loss function (for optional use)
def smape(x,y):
    return 100*torch.mean(2*torch.abs(x-y)/(torch.abs(x)+torch.abs(y)))

optim = torch.optim.Adam(TCNmodel.parameters(), lr = 0.01)
lossfn = F.mse_loss

# Training

In [14]:
def fit(epochs, sets, model, X_train, y_train, lossfn, optimizer):
    ep_count = 0
    for i in range(sets//2):
        for j in range(2):
            X_tr, y_tr, X_val, y_val = split_data(X_train, y_train, val_year=i, half_yearly=j, randomly=False)
            losses = []
            for k in range(epochs):
                k+=1
                y_pred = TCNmodel(torch.from_numpy(X_tr[:,1:]), torch.from_numpy(X_tr[:,0]))
                y_tr = torch.tensor(y_tr,dtype=torch.float).reshape(-1,1)
                loss = lossfn(y_pred,torch.tensor(y_tr))
                losses.append(loss)
                print("Epoch number {} of validation year 20{} and half {} has MSE loss {}".format(k,13+i,j,loss.item()))
                ep_count+=1
                
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                if k%10 == 0:
                    with torch.no_grad():
                        yhat_val = TCNmodel(torch.from_numpy(X_val[:,1:]), torch.from_numpy(X_val[:,0]))
                        y_val = torch.tensor(y_val,dtype=torch.float).reshape(-1,1)
                        val_loss = torch.sqrt(lossfn(torch.tensor(y_val), yhat_val))
                    print("Validation loss at epoch {} of year 20{} half {} is {:.4f}".format(k,13+i,j,val_loss.item()))
    
    print("Total number of epochs is", ep_count)

In [15]:
import time
begin = time.time()
fit(50, 8, TCNmodel, X, y, lossfn, optim)
end = time.time()


  loss = lossfn(y_pred,torch.tensor(y_tr))


Epoch number 1 of validation year 2013 and half 0 has MSE loss 3435.500244140625


  y_tr = torch.tensor(y_tr,dtype=torch.float).reshape(-1,1)


Epoch number 2 of validation year 2013 and half 0 has MSE loss 3427.930908203125
Epoch number 3 of validation year 2013 and half 0 has MSE loss 3366.068115234375
Epoch number 4 of validation year 2013 and half 0 has MSE loss 3056.044189453125
Epoch number 5 of validation year 2013 and half 0 has MSE loss 3032.1240234375
Epoch number 6 of validation year 2013 and half 0 has MSE loss 2656.79345703125
Epoch number 7 of validation year 2013 and half 0 has MSE loss 2698.4970703125
Epoch number 8 of validation year 2013 and half 0 has MSE loss 2637.587158203125
Epoch number 9 of validation year 2013 and half 0 has MSE loss 2557.9013671875
Epoch number 10 of validation year 2013 and half 0 has MSE loss 2659.690185546875


  val_loss = torch.sqrt(lossfn(torch.tensor(y_val), yhat_val))


Validation loss at epoch 10 of year 2013 half 0 is 44.3446
Epoch number 11 of validation year 2013 and half 0 has MSE loss 2548.502685546875
Epoch number 12 of validation year 2013 and half 0 has MSE loss 2541.233154296875
Epoch number 13 of validation year 2013 and half 0 has MSE loss 2560.31884765625
Epoch number 14 of validation year 2013 and half 0 has MSE loss 2520.39111328125
Epoch number 15 of validation year 2013 and half 0 has MSE loss 2474.1640625
Epoch number 16 of validation year 2013 and half 0 has MSE loss 2507.276123046875
Epoch number 17 of validation year 2013 and half 0 has MSE loss 2473.999755859375
Epoch number 18 of validation year 2013 and half 0 has MSE loss 2443.62451171875
Epoch number 19 of validation year 2013 and half 0 has MSE loss 2453.67431640625
Epoch number 20 of validation year 2013 and half 0 has MSE loss 2444.47509765625


  y_val = torch.tensor(y_val,dtype=torch.float).reshape(-1,1)


Validation loss at epoch 20 of year 2013 half 0 is 42.6721
Epoch number 21 of validation year 2013 and half 0 has MSE loss 2412.85205078125
Epoch number 22 of validation year 2013 and half 0 has MSE loss 2402.53955078125
Epoch number 23 of validation year 2013 and half 0 has MSE loss 2395.778076171875
Epoch number 24 of validation year 2013 and half 0 has MSE loss 2368.671875
Epoch number 25 of validation year 2013 and half 0 has MSE loss 2349.00048828125
Epoch number 26 of validation year 2013 and half 0 has MSE loss 2337.1728515625
Epoch number 27 of validation year 2013 and half 0 has MSE loss 2316.575927734375
Epoch number 28 of validation year 2013 and half 0 has MSE loss 2289.305908203125
Epoch number 29 of validation year 2013 and half 0 has MSE loss 2267.835205078125
Epoch number 30 of validation year 2013 and half 0 has MSE loss 2245.180419921875
Validation loss at epoch 30 of year 2013 half 0 is 40.8194
Epoch number 31 of validation year 2013 and half 0 has MSE loss 2211.7136

Epoch number 15 of validation year 2014 and half 0 has MSE loss 601.2650146484375
Epoch number 16 of validation year 2014 and half 0 has MSE loss 600.5956420898438
Epoch number 17 of validation year 2014 and half 0 has MSE loss 600.885986328125
Epoch number 18 of validation year 2014 and half 0 has MSE loss 600.2864990234375
Epoch number 19 of validation year 2014 and half 0 has MSE loss 599.8524169921875
Epoch number 20 of validation year 2014 and half 0 has MSE loss 599.6129150390625
Validation loss at epoch 20 of year 2014 half 0 is 24.2148
Epoch number 21 of validation year 2014 and half 0 has MSE loss 598.67626953125
Epoch number 22 of validation year 2014 and half 0 has MSE loss 598.298583984375
Epoch number 23 of validation year 2014 and half 0 has MSE loss 597.5454711914062
Epoch number 24 of validation year 2014 and half 0 has MSE loss 596.8092651367188
Epoch number 25 of validation year 2014 and half 0 has MSE loss 596.3521728515625
Epoch number 26 of validation year 2014 and

Epoch number 10 of validation year 2015 and half 0 has MSE loss 557.7366333007812
Validation loss at epoch 10 of year 2015 half 0 is 25.0553
Epoch number 11 of validation year 2015 and half 0 has MSE loss 557.4647827148438
Epoch number 12 of validation year 2015 and half 0 has MSE loss 558.4695434570312
Epoch number 13 of validation year 2015 and half 0 has MSE loss 557.883544921875
Epoch number 14 of validation year 2015 and half 0 has MSE loss 556.7520141601562
Epoch number 15 of validation year 2015 and half 0 has MSE loss 556.5944213867188
Epoch number 16 of validation year 2015 and half 0 has MSE loss 557.184326171875
Epoch number 17 of validation year 2015 and half 0 has MSE loss 557.2127685546875
Epoch number 18 of validation year 2015 and half 0 has MSE loss 556.3094482421875
Epoch number 19 of validation year 2015 and half 0 has MSE loss 555.9057006835938
Epoch number 20 of validation year 2015 and half 0 has MSE loss 556.2853393554688
Validation loss at epoch 20 of year 2015 

Epoch number 4 of validation year 2016 and half 0 has MSE loss 523.5337524414062
Epoch number 5 of validation year 2016 and half 0 has MSE loss 524.0191650390625
Epoch number 6 of validation year 2016 and half 0 has MSE loss 520.2859497070312
Epoch number 7 of validation year 2016 and half 0 has MSE loss 523.6299438476562
Epoch number 8 of validation year 2016 and half 0 has MSE loss 522.2738647460938
Epoch number 9 of validation year 2016 and half 0 has MSE loss 520.477783203125
Epoch number 10 of validation year 2016 and half 0 has MSE loss 523.5716552734375
Validation loss at epoch 10 of year 2016 half 0 is 28.6523
Epoch number 11 of validation year 2016 and half 0 has MSE loss 520.6221923828125
Epoch number 12 of validation year 2016 and half 0 has MSE loss 521.19970703125
Epoch number 13 of validation year 2016 and half 0 has MSE loss 521.6058349609375
Epoch number 14 of validation year 2016 and half 0 has MSE loss 519.6415405273438
Epoch number 15 of validation year 2016 and half

Epoch number 49 of validation year 2016 and half 1 has MSE loss 532.4546508789062
Epoch number 50 of validation year 2016 and half 1 has MSE loss 532.3786010742188
Validation loss at epoch 50 of year 2016 half 1 is 25.0402
Total number of epochs is 400


In [16]:
print("Time taken for training in a Ryzen 5 Hexa core 4600H CPU is {:.2f} minutes".format((end-begin)/60))

Time taken for training in a Ryzen 5 Hexa core 4600H CPU is 97.96 minutes



# Testing

In [17]:
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
#import torch_metrics as tm

In [18]:
def show_metrics(test, y_pred_final):
    metrics = {'R2_score': r2_score(test, y_pred_final), 'MAE': mean_absolute_error(test, y_pred_final),
               'RMSE': mean_squared_error(test, y_pred_final, squared=False),
               'MAPE': mean_absolute_percentage_error(test, y_pred_final)}
    adj_R2 = 1-(1-metrics['R2_score'])*(len(test)-1)/(len(test)-6-1)      #num of indep var = 6
    metrics['adj_R2'] = adj_R2
    print("R2 score on test set is", metrics['R2_score'])
    print("Mean Absolute Error on test set is", metrics['MAE'])
    print("Root Mean Square error on test set is", metrics['RMSE'])
    print("Mean Absolute Percentage Error on test set is", metrics['MAPE'])
    print("Adjusted R2 score on test set is", adj_R2)
    
    return metrics

In [19]:
predictions = TCNmodel(torch.from_numpy(X_test[:,1:]), torch.from_numpy(X_test[:,0]))
test_results = show_metrics(y_test, predictions.detach().numpy())

R2 score on test set is 0.22736186203334408
Mean Absolute Error on test set is 20.519154600854115
Root Mean Square error on test set is 27.734693681675626
Mean Absolute Percentage Error on test set is 0.41503353019106237
Adjusted R2 score on test set is 0.22733645925719492


# Saving the model

In [20]:
torch.save(TCNmodel.state_dict(), 'TCN.pth')