#### Imports

In [1]:
import numpy as np
import pandas as pd
import math

from tqdm.auto import tqdm
from sklearn.preprocessing import OrdinalEncoder
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import warnings
warnings.filterwarnings("ignore")

# Compute device for the whole notebook: CUDA when torch reports it available, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

#### Data preprocessing

In [2]:
# Read the training table: 'id' becomes the index and 'date' is parsed to datetimes.
train = pd.read_csv(
    'train.csv',
    index_col='id',
    parse_dates=['date'],
)
display(train.head())

Unnamed: 0_level_0,date,store_nbr,family,sales,onpromotion
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,2013-01-01,1,BABY CARE,0.0,0
2,2013-01-01,1,BEAUTY,0.0,0
3,2013-01-01,1,BEVERAGES,0.0,0
4,2013-01-01,1,BOOKS,0.0,0


In [3]:
# Store metadata, indexed by store number. The listed columns are replaced
# by int64 ordinal codes produced by the fitted encoder.
cats = ['city', 'state', 'type', 'cluster']
encoder = OrdinalEncoder(dtype=np.int64)
stores = pd.read_csv('stores.csv', index_col='store_nbr')
stores[cats] = encoder.fit_transform(stores[cats])
display(stores.head())

Unnamed: 0_level_0,city,state,type,cluster
store_nbr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,18,12,3,12
2,18,12,3,12
3,18,12,3,7
4,18,12,3,8
5,21,14,3,3


In [4]:
# Quick sanity checks on the raw training table.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in display() only appended a stray "None" to the output.
train.info()
display(train.describe())
display(train.isna().sum())
display(train.head())

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3000888 entries, 0 to 3000887
Data columns (total 5 columns):
 #   Column       Dtype         
---  ------       -----         
 0   date         datetime64[ns]
 1   store_nbr    int64         
 2   family       object        
 3   sales        float64       
 4   onpromotion  int64         
dtypes: datetime64[ns](1), float64(1), int64(2), object(1)
memory usage: 137.4+ MB


None

Unnamed: 0,store_nbr,sales,onpromotion
count,3000888.0,3000888.0,3000888.0
mean,27.5,357.7757,2.60277
std,15.58579,1101.998,12.21888
min,1.0,0.0,0.0
25%,14.0,0.0,0.0
50%,27.5,11.0,0.0
75%,41.0,195.8473,0.0
max,54.0,124717.0,741.0


date           0
store_nbr      0
family         0
sales          0
onpromotion    0
dtype: int64

Unnamed: 0_level_0,date,store_nbr,family,sales,onpromotion
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,2013-01-01,1,BABY CARE,0.0,0
2,2013-01-01,1,BEAUTY,0.0,0
3,2013-01-01,1,BEVERAGES,0.0,0
4,2013-01-01,1,BOOKS,0.0,0


In [5]:
# labeling "family"
# Integer codes follow the order in which each family first appears in `train`.
families = train['family'].unique()
mapping = {name: code for code, name in enumerate(families)}
inv_mapping = {code: name for name, code in mapping.items()}

# Assign the mapped column back instead of calling replace(..., inplace=True)
# on the `train['family']` selection: the chained in-place form does not update
# `train` when pandas Copy-on-Write is active, and a dict lookup via map() is
# the direct way to apply a complete value -> code table.
train['family'] = train['family'].map(mapping)

In [6]:
# Left-join the encoded store attributes onto every training row via 'store_nbr'.
train = pd.merge(
    train,
    stores,
    how='left',
    left_on='store_nbr',
    right_on='store_nbr',
)
train.head()

Unnamed: 0,date,store_nbr,family,sales,onpromotion,city,state,type,cluster
0,2013-01-01,1,0,0.0,0,18,12,3,12
1,2013-01-01,1,1,0.0,0,18,12,3,12
2,2013-01-01,1,2,0.0,0,18,12,3,12
3,2013-01-01,1,3,0.0,0,18,12,3,12
4,2013-01-01,1,4,0.0,0,18,12,3,12


#### Datasets and model classes

In [7]:
# custom dataset
# returns sequence, categories, last time and target variable
class Datataset_(Dataset):
    """Sliding-window dataset over per-(store, family) sales series.

    The input frame is pivoted so that each row is one combination of
    ``store_nbr, city, state, type, cluster, family`` and each column is one
    date. An item is a window of ``sequence`` consecutive date columns taken
    from one pivot row.

    Args:
        df: long-format frame with the columns ``date``, ``store_nbr``,
            ``family``, ``sales``, ``onpromotion``, ``city``, ``state``,
            ``type`` and ``cluster``.
        sequence: number of date columns in each input window.
        predict_len: number of date columns in each target (ignored when
            ``test`` is true).
        seq_start: first date-column position this dataset may use.
        seq_end: end (exclusive) of the date-column range this dataset may use.
        test: when true, items carry no target and the windows may run up to
            ``seq_end``.

    Items are tuples of float tensors:
        * window of sales values, shape ``(1, sequence)``;
        * the pivot row's ``city, state, type, cluster, family`` values,
          shape ``(5,)``;
        * the window's last date as its int64 value floor-divided by 1e9
          (epoch seconds when the dates have nanosecond resolution),
          shape ``(1, 1)``;
        * unless ``test``: the next ``predict_len`` sales values.
    """

    # Row key of the pivot tables; everything after 'store_nbr' is returned
    # to the caller as the categorical feature vector.
    _INDEX_COLS = ['store_nbr', 'city', 'state', 'type', 'cluster', 'family']

    def __init__(self, df, sequence=49, predict_len=16, seq_start=0, seq_end=1684, test=False):
        self.df = df
        # Pivot the frame that was passed in. The previous version pivoted the
        # notebook-level `train` here, silently ignoring `df`.
        self.df_pt = df.pivot_table(index=self._INDEX_COLS,
                                    columns=['date'], values=['sales'],
                                    sort=False)
        self.df_pt_prm = df.pivot_table(index=self._INDEX_COLS,
                                        columns=['date'], values=['onpromotion'],
                                        sort=False)
        self.sequence = sequence
        self.predict_len = predict_len
        self.seq_start = seq_start
        self.seq_end = seq_end
        self.test = test
        self.device = device

        # Number of series; assumes every store carries every family so that
        # this matches the number of pivot rows.
        self.unique = df['store_nbr'].nunique()*df['family'].nunique()
        # Number of window start positions available per series.
        usable = (seq_end - seq_start) - (sequence - 1)
        if not self.test:
            usable -= predict_len  # leave room for the target after the window
        self.seq_len_touse = usable
        self.len = self.unique*self.seq_len_touse
        self.df_pt_sales = self.df_pt[['sales']]
        self.df_pt_prm = self.df_pt_prm[['onpromotion']]

    @staticmethod
    def _last_timestamp(window):
        """Return the window's last date (column level 1) as a (1, 1) tensor."""
        # Slice to the final date first so only one value is converted.
        last_date = window.index.get_level_values(1)[-1:]
        seconds = last_date.astype(np.int64)//1e9
        return torch.Tensor(seconds.to_numpy()).view(1, 1)

    def __getitem__(self, index):
        # Flat index -> (pivot row, window offset inside this dataset's range).
        row, offset = divmod(index, self.seq_len_touse)
        index_str = offset + self.seq_start
        index_end = index_str + self.sequence

        X = self.df_pt_sales.iloc[row, index_str:index_end]
        features = (torch.Tensor(X.values).view(1, -1),
                    # X.name is the pivot row key; drop the leading store_nbr.
                    torch.Tensor([float(v) for v in X.name[1:]]),
                    self._last_timestamp(X))
        if self.test:
            return features

        y = self.df_pt_sales.iloc[row, index_end:index_end + self.predict_len]
        return features + (torch.Tensor(y.values),)

    def __len__(self):
        return self.len

    def __getsize__(self):
        return (self.__len__())

In [8]:
# timevector representation
# https://github.com/ojus1/Time2Vec-PyTorch
def t2v(tau, f, out_features, w, b, w0, b0):
    """Build a time embedding from `tau`.

    The result concatenates, along the last dimension, `f(tau @ w + b)`
    followed by the un-activated term `tau @ w0 + b0`.
    `out_features` is accepted for interface compatibility but not read here.
    """
    periodic_part = f(tau @ w + b)
    linear_part = tau @ w0 + b0
    return torch.cat((periodic_part, linear_part), dim=-1)

class SineActivation(nn.Module):
    """Learnable time embedding that applies `t2v` with a sine activation.

    Holds four randomly initialised parameters: `w0`/`b0` of shape
    (in_features, 1) and `w`/`b` of shape (in_features, out_features - 1).
    """

    def __init__(self, in_features, out_features):
        super().__init__()
        self.out_features = out_features
        activated_width = out_features - 1
        # Creation order (w0, b0, w, b) is kept so random draws and parameter
        # ordering stay the same.
        self.w0 = nn.Parameter(torch.randn(in_features, 1))
        self.b0 = nn.Parameter(torch.randn(in_features, 1))
        self.w = nn.Parameter(torch.randn(in_features, activated_width))
        self.b = nn.Parameter(torch.randn(in_features, activated_width))
        self.f = torch.sin

    def forward(self, tau):
        return t2v(tau=tau, f=self.f, out_features=self.out_features,
                   w=self.w, b=self.b, w0=self.w0, b0=self.b0)

In [9]:
class Conv1d_and_Emb(nn.Module):
    """Convolution over the sales window combined with category embeddings
    and a sine time embedding.

    Args:
        emb_szs: iterable of ``(num_embeddings, embedding_dim)`` pairs, one
            per categorical input column.
        n_in: not read by this implementation (kept for interface
            compatibility).
        model_out: not read by this implementation (kept for interface
            compatibility).
        n_out: size of the output vector.

    NOTE(review): the width of the concatenated feature vector assumes input
    windows of length 224 (see ``_CONV_POSITIONS``); other lengths fail in
    ``wide_bn``.
    """

    # Width of the time embedding produced by SineActivation.
    _TIME_FEATURES = 16
    # Positions left after the three un-padded, stride-1 convolutions
    # (kernels 84, 28, 7) on a length-224 input: 224 - 83 - 27 - 6 = 108.
    _CONV_POSITIONS = 108

    def __init__(self, emb_szs, n_in=49, model_out=16, n_out=16):
        super(Conv1d_and_Emb, self).__init__()

        emb_szs = list(emb_szs)  # iterated twice below
        self.embs = nn.ModuleList([nn.Embedding(i, j) for i, j in emb_szs])
        # Derive the embedding width from emb_szs instead of hard-coding 20
        # (= 5 embeddings x 4); with five width-4 embeddings the layer sizes
        # are the same as before (20 and 144).
        emb_width = sum(j for _, j in emb_szs)
        wide_width = self._CONV_POSITIONS + emb_width + self._TIME_FEATURES
        self.bn = nn.BatchNorm1d(emb_width)
        self.wide_bn = nn.BatchNorm1d(wide_width)

        self.conv = nn.Sequential(
                    nn.Conv1d(1, 256, kernel_size=84, stride=1),
                    nn.ReLU(),
                    nn.Conv1d(256, 512, kernel_size=28, stride=1),
                    nn.ReLU(),
                    nn.Conv1d(512, 512, kernel_size=7, stride=1),
                    nn.ReLU())

        self.sin = SineActivation(1, self._TIME_FEATURES)
        self.fc = nn.Sequential(nn.Linear(wide_width, 64),
                                nn.ReLU(),
                                nn.Linear(64, n_out),
                                nn.ReLU())

    def forward(self, x, c, t):
        """Return predictions of width ``n_out``.

        Args:
            x: sales window with a single channel (the first convolution has
                ``in_channels=1``).
            c: integer category codes; column ``i`` feeds embedding ``i``.
            t: time input for the sine embedding; dimension 1 is squeezed
                after embedding.
        """
        embs = [emb(c[:, i]) for i, emb in enumerate(self.embs)]
        embs = torch.cat(embs, dim=-1)
        embs = self.bn(embs)

        t = self.sin(t).squeeze(dim=1)
        x = self.conv(x)
        # Average over the channel dimension, keeping one value per position.
        x = x.mean(dim=1).squeeze(dim=1)
        x = torch.cat([x, embs, t], dim=-1)
        x = self.wide_bn(x)
        x = self.fc(x)

        return x
    
# One (number of distinct values, embedding width 4) pair per categorical column.
emb_szs = [
    (n_levels, 4)
    for n_levels in train[['city', 'state', 'type', 'cluster', 'family']].nunique()
]
model = Conv1d_and_Emb(emb_szs)

In [10]:
class RMSLELoss(nn.Module):
    """Root mean squared logarithmic error.

    Computes sqrt(MSE(log1p(pred), log1p(actual))) using `nn.MSELoss` with its
    default reduction.
    """

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, pred, actual):
        log_pred = torch.log1p(pred)
        log_actual = torch.log1p(actual)
        return self.mse(log_pred, log_actual).sqrt()

In [11]:
# Datasets and Consts initialization
PREDICT_PERIOD = 16   # number of future steps predicted per sample
SEQUENCE_LEN = 224    # length of the input window
SEQ_LEN = 1684        # number of date columns in the pivoted training data
BATCH = 512

# The window arithmetic below indexes date columns by position, so SEQ_LEN
# must match the number of distinct dates actually present.
assert train['date'].nunique() == SEQ_LEN, "SEQ_LEN does not match the number of dates in train"

# Train: every window whose target ends before the last PREDICT_PERIOD dates.
dataset_train = Datataset_(train, sequence=SEQUENCE_LEN, predict_len=PREDICT_PERIOD,
                           seq_start=0, seq_end=SEQ_LEN-PREDICT_PERIOD)
# Eval: one window per series whose target is the last PREDICT_PERIOD dates.
dataset_eval = Datataset_(train, sequence=SEQUENCE_LEN, predict_len=PREDICT_PERIOD,
                          seq_start=SEQ_LEN-SEQUENCE_LEN-PREDICT_PERIOD, seq_end=SEQ_LEN)
# Test: the final SEQUENCE_LEN dates of each series, no target.
dataset_test = Datataset_(train, sequence=SEQUENCE_LEN, predict_len=PREDICT_PERIOD,
                          seq_start=SEQ_LEN-SEQUENCE_LEN, seq_end=SEQ_LEN, test=True)

trainloader = DataLoader(dataset_train, batch_size=BATCH, num_workers=16, persistent_workers= True, shuffle=True)
# Evaluation is not shuffled: the reported eval loss is a mean of per-batch
# losses, so a fixed batch composition keeps it comparable between evaluations.
evalloader = DataLoader(dataset_eval, batch_size=BATCH, num_workers=16, persistent_workers= True, shuffle=False)
testloader = DataLoader(dataset_test, batch_size=BATCH, num_workers=16, persistent_workers= True, shuffle=False)

#### Baseline model and Training

In [12]:
# Baseline: three un-padded, stride-1 1-D convolutions followed by a two-layer head.
# Each convolution shortens the sequence by (kernel_size - 1), so the flattened
# width is derived from SEQUENCE_LEN instead of being hard-coded
# (224 -> 178 positions x 32 channels = 5696, the previous literal 2*2848).
CONV_KERNELS = (28, 14, 7)
CONV_OUT_CHANNELS = 32
conv_out_len = SEQUENCE_LEN - sum(k - 1 for k in CONV_KERNELS)
flat_features = CONV_OUT_CHANNELS * conv_out_len

model = nn.Sequential(
                      nn.Conv1d(1, 1024, kernel_size=CONV_KERNELS[0], stride=1),
                      nn.ReLU(),
                      nn.Dropout(0.3),
                      nn.Conv1d(1024, 1024, kernel_size=CONV_KERNELS[1], stride=1),
                      nn.ReLU(),
                      nn.Dropout(0.3),
                      nn.Conv1d(1024, CONV_OUT_CHANNELS, kernel_size=CONV_KERNELS[2], stride=1),
                      nn.ReLU(),
                      nn.Dropout(0.3),
                      nn.Flatten(),
                      nn.Linear(flat_features, 1024*2),
                      nn.ReLU(),
                      nn.Dropout(0.3),
                      # One output per predicted step.
                      nn.Linear(1024*2, PREDICT_PERIOD),
                      # Final ReLU keeps predictions non-negative for the log1p in the loss.
                      nn.ReLU())
model.to(device)
model.train()

metric = RMSLELoss()
optimizer = optim.Adam(model.parameters(), lr=5e-5)
print('done')

done


In [13]:
# %%time
epochs = 3
running_loss = 0
# Evaluate roughly four times per epoch. max(1, ...) keeps the modulo below
# valid when the loader has fewer than four batches.
print_every = max(1, len(trainloader)//4)


def evaluate(net, loader, criterion):
    """Return the mean per-batch loss of `net` over `loader`.

    Runs in eval mode with gradients disabled and restores the module's
    previous training flag before returning. Only the first (input) and last
    (target) element of each batch are used.
    """
    was_training = net.training
    net.eval()
    total = 0.0
    with torch.no_grad():
        for X_ev, _, _, y_ev in loader:
            X_ev, y_ev = X_ev.to(device), y_ev.to(device)
            total += criterion(net(X_ev), y_ev).item()
    net.train(was_training)
    return total/len(loader)


for epoch in range(epochs):
    # The baseline model consumes only the sales window; the category and
    # timestamp tensors yielded by the dataset are not used here.
    for counter, (X, _, _, y) in enumerate(tqdm(trainloader)):
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        # Call the module itself rather than .forward() so hooks are honoured.
        preds = model(X)
        loss = metric(preds, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        if (counter!=0) and (counter % print_every == 0):
            eval_loss = evaluate(model, evalloader, metric)

            # running_loss covers batches 0..counter of the current epoch.
            print(f"Epoch {epoch+1}/{epochs}.. ", '\n',
                  f"Train loss: {running_loss/(counter+1):.3f}..,", '\n',
                  f'Eval loss: {eval_loss:.3f}')
    running_loss = 0

  0%|          | 0/4974 [00:00<?, ?it/s]

Epoch 1/3..  
 Train loss: 0.678.., 
 Eval loss: 0.486
Epoch 1/3..  
 Train loss: 0.634.., 
 Eval loss: 0.473
Epoch 1/3..  
 Train loss: 0.616.., 
 Eval loss: 0.478
Epoch 1/3..  
 Train loss: 0.605.., 
 Eval loss: 0.499


  0%|          | 0/4974 [00:00<?, ?it/s]

Epoch 2/3..  
 Train loss: 0.564.., 
 Eval loss: 0.484
Epoch 2/3..  
 Train loss: 0.561.., 
 Eval loss: 0.491
Epoch 2/3..  
 Train loss: 0.559.., 
 Eval loss: 0.484
Epoch 2/3..  
 Train loss: 0.558.., 
 Eval loss: 0.472


  0%|          | 0/4974 [00:00<?, ?it/s]

Epoch 3/3..  
 Train loss: 0.546.., 
 Eval loss: 0.467
Epoch 3/3..  
 Train loss: 0.546.., 
 Eval loss: 0.469
Epoch 3/3..  
 Train loss: 0.546.., 
 Eval loss: 0.485
Epoch 3/3..  
 Train loss: 0.545.., 
 Eval loss: 0.477


#### Submission

In [14]:
submission = pd.read_csv('sample_submission.csv')
model.eval()

# Collect per-batch predictions and concatenate once at the end; growing the
# array with np.concatenate inside the loop re-copies all earlier rows each time.
batch_preds = []
with torch.no_grad():
    for X, C, T in testloader:
        # The baseline model only takes the sales window X.
        batch_preds.append(model(X.to(device)).cpu().numpy())

# Stays None when the loader yields nothing, as before.
sub_preds = np.concatenate(batch_preds, axis=0) if batch_preds else None

In [15]:
# Flatten the predictions column by column (Fortran order), i.e. all rows of
# the first predicted step, then all rows of the second, and so on.
flat_preds = sub_preds.reshape(-1, 1, order='F')
submission[['sales']] = flat_preds
submission.to_csv("submission.csv", index=False)