In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, SequentialSampler,DataLoader
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Directory that holds the input CSV files. File names are appended to this
# string with plain `+` concatenation, so the trailing slash is required.
data_dir = 'data/'

In [3]:
class SlidingWindowDataset(Dataset):
    """Sliding-window view over the columns of a DataFrame.

    Item ``i`` is the pair ``(x, y)`` where ``x`` holds columns
    ``[i, i + timesteps)`` and ``y`` holds columns
    ``[i + timesteps, i + sliding_window)`` of ``csv_data``. All rows of the
    frame are kept, so both tensors have ``csv_data.shape[0]`` rows.

    Args:
        csv_data: pandas DataFrame that is sliced positionally along its
            columns. Its values must be convertible to a float tensor.
        timesteps: number of leading columns of each window returned as ``x``.
        sliding_window: total width of one window in columns (``x`` plus ``y``).
    """

    def __init__(self,csv_data,timesteps,sliding_window):
        self.data = csv_data
        self.timesteps = timesteps
        self.sliding_window = sliding_window

    def __getitem__(self,index):
        """Return the ``(x, y)`` float tensors of the window starting at ``index``.

        Negative indices count from the end, like a built-in sequence.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        n_windows = len(self)
        if index < 0:
            # Rebind instead of `+=` so a caller-owned tensor/array index is
            # never modified in place.
            index = index + n_windows
        # `.iloc` slicing silently truncates past the last column, so without
        # this check an out-of-range index would yield short/empty windows and
        # plain iteration over the dataset would never stop.
        if not 0 <= index < n_windows:
            raise IndexError(
                "window index out of range for dataset with %d windows" % n_windows
            )

        split = index + self.timesteps
        stop = index + self.sliding_window
        x = torch.tensor(self.data.iloc[:, index:split].to_numpy(), dtype=torch.float)
        y = torch.tensor(self.data.iloc[:, split:stop].to_numpy(), dtype=torch.float)
        return x,y

    def __len__(self):
        """Number of complete windows; 0 when the frame is narrower than one window."""
        return max(0, self.data.shape[1] - self.sliding_window + 1)

In [4]:
# Read the raw tables. `data_dir` is defined once in the configuration cell
# near the top of the notebook; it is deliberately not re-declared here so the
# input location has a single source of truth.
train_sales = pd.read_csv(data_dir + 'sales_train_validation.csv')
submission_file = pd.read_csv(data_dir + 'sample_submission.csv')

# Not used yet -- uncomment once price / calendar features are added:
#sell_prices = pd.read_csv(data_dir + 'sell_prices.csv')
#calendar = pd.read_csv(data_dir + 'calendar.csv')

In [5]:
# Training frame: strip the identifier / hierarchy columns so that only the
# remaining columns of `train_sales` are left (no extra features for now).
sales = train_sales.drop(
    columns=["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"]
)
sales.head()

Unnamed: 0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,0,0,0,0,0,0,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,0,0,0,0,0,0,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [6]:
# NOTE(review): `start_day` is not referenced by any other cell visible in
# this notebook -- confirm whether it is still needed.
start_day = 1

# Window sizes, counted in columns of the sales frame.
# Presumably `timesteps` is the model-input width and `prediction_steps` the
# forecast width -- confirm against the dataset construction below.
timesteps = 14
prediction_steps = 28
# Total width of one sliding window (input part + target part).
len_window = timesteps + prediction_steps

In [7]:
# Use the configured window sizes instead of repeating the literal 14 and
# re-deriving the window length, so editing the config cell is enough to
# change the window layout.
dataset = SlidingWindowDataset(sales, timesteps, len_window)

In [8]:
# Batches of 32 windows, served in index order (no shuffling).
dataset_sampler_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=False,
)

In [13]:
class LSTM(nn.Module):
    """Stacked bidirectional LSTM followed by a linear read-out layer.

    Args:
        prediction_steps: size of the last dimension of the output.
        input_size: size of the last dimension of the input (default 14).
        hidden_size: hidden units per direction in each LSTM layer (default 10).
        num_layers: number of stacked LSTM layers (default 2).

    The defaults reproduce the architecture that was previously hard-coded,
    so ``LSTM(28)`` builds exactly the same layers as before.
    """

    def __init__(self,prediction_steps,input_size=14,hidden_size=10,num_layers=2):
        super().__init__()
        # `batch_first` is left at its default (False), so nn.LSTM treats
        # dim 0 of the input as the sequence axis and dim 1 as the batch axis.
        self.lstm1 = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            bidirectional=True,
        )
        # A bidirectional LSTM concatenates the forward and backward hidden
        # states, hence 2 * hidden_size input features for the read-out.
        self.lin = nn.Linear(2 * hidden_size, prediction_steps)

    def forward(self,x):
        """Map ``x`` of shape ``(d0, d1, input_size)`` to ``(d0, d1, prediction_steps)``."""
        x, _ = self.lstm1(x)  # final (h_n, c_n) states are not needed
        x = self.lin(x)
        return x

In [14]:
# Size the model from the configured forecast width rather than a literal 28.
lstm = LSTM(prediction_steps)

In [15]:
# Mean-squared-error objective, optimised with Adam plus a small L2 penalty.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(
    lstm.parameters(),
    lr=1e-3,
    weight_decay=1e-5,
)

In [19]:
# Train for 10 passes over the loader. The loss printed after each epoch is the
# mean over all batches of that epoch; the loss of the final batch alone is a
# noisy signal and, with an unshuffled loader, always comes from the same windows.
for epoch in range(10):
    lstm.train()  # set once per epoch instead of once per batch
    running_loss = 0.0
    n_batches = 0

    for sliding_window, label in tqdm(dataset_sampler_loader, total=len(dataset_sampler_loader), desc="#train batches", leave=False):
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = lstm(sliding_window)
        loss = criterion(outputs,label)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        n_batches += 1

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    print("Epoch: %d, loss: %1.5f  " %(epoch, running_loss / max(n_batches, 1)))

HBox(children=(FloatProgress(value=0.0, description='#train batches', max=59.0, style=ProgressStyle(descriptio…

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31


KeyboardInterrupt: 