### Домашняя работа к уроку 5

Обучить GRU, LSTM для предсказания временного ряда на примере https://www.kaggle.com/c/favorita-grocery-sales-forecasting (для каждого типа продуктов)

In [1]:
import torch
import torch.nn as nn

import seaborn as sns
import numpy as np
import pandas as pd

from sklearn.preprocessing import MinMaxScaler

In [2]:
df = pd.read_csv("data4/train.csv")

In [3]:
df.head(5)

Unnamed: 0,id,date,store_nbr,item_nbr,unit_sales,onpromotion
0,0,2013-01-01,25,103665,7.0,
1,1,2013-01-01,25,105574,1.0,
2,2,2013-01-01,25,105575,2.0,
3,3,2013-01-01,25,108079,1.0,
4,4,2013-01-01,25,108701,1.0,


#### Подготавливаем данные

In [4]:
df = df.drop(['id', 'date', 'onpromotion' ], axis='columns')
df.head(5)

Unnamed: 0,store_nbr,item_nbr,unit_sales
0,25,103665,7.0
1,25,105574,1.0
2,25,105575,2.0
3,25,108079,1.0
4,25,108701,1.0


In [5]:
df.shape

(3000, 3)

In [6]:
df.columns

Index(['store_nbr', 'item_nbr', 'unit_sales'], dtype='object')

##### Изменяем тип столбца unit_sales на float

In [7]:
all_data = df['unit_sales'].values.astype(float)
print(all_data)

[ 7.  1.  2. ... 14. 23. 18.]


#### Подрезал данные до 3000 записей чтобы не тормазило и разделил на train и test

In [8]:
test_data_size = 300

train_data = all_data[:-test_data_size]
test_data = all_data[-test_data_size:]

In [9]:
print(len(train_data))
print(len(test_data))

2700
300


In [10]:
print(test_data)

[ 36.     21.      6.      2.     12.     36.     11.      2.      9.
  15.     11.     18.    135.003   4.      5.     20.     98.     16.
  46.     27.      2.     22.     19.     62.      7.      9.      9.
  12.     14.     55.     50.     51.     39.      3.     13.    119.
  33.     17.      6.      8.     25.      2.      9.      2.      9.
   8.     15.     24.      3.     13.      1.     18.     10.      6.
  43.     16.     19.     10.     72.     18.     15.      7.      6.
   1.      8.      3.      3.     32.     17.      9.     11.      9.
  11.    141.47   45.      3.      8.     20.     18.      9.      8.
   2.    134.     18.     24.     27.     15.     24.     13.     18.
  91.     54.      8.      3.     17.      1.     10.      4.     29.
   1.     18.      1.      5.     23.      4.      9.     27.     18.
   3.     18.    121.     22.     20.     15.     16.      6.291   6.675
   2.      9.     12.      8.      9.     16.      4.      2.     11.
  13.     34.    

#### Нормализуем данные с помощью скалера min/max

In [11]:
scaler = MinMaxScaler(feature_range=(-1, 1))
train_data_normalized = scaler.fit_transform(train_data .reshape(-1, 1))

print(train_data_normalized[:5])
print(train_data_normalized[-5:])

[[-0.93719636]
 [-0.99303836]
 [-0.98373136]
 [-0.99303836]
 [-0.99303836]]
[[-0.97442436]
 [-0.92788936]
 [-0.96511736]
 [-0.93719636]
 [-0.94650336]]


#### Преобразовываем набора данных в тензоры для использования в PyTorch.

In [12]:
train_data_normalized = torch.FloatTensor(train_data_normalized).view(-1)

In [13]:
train_window = 12

In [14]:
def create_inout_sequences(input_data, tw):
    inout_seq = []
    L = len(input_data)
    for i in range(L-tw):
        train_seq = input_data[i:i+tw]
        train_label = input_data[i+tw:i+tw+1]
        inout_seq.append((train_seq ,train_label))
    return inout_seq

In [15]:
train_inout_seq = create_inout_sequences(train_data_normalized, train_window)

In [16]:
train_inout_seq[:5]

[(tensor([-0.9372, -0.9930, -0.9837, -0.9930, -0.9930, -0.9744, -0.9930, -0.9930,
          -0.8814, -0.9744, -0.9930, -0.9930]),
  tensor([-0.9930])),
 (tensor([-0.9930, -0.9837, -0.9930, -0.9930, -0.9744, -0.9930, -0.9930, -0.8814,
          -0.9744, -0.9930, -0.9930, -0.9930]),
  tensor([-0.9930])),
 (tensor([-0.9837, -0.9930, -0.9930, -0.9744, -0.9930, -0.9930, -0.8814, -0.9744,
          -0.9930, -0.9930, -0.9930, -0.9930]),
  tensor([-0.9558])),
 (tensor([-0.9930, -0.9930, -0.9744, -0.9930, -0.9930, -0.8814, -0.9744, -0.9930,
          -0.9930, -0.9930, -0.9930, -0.9558]),
  tensor([-0.9930])),
 (tensor([-0.9930, -0.9744, -0.9930, -0.9930, -0.8814, -0.9744, -0.9930, -0.9930,
          -0.9930, -0.9930, -0.9558, -0.9930]),
  tensor([-0.9465]))]

### LSTM

In [17]:
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [18]:
model = LSTM()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [19]:
print(model)

LSTM(
  (lstm): LSTM(1, 100)
  (linear): Linear(in_features=100, out_features=1, bias=True)
)


In [20]:
epochs = 10

for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

epoch:   1 loss: 0.00122883
epoch:   9 loss: 0.0006672089


In [21]:
fut_pred = 300

test_inputs = train_data_normalized[-train_window:].tolist()
print(test_inputs)

[-0.8068983554840088, -0.927889347076416, -0.9837313890457153, -0.9930383563041687, -0.8068983554840088, -0.9744243621826172, -0.9744243621826172, -0.9744243621826172, -0.927889347076416, -0.965117335319519, -0.9371963739395142, -0.9465033411979675]


In [22]:
model.eval()

for i in range(fut_pred):
    seq = torch.FloatTensor(test_inputs[-train_window:])
    with torch.no_grad():
        model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                        torch.zeros(1, 1, model.hidden_layer_size))
        test_inputs.append(model(seq).item())

In [23]:
test_inputs[fut_pred:]

[-0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942,
 -0.9212515950202942]

### GRU (просто меняем LSTM на GRU как учили на лекции)

In [24]:
class GRU(nn.Module):
    def __init__(self, input_size=1, hidden_layer_size=100, output_size=1):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.gru = nn.GRU(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        gru_out, self.hidden_cell = self.gru(input_seq.view(len(input_seq) ,1, -1), self.hidden_cell)
        predictions = self.linear(gru_out.view(len(input_seq), -1))
        return predictions[-1]

In [25]:
model = GRU()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [26]:
print(model)

GRU(
  (gru): GRU(1, 100)
  (linear): Linear(in_features=100, out_features=1, bias=True)
)


In [None]:
epochs = 10

for i in range(epochs):
    for seq, labels in train_inout_seq:
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                             torch.zeros(1, 1, model.hidden_layer_size))

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    if i%25 == 1:
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')