In [27]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, SequentialSampler,DataLoader
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

from sklearn.preprocessing import MinMaxScaler,StandardScaler


In [28]:
# read data (for now, sell_prices & calendar are not used)

data_dir = '../code/data/'

train_sales = pd.read_csv(data_dir + 'sales_train_validation.csv')
#sell_prices = pd.read_csv(data_dir + 'sell_prices.csv')
#calendar = pd.read_csv(data_dir + 'calendar.csv')
submission_file = pd.read_csv(data_dir + 'sample_submission.csv')
eval_file = pd.read_csv(data_dir + 'sales_train_evaluation.csv')

In [29]:
# create training data, for now it only contains the sales and no extra features
sales = train_sales.drop(["id", "item_id", "dept_id", "cat_id", "store_id", "state_id"], axis=1)
# normalize training data

sales.head()

Unnamed: 0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,0,0,0,0,0,0,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,0,0,0,0,0,0,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [31]:
start_day = 0

timesteps = 50
prediction_steps = 28
len_window = timesteps + prediction_steps



In [None]:
class BigDataSet(Dataset):
    
    def __init__(self,data,labels):
        self.data = data
        self.labels = labels
        
    def __getitem__(self,index):
        
        x = self.data[index]
        y = self.labels[index]
        x_tensor = torch.tensor(x).type(dtype=torch.float)
        y_tensor = torch.tensor(y).type(dtype=torch.float)
        return x_tensor.reshape(1,14),y_tensor
    
    def __len__(self):
        return len(self.data)

In [32]:
class SlidingWindowDataset(Dataset):
    """Sliding window dataset"""
    
    def __init__(self,csv_data,labels,timesteps,sliding_window):
        self.data = csv_data
        self.labels = labels
        self.timesteps = timesteps
        self.sliding_window = sliding_window

    def __getitem__(self,index):
        x = self.data[:, index: index + self.timesteps]
        y = self.labels[:, index + self.timesteps : index+self.timesteps+1]
        x = torch.tensor(x).type(dtype=torch.float)
        y = torch.tensor(y).type(dtype=torch.float)
        return x,y
    
    def __len__(self):
        return self.data.shape[1] - self.sliding_window + 1
        
        

In [None]:
## lets try to implement our own StandarScaler

sales_numpy = torch.tensor(sales.to_numpy()).type(dtype=torch.float)
sales_mean = sales_numpy.mean(1, keepdim=True)
sales_std = sales_numpy.std(1, unbiased=False, keepdim=True)

sales_numpy -= sales_mean
sales_numpy /= sales_std


In [None]:
sales_numpy

In [None]:
## lets try to implement our own Min Max Scaler where min = 0 and max = 1 

sales_numpy = torch.tensor(sales.to_numpy()).type(dtype=torch.float)
max_value = sales_numpy.max()
print(max_value)

sales_numpy /= max_value


In [33]:
scaler = StandardScaler()

scaler.fit(sales.T)

scaled_data = scaler.transform(sales.T)

In [34]:
scaled_data.shape

(1913, 30490)

In [35]:
sales_numpy = scaled_data.T

In [36]:
dataset = SlidingWindowDataset(sales_numpy[:,start_day:],sales_numpy[:,start_day:],50,(timesteps+prediction_steps))

In [None]:
# dataset = BigDataSet(base,predictions)

In [None]:
dataset_sampler = SequentialSampler(dataset)

In [37]:
dataset_sampler_loader = DataLoader(dataset,batch_size=32,shuffle=False)

In [11]:
for x,y in dataset_sampler_loader:
    print(x.shape)
    print(y.shape)
    

torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([32, 30490, 50])
torch.Size([32, 30490, 1])
torch.Size([7, 30490, 50])
torch.Size([7, 30490, 1])


In [None]:
## I think it would be easier to select a certain set of features, 
## train it with a CNN and extract feacture vectors for the LSTM
class EventCNN(nn.Module):
    def __init__(self, n_features):
        super(EventCNN, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_1 = nn.Linear(input_shape, 256)
        self.feature_linear = nn.Linear(256, n_features)
        self.classification_linear = nn.Linear(n_features, nr_of_events)
        
    def forward(self, x):
        h = self.linear_1(self.flatten(x))
        h = self.feature_linear(h)
        h = self.classification_linear(h)
        return torch.squeeze(h)

In [None]:
class CombinationNet(nn.Module):
    
    def __init__(self,cnn,lstm):
        super(CombinationNet, self).__init__()
        self.feature_vectors = torch.nn.Sequential(*(list(cnn_net.children())[:-1]))
        self.lstm = lstm
    
    def forward(self,x):
        x = self.feature_vectors(x)
        x = self.lstm(x)
        

In [None]:
class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.conv = nn.Conv2d(in_channels, 64, (2,1),1 ) ## input depends on sliding window...
        self.relu = nn.Relu()
        self.pool = nn.MaxPool2d((2,1),2)
        self.timedistributed = ... ### not sure about this one, this is a keras specific layer, but we could just use a linear layer
        self.lstm = nn.LSTM(64,hidden_size, num_layers=1)
        self.tanh = nn.Tanh
        self.lin1 = nn.Linear(in_features,32)
        self.lin2 = nn.Linear(in_features,output_size) # 
    
    def forward(self,x):
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool1(x)
        x = self.conv(x)
        x = self.relu(x)
        x = self.pool(x)
        ## TimeDistributed
        x = self.lstm(x)
        x = self.tanh(x)
        x = self.lin1(x)
        x = self.lin2(x)
        return x

In [38]:
class LSTM(nn.Module):
    def __init__(self,prediction_steps):
        super(LSTM, self).__init__()
        self.lstm1 = nn.LSTM(input_size=50,hidden_size=10,num_layers=2,bidirectional=False)
        self.lin = nn.Linear(10,1)
        
    def forward(self,x):
        out,state = self.lstm1(x)
        x = self.lin(out)
        return x
    

In [39]:
lstm = LSTM(28)

In [40]:
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=0.001,weight_decay=1e-5)


In [41]:
for epoch in range(20):
    for batch_id, (sliding_window, label) in tqdm(enumerate(dataset_sampler_loader), total=len(dataset_sampler_loader), desc="#train batches", leave=False):

        lstm.train()
        outputs = lstm(sliding_window)
        optimizer.zero_grad()
        loss = criterion(outputs,label)

        loss.backward()
        optimizer.step()

    print("Epoch: %d, loss: %1.5f  " %(epoch, loss.cpu().item()))

HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 0, loss: 1.43303  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 1, loss: 1.39785  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 2, loss: 1.37480  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 3, loss: 1.36138  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 4, loss: 1.35271  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 5, loss: 1.34668  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 6, loss: 1.34233  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 7, loss: 1.33892  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 8, loss: 1.33605  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 9, loss: 1.33355  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 10, loss: 1.33137  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 11, loss: 1.32950  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 12, loss: 1.32788  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 13, loss: 1.32647  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 14, loss: 1.32523  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 15, loss: 1.32414  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 16, loss: 1.32317  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 17, loss: 1.32228  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 18, loss: 1.32148  


HBox(children=(FloatProgress(value=0.0, description='#train batches', max=58.0, style=ProgressStyle(descriptio…

Epoch: 19, loss: 1.32075  


In [None]:
for epoch in range(1):

    for train,label in zip(base,predictions):
        train_tensor = torch.tensor(train).type(dtype=torch.float)
        label_tensor =  torch.tensor(label).type(dtype=torch.float)
        lstm.train()
        outputs = lstm(train_tensor.reshape(1,1,14))
        optimizer.zero_grad()
        loss = criterion(outputs,label_tensor)

        loss.backward()
        optimizer.step()

    print("Epoch: %d, loss: %1.5f  " %(epoch, loss.cpu().item()))

In [49]:


# get input data for predictions
X_pred = sales.iloc[:,-timesteps:].values

X_pred = scaler.transform(X_pred.T)
X_pred = X_pred.reshape(1, X_pred.shape[0], X_pred.shape[1])

X_pred = torch.tensor(X_pred).type(dtype=torch.float)

In [50]:
X_pred.shape
# X_pred = X_pred.reshape(1,30490,14)

torch.Size([1, 50, 30490])

In [51]:

# get predictions
# norm_pred = lstm(X_pred).detach()
# norm_pred.shape

# norm_pred = norm_pred.reshape(norm_pred.shape[1],norm_pred.shape[2])

# predictions = scaler.inverse_transform(norm_pred)
# # norm_pred += sales_mean
# # norm_pred *= sales_std
# predictions = np.round(np.abs(norm_pred)).numpy()

In [52]:
lstm.eval()

LSTM(
  (lstm1): LSTM(50, 10, num_layers=2)
  (lin): Linear(in_features=10, out_features=1, bias=True)
)

In [53]:
predictions = []

for i in range(timesteps, timesteps+28):
    print(i)
    print(timesteps)
    print(X_pred.shape)
    print(X_pred[:,i-timesteps:i:].shape)
#     prediction = lstm(X_pred[:,i-timesteps:i:].reshape(1,30490,timesteps))
    prediction = lstm(X_pred.reshape(1,30490,timesteps))

    rescaled_prediction = scaler.inverse_transform(prediction.reshape(1,30490).detach().numpy())
    print(rescaled_prediction)
    print("-----")
    print(predictions.append(rescaled_prediction))
    

50
50
torch.Size([1, 50, 30490])
torch.Size([1, 50, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
51
50
torch.Size([1, 50, 30490])
torch.Size([1, 49, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
52
50
torch.Size([1, 50, 30490])
torch.Size([1, 48, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
53
50
torch.Size([1, 50, 30490])
torch.Size([1, 47, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
54
50
torch.Size([1, 50, 30490])
torch.Size([1, 46, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
55
50
torch.Size([1, 50, 30490])
torch.Size([1, 45, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.85050523]]
-----
None
56
50
torch.Size([1, 50, 30490])
torch.Size([1, 44, 30490])
[[0.5370785  0.49526098 0.2740485  ... 1.5622959  0.85911125 0.8505052

In [54]:
predictions

[array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]], dtype=float32),
 array([[0.5370785 , 0.49526098, 0.2740485 , ..., 1.5622959 , 0.85911125,
         0.85050523]],

In [55]:
submission = pd.DataFrame(data=np.array(predictions).reshape(28,30490))

In [56]:
validation = pd.concat([submission.T, submission.T])
validation = validation.astype(int)

validation.reset_index(inplace=True, drop=True)

validation['id'] = submission_file.id
validation = validation.reindex(columns=['id'] + [c for c in validation.columns if c != 'id'], copy=False)

validation.columns = ['id'] + [f"F{i}" for i in range(1, 29)]

validation.to_csv('submission_1daylstm_lowlr_50days.csv', index=False)

In [57]:
validation

Unnamed: 0,id,F1,F2,F3,F4,F5,F6,F7,F8,F9,...,F19,F20,F21,F22,F23,F24,F25,F26,F27,F28
0,HOBBIES_1_001_CA_1_validation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,HOBBIES_1_002_CA_1_validation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,HOBBIES_1_004_CA_1_validation,2,2,2,2,2,2,2,2,2,...,2,2,2,2,2,2,2,2,2,2
4,HOBBIES_1_005_CA_1_validation,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60975,FOODS_3_823_WI_3_evaluation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60976,FOODS_3_824_WI_3_evaluation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
60977,FOODS_3_825_WI_3_evaluation,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
60978,FOODS_3_826_WI_3_evaluation,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# create submission file

validation = pd.concat([pd.DataFrame(predictions)])
validation = validation.astype(int)

validation.reset_index(inplace=True, drop=True)

validation['id'] = submission_file.id
validation = validation.reindex(columns=['id'] + [c for c in validation.columns if c != 'id'], copy=False)

validation.columns = ['id'] + [f"F{i}" for i in range(1, 29)]

validation.to_csv('submission_7.csv', index=False)

In [None]:
validation

In [None]:
eval_file.iloc[0:35,-28:]