In [12]:
import math

import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.utils.data import DataLoader

In [9]:
# Load the preprocessed training table from the local dataset folder.
train = pd.read_csv(
    './dataset/trainprocessed.csv'
)

In [14]:
from torch.utils.data import DataLoader, Dataset

class windowDataset(Dataset):
    """Sliding-window view of a 1-D series for sequence-to-sequence training.

    Each sample pairs ``input_window`` consecutive values with the
    ``output_window`` values that immediately follow them. Consecutive
    samples start ``stride`` positions apart.

    Args:
        y: One-dimensional sequence of numbers (NumPy array, pandas Series,
            list, ...). It is converted to a float64 NumPy array, so slicing
            is always positional regardless of any index the input carries.
        input_window: Number of values in each input sequence.
        output_window: Number of values in each target sequence.
        stride: Offset between the starts of consecutive samples.

    Attributes:
        x: Array of shape ``(num_samples, input_window, 1)``.
        y: Array of shape ``(num_samples, output_window, 1)``.
        len: Number of samples.

    Raises:
        ValueError: If ``y`` is not one-dimensional.
    """

    def __init__(self, y, input_window=4, output_window=4, stride=4):
        series = np.asarray(y, dtype=np.float64)
        if series.ndim != 1:
            raise ValueError(
                "y must be one-dimensional, got shape {}".format(series.shape))

        # Total number of observations.
        L = series.shape[0]
        # Number of samples produced when sliding by `stride`. Clamp at zero
        # so a series shorter than one input/output pair yields an empty
        # dataset instead of a negative array dimension.
        num_samples = max((L - input_window - output_window) // stride + 1, 0)

        # Start position of every sample, then the positions covered by the
        # input window and by the output window that directly follows it.
        starts = stride * np.arange(num_samples)
        x_idx = starts[:, None] + np.arange(input_window)
        y_idx = starts[:, None] + input_window + np.arange(output_window)

        # Trailing axis of size 1 is the (single) feature dimension.
        self.x = series[x_idx][:, :, None]
        self.y = series[y_idx][:, :, None]

        self.len = len(self.x)

    def __getitem__(self, i):
        # Returns the input window plus the target split into a
        # "all but last" part and a "shifted by one" part.
        return self.x[i], self.y[i, :-1], self.y[i, 1:]

    def __len__(self):
        return self.len

In [15]:
# Display the loaded training frame to inspect its columns and row count.
train

Unnamed: 0,Store,Date,Temperature,Fuel_Price,Promotion1,Promotion2,Promotion3,Promotion4,Promotion5,Unemployment,IsHoliday,Year,Month,Unemployment_Class,Weekly_Sales,ispeak
0,1,2010-02-05,42.31,2.572,0.00,0.00,0.00,0.00,0.00,8.106,False,2010,2,0,1643690.90,0
1,1,2010-02-12,38.51,2.548,0.00,0.00,0.00,0.00,0.00,8.106,True,2010,2,0,1641957.44,0
2,1,2010-02-19,39.93,2.514,0.00,0.00,0.00,0.00,0.00,8.106,False,2010,2,0,1611968.17,0
3,1,2010-02-26,46.63,2.561,0.00,0.00,0.00,0.00,0.00,8.106,False,2010,2,0,1409727.59,0
4,1,2010-03-05,46.50,2.625,0.00,0.00,0.00,0.00,0.00,8.106,False,2010,3,0,1554806.68,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6250,45,2012-08-31,75.09,3.867,23641.30,6.00,92.93,6988.31,3992.13,8.684,False,2012,8,3,734297.87,0
6251,45,2012-09-07,75.70,3.911,11024.45,12.80,52.63,1854.77,2055.70,8.684,True,2012,9,3,766512.66,0
6252,45,2012-09-14,67.87,3.948,11407.95,92.28,4.30,3421.72,5268.92,8.684,False,2012,9,3,702238.27,0
6253,45,2012-09-21,65.32,4.038,8452.20,92.28,63.24,2376.38,8670.40,8.684,False,2012,9,3,723086.20,0


In [20]:
# Build sliding windows (default 4-in / 4-out, stride 4) over the weekly
# sales of store 1 as a quick sanity check of the dataset class.
window = windowDataset(
    train.query('Store==1')['Weekly_Sales']
)

In [24]:
# Input windows are laid out as (num_samples, input_window, 1).
window.x.shape

(33, 4, 1)

In [46]:
class TFModel(nn.Module):
    """Transformer-encoder model mapping an input window to an output window.

    A univariate window of length ``iw`` is embedded to ``d_model`` features,
    given positional encodings, passed through a stack of transformer encoder
    layers, projected back to one value per time step and finally mapped from
    ``iw`` time steps to ``ow`` forecast steps.

    Args:
        iw: Length of the input window (input size of ``linear2``).
        ow: Length of the output window (output size of ``linear2``).
        d_model: Feature width used inside the transformer.
        nhead: Number of attention heads per encoder layer.
        nlayers: Number of stacked encoder layers.
        dropout: Dropout probability for the encoder layers and the
            positional encoding.
    """

    def __init__(self, iw, ow, d_model, nhead, nlayers, dropout=0.5):
        super(TFModel, self).__init__()
        # NOTE(review): this prototype layer stays registered as a submodule
        # in addition to the encoder stack that is built from it.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=nlayers)
        self.pos_encoder = PositionalEncoding(d_model, dropout)

        # Embeds each scalar observation into d_model features.
        self.encoder = nn.Sequential(
            nn.Linear(1, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, d_model)
        )

        # Collapses the d_model features of each time step to one value.
        self.linear = nn.Sequential(
            nn.Linear(d_model, d_model//2),
            nn.ReLU(),
            nn.Linear(d_model//2, 1)
        )

        # Maps the iw per-step values onto the ow forecast steps.
        self.linear2 = nn.Sequential(
            nn.Linear(iw, (iw+ow)//2),
            nn.ReLU(),
            nn.Linear((iw+ow)//2, ow)
        )

    def generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0.0 on and below the diagonal, -inf above."""
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def forward(self, src, srcmask):
        """Forecast ``ow`` steps from a batch of input windows.

        Args:
            src: Tensor of shape ``(batch, iw, 1)``.
            srcmask: Attention mask of shape ``(iw, iw)``, e.g. from
                ``generate_square_subsequent_mask``.

        Returns:
            Tensor of shape ``(batch, ow)``.
        """
        src = self.encoder(src)                       # (batch, iw, d_model)
        # The positional encoder and the transformer stack are both
        # sequence-first, so switch layout BEFORE adding positions; otherwise
        # the encoding would be indexed by batch position instead of time step.
        src = self.pos_encoder(src.transpose(0, 1))   # (iw, batch, d_model)
        output = self.transformer_encoder(src, srcmask).transpose(0, 1)
        output = self.linear(output)[:, :, 0]         # (batch, iw)
        output = self.linear2(output)                 # (batch, ow)
        return output

class PositionalEncoding(nn.Module):
    """Adds fixed sinusoidal position information to sequence-first input.

    Args:
        d_model: Feature width of the input (even or odd).
        dropout: Dropout probability applied after adding the encoding.
        max_len: Largest sequence length for which encodings are precomputed.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd column than even columns,
        # so trim div_term to match; for even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[:d_model // 2])
        # Shape (max_len, 1, d_model) so it broadcasts over the batch axis.
        pe = pe.unsqueeze(0).transpose(0, 1)
        # Buffer: follows the module across devices but is not a parameter.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add encodings for the first ``x.size(0)`` positions, then apply dropout.

        Args:
            x: Tensor whose first axis is the sequence position, i.e.
                ``(seq_len, batch, d_model)``.
        """
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

def gen_attention_mask(x):
    """Return a boolean tensor that is True wherever ``x`` equals zero."""
    return torch.eq(x, 0)

In [49]:
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from IPython.display import clear_output
import torch

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Module-level list intended to collect forecast results.
submissions = []

def training(data):
    """Train one transformer per store and forecast its next 4 weekly sales.

    For every store id from 1 to 45 the store's ``Weekly_Sales`` are min-max
    scaled, cut into 4-in / 4-out windows, and used to fit a fresh ``TFModel``
    for 100 full-batch Adam steps. The last 4 scaled observations are then fed
    to the trained model and the forecast is mapped back to the original scale
    with negative values clipped to 0.

    Args:
        data: DataFrame with at least the columns ``Store`` and
            ``Weekly_Sales``.

    Returns:
        list of 45 one-dimensional NumPy arrays (one per store, in store-id
        order), each holding the 4 forecast values.
    """
    # Local accumulator: assigning to the module-level name inside the
    # function would shadow it and fail on first use.
    submissions = []

    for i in tqdm(range(1, 46)):
        train_data = data.query(
            'Store=={}'.format(i))['Weekly_Sales'].to_numpy().reshape(-1, 1)
        MinMax = MinMaxScaler()
        train_data = MinMax.fit_transform(train_data).flatten()
        window = windowDataset(train_data, input_window=4, output_window=4, stride=4)

        model = TFModel(4, 4, 512, 8, 6, 0.4).to(device)
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

        # These do not change between epochs, so build them once per store.
        train_x = torch.tensor(window.x).float().to(device)
        train_y = torch.tensor(window.y)[:, :, 0].float().to(device)
        src_mask = model.generate_square_subsequent_mask(train_x.shape[1]).to(device)

        model.train()
        for _ in range(100):
            optimizer.zero_grad()
            result = model(train_x, src_mask)
            loss = criterion(result, train_y)
            loss.backward()
            optimizer.step()

        # Forecast from the most recent 4 observations of this store.
        test_x = torch.tensor(train_data[-4:]).reshape(1, -1, 1).float().to(device)
        src_mask_test = model.generate_square_subsequent_mask(
            test_x.shape[1]).to(device)

        # Disable dropout and autograd so the forecast is deterministic and
        # no graph is kept alive on the device.
        model.eval()
        with torch.no_grad():
            prediction = model(test_x, src_mask_test)
        prediction = prediction.cpu().numpy()

        pred = MinMax.inverse_transform(prediction)
        pred = np.where(pred > 0, pred, 0).reshape(-1)
        submissions.append(pred)

        # Drop this store's model and optimizer state before the next model
        # is built, so two full models never sit on a small GPU together.
        del model, optimizer, result, loss
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return submissions

In [50]:
# Fit the per-store models and collect their forecasts.
prediction = training(data=train)

  2%|▏         | 1/45 [00:09<07:09,  9.77s/it]


RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 2.00 GiB total capacity; 1.16 GiB already allocated; 8.50 MiB free; 1.22 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [51]:
# Load the submission template that the forecasts will be written into.
sub = pd.read_csv(
    './dataset/sample_submission.csv'
)

Unnamed: 0,id,Weekly_Sales
0,1,0
1,2,0
2,3,0
3,4,0
4,5,0
...,...,...
175,176,0
176,177,0
177,178,0
178,179,0


In [None]:
# `prediction` holds one array of forecasts per store; a DataFrame column
# needs one scalar per row, so flatten it store by store.
# NOTE(review): assumes the template rows are ordered by store with that
# store's forecast weeks consecutive — confirm against sample_submission.csv.
flat_prediction = np.asarray(prediction, dtype=float).ravel()
assert len(flat_prediction) == len(sub), (
    "expected {} forecast values, got {}".format(len(sub), len(flat_prediction)))

sub['Weekly_Sales'] = flat_prediction
sub.to_csv('./dataset/transformerpredict.csv', index=False)