In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import warnings
from tqdm.auto import tqdm
from sklearn.preprocessing import LabelEncoder
from torch.nn import _reduction as _Reduction

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import platform

# Suppress every warning for the rest of the session.
warnings.filterwarnings(action='ignore')

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

device

device(type='cuda')

In [2]:
# Hyper-parameters shared by the cells below.
CFG = dict(
    TRAIN_WINDOW_SIZE=90,  # train on 90 days of history
    PREDICT_SIZE=21,       # forecast 21 days
    EPOCHS=10,
    LEARNING_RATE=1e-4,
    BATCH_SIZE=1024,
    SEED=42,
)

# Directory (under the current working directory) for auxiliary files.
PATH = f"{os.getcwd()}/data/"

# Location of the pre-built arrays: one path on macOS ('Darwin'), another everywhere else.
LOADPATH = (
    '/Users/a1r/Desktop/DL/timeseries_new_data/'
    if platform.system() == 'Darwin'
    else '/home/a1r/바탕화면/DL/timeseries_new_data/'
)

In [3]:
def seed_everything(seed):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy's legacy global RNG and PyTorch (CPU and
    all CUDA devices), and configures cuDNN for deterministic behaviour.

    Args:
        seed: Integer seed applied to every generator.
    """
    random.seed(seed)
    # Exported for child processes; the hash seed of the running interpreter
    # is fixed at start-up and is not affected by this assignment.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    # Deterministic kernels + no autotuner: required for repeatable runs.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(CFG['SEED']) # Fix the random seed

### Model

In [4]:
class GRU(nn.Module):
    """Stacked GRU encoder followed by a small MLP head.

    The last time step of the GRU output is passed through
    Linear -> ReLU -> Dropout(0.4) -> Linear and a final ReLU, so every
    returned value is non-negative.

    Args:
        in_channel: Number of features per time step.
        hidden_size: Width of the GRU hidden state.
        out_channel: Number of values produced per sample.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, in_channel=9, hidden_size = 512, out_channel=CFG['PREDICT_SIZE'], num_layers = 2):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.input_size = in_channel

        # batch_first=True: inputs and outputs are (batch, seq, feature).
        self.gru = nn.GRU(input_size = self.input_size,
                          hidden_size = self.hidden_size,
                          num_layers = self.num_layers,
                          batch_first=True)

        self.fc = nn.Sequential(
            nn.Linear(hidden_size, hidden_size//2),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(hidden_size//2, out_channel)
        )
        self.actv = nn.ReLU()

    def forward(self, x):
        """Run the network on a batch of windows.

        Args:
            x: Tensor of shape (batch, seq_len, in_channel).

        Returns:
            Tensor of shape (batch, out_channel); the output axis is dropped
            only when ``out_channel == 1``.
        """
        batch_size = x.size(0)
        hidden = self.init_hidden(batch_size, x.device)

        # The final hidden state is not needed; only the output sequence is used.
        gru_out, _ = self.gru(x, hidden)

        # Keep only the last time step of the output sequence.
        last_output = gru_out[:, -1, :]

        output = self.actv(self.fc(last_output))

        # squeeze(1) is a no-op unless dimension 1 has size 1.
        return output.squeeze(1)

    def init_hidden(self, batch_size, device):
        """Return an all-zero initial hidden state h0.

        Shape is (num_layers, batch_size, hidden_size). A GRU has no cell
        state, so a single tensor is enough.
        """
        # Plain tensors replace the deprecated torch.autograd.Variable wrapper.
        return torch.zeros(self.num_layers, batch_size, self.hidden_size, device = device)

## Loss & Metric

In [5]:
def pseudo_sfa(pred, df):
    """Compute a PSFA-style score averaged over the values of ``df["대분류"]``.

    For every distinct value of the "대분류" column, the rows whose ID belongs
    to that group are scored as
    ``1 - sum(|true - pred| / max(true, pred) * share) / n_columns``,
    where ``share`` is each row's fraction of the group's column total.
    Each group's score is printed; the mean over groups is returned.

    Args:
        pred: DataFrame of predictions. Its first column is skipped and the
            remaining ``pred.shape[1] - 1`` columns are compared to the truth.
            It is indexed with a boolean mask built from ``df``, so its index
            is assumed to line up with ``df``'s — TODO confirm.
        df: DataFrame with "대분류" and "ID" columns; its last
            ``pred.shape[1] - 1`` columns are taken as the ground truth.

    Returns:
        Mean of the per-group scores.
    """
    horizon = pred.shape[1] - 1
    truth = df.iloc[:, -horizon:].reset_index()
    truth = truth.rename(columns={"index": "ID"})

    # Group the IDs by category, in order of first appearance.
    ids_by_category = {}
    for main_cat in df["대분류"].unique():
        ids_by_category[main_cat] = df.query("대분류==@main_cat")["ID"].to_list()

    scores = []
    for main_cat, member_ids in ids_by_category.items():
        in_category = truth["ID"].isin(member_ids)

        # Drop the leading identifier column before converting to arrays.
        true_arr = truth[in_category].iloc[:, 1:].to_numpy()
        pred_arr = pred[in_category].iloc[:, 1:].to_numpy()

        # Small constant that keeps both divisions below away from zero.
        eps = np.full(true_arr.shape, 1.0) / 1e8

        # Share of each row in its column's total within the category.
        column_totals = true_arr.sum(axis=0)
        column_totals = np.stack([column_totals for _ in range(len(true_arr))]) + eps
        share = true_arr / column_totals

        relative_error = np.abs(true_arr - pred_arr) / np.maximum(true_arr, pred_arr + eps)

        score = 1 - (1 / true_arr.shape[1] * relative_error * share).sum()
        scores.append(score)
        print(main_cat, score)

    return np.mean(scores)

In [6]:
class _Loss(nn.Module):
    reduction: str

    def __init__(self, size_average=None, reduce=None, reduction: str = 'mean') -> None:
        super().__init__()
        if size_average is not None or reduce is not None:
            self.reduction: str = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction
    
class PsfaLoss(_Loss):
    """Differentiable PSFA-style loss weighted by each item's share of its group.

    Args:
        main_cats: Indexable collection; ``main_cats[i]`` selects the entries
            of the last tensor axis that belong to group ``i``.
        size_average, reduce, reduction: Forwarded to ``_Loss``. The resolved
            ``reduction`` is stored but not consulted by ``forward``.
    """

    def __init__(self, main_cats, size_average = None, reduce = None, reduction: str = 'mean') -> None:
        super().__init__(size_average, reduce, reduction)
        self.main_cats = main_cats

    def forward(self, pred, true):
        """Return the weighted relative error, averaged over the first two axes.

        Args:
            pred: Predictions, indexed as [batch, length, product].
            true: Ground truth with the same shape as ``pred``.

        Returns:
            Scalar tensor: ``sum(relative_error * rate) / (batch * length)``.
        """
        # The inputs are deliberately NOT stored on the module: keeping `pred`
        # there would hold the batch and its autograd graph until the next call.
        relative_error = torch.abs(true - pred) / torch.maximum(pred, true + 1e-8)

        n_groups = len(self.main_cats)
        rate = torch.zeros_like(true)
        for i in range(n_groups):
            members = self.main_cats[i]
            group_true = true[:, :, members]
            # Share of each member in its group's total, split evenly across groups.
            rate[:, :, members] = \
                group_true \
                / (group_true.sum(dim=-1, keepdim=True) + 1e-8) \
                / n_groups
        return (relative_error * rate).sum() / (true.shape[0] * true.shape[1])

## Training Dataset

In [7]:
class CustomDataset(Dataset):
    """Dataset over an indexable ``X`` and an optional indexable ``Y``.

    Items are converted with ``torch.Tensor`` on access. With ``Y`` given an
    item is the pair ``(X[index], Y[index])``; with ``Y=None`` it is just
    ``X[index]``.
    """

    def __init__(self, X, Y):
        self.X, self.Y = X, Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        features = torch.Tensor(self.X[index])
        if self.Y is None:
            # Unlabeled (inference) mode: features only.
            return features
        return features, torch.Tensor(self.Y[index])

#### Load the Dataset Arrays

In [8]:
# Load the training windows and their targets (min-max scaled, judging by the file names).
train_input, train_target = (
    np.load(LOADPATH + filename)
    for filename in ('train_input_minmax.npy', 'train_target_minmax.npy')
)

In [9]:
# Display the shapes of the loaded input and target arrays.
train_input.shape, train_target.shape

((5005350, 90, 9), (5005350, 21))

In [10]:
# Hold out the trailing 20% of the samples for validation; train on the rest.
data_len = len(train_input)
val_count = int(data_len*0.2)

train_dataset = CustomDataset(train_input[:-val_count], train_target[:-val_count])
val_dataset = CustomDataset(train_input[-val_count:], train_target[-val_count:])

# Only the training loader shuffles; validation keeps the stored order.
train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [11]:
def train(model, optimizer, train_loader, val_loader, device):
    """Train ``model`` with MSE loss for ``CFG['EPOCHS']`` epochs.

    After each epoch the model is evaluated with ``validation``; whenever the
    mean validation loss improves, a snapshot of the model is kept.

    Args:
        model: Network to optimise; moved to ``device`` in place.
        optimizer: Optimizer already bound to ``model``'s parameters.
        train_loader: Iterable of ``(X, Y)`` training batches.
        val_loader: Iterable of ``(X, Y)`` validation batches.
        device: Device on which batches and the model are placed.

    Returns:
        Tuple ``(best_model, train_loss, val_loss)``. ``best_model`` is an
        independent copy of the model at its best validation epoch (``None``
        if the validation loss never compared below the initial value, e.g.
        it was always NaN). ``train_loss`` and ``val_loss`` map the epoch
        number to the list of per-batch losses.
    """
    import copy  # local import: only needed for the best-model snapshot

    model.to(device)
    criterion = nn.MSELoss().to(device)
    best_loss = float('inf')
    best_model = None
    train_loss = {}
    val_loss = {}

    # Epoch
    for epoch in range(1, CFG['EPOCHS']+1):
        model.train()
        train_loss_li = []

        # Iteration
        for X, Y in tqdm(iter(train_loader)):
            X = X.to(device)
            Y = Y.to(device)

            optimizer.zero_grad()

            output = model(X)
            loss = criterion(output, Y)

            loss.backward()
            optimizer.step()

            train_loss_li.append(loss.item())

        val_loss_li = validation(model, val_loader, criterion, device)
        val_loss_mean = np.mean(val_loss_li)
        print(f'Epoch : [{epoch}] Train Loss : [{np.mean(train_loss_li):.5f}] Val Loss : [{val_loss_mean:.5f}]')

        if best_loss > val_loss_mean:
            best_loss = val_loss_mean
            # Snapshot the weights: keeping a bare reference would let later
            # epochs overwrite the "best" model with the final-epoch weights.
            best_model = copy.deepcopy(model)
            print('Model Saved')

        train_loss[epoch] = train_loss_li
        val_loss[epoch] = val_loss_li

    return best_model, train_loss, val_loss

In [12]:
def validation(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` and return the per-batch losses.

    The model is switched to eval mode and left in that mode; gradients are
    disabled for the whole pass.

    Args:
        model: Network to evaluate.
        val_loader: Iterable of ``(X, Y)`` batches.
        criterion: Callable ``criterion(output, target)`` returning a scalar tensor.
        device: Device on which each batch is placed.

    Returns:
        List with one Python float per batch.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for inputs, targets in tqdm(iter(val_loader)):
            inputs, targets = inputs.to(device), targets.to(device)
            batch_loss = criterion(model(inputs), targets)
            batch_losses.append(batch_loss.item())

    return batch_losses

In [13]:
# Build the network and its Adam optimizer, then run the training loop.
model = GRU()
optimizer = optim.Adam(model.parameters(), lr=CFG['LEARNING_RATE'])
infer_model, train_loss, val_loss = train(
    model=model,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
)

  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [1] Train Loss : [0.03161] Val Loss : [0.03210]
Model Saved


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [2] Train Loss : [0.02069] Val Loss : [0.01894]
Model Saved


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [3] Train Loss : [0.01852] Val Loss : [0.03172]


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [4] Train Loss : [0.01821] Val Loss : [0.01749]
Model Saved


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [5] Train Loss : [0.01778] Val Loss : [0.01747]
Model Saved


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [6] Train Loss : [0.01762] Val Loss : [0.01752]


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [7] Train Loss : [0.01749] Val Loss : [0.01758]


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [8] Train Loss : [0.01737] Val Loss : [0.01747]
Model Saved


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [9] Train Loss : [0.01714] Val Loss : [0.01754]


  0%|          | 0/3911 [00:00<?, ?it/s]

  0%|          | 0/978 [00:00<?, ?it/s]

Epoch : [10] Train Loss : [0.01684] Val Loss : [0.01749]


In [14]:
# Persist the weights of the model returned by train().
SAVEPATH = f"{os.getcwd()}/GRU_2l_0822_1e_4.pth"
torch.save(infer_model.state_dict(), SAVEPATH)

# train() returns {epoch: [per-batch losses]} dicts; np.array() wraps each dict
# in a 0-d object array, so np.save pickles it (reload with allow_pickle=True).
train_loss, val_loss = np.array(train_loss), np.array(val_loss)

# Save the loss histories (np.save appends the '.npy' suffix).
for stem, history in (
    ('train_loss_gru_2l_0822_1e_4', train_loss),
    ('val_loss_gru_2l_0822_1e_4', val_loss),
):
    np.save(PATH + stem, history)

## Model Load

In [None]:
# Test windows to predict on.
test_input = np.load(LOADPATH + 'test_input_minmax.npy')

# Stored minima / maxima, used further down to undo the scaling of the predictions.
min_, max_ = (
    np.load(PATH + relative_path)
    for relative_path in ('MinMax/MIN.npy', 'MinMax/MAX.npy')
)

In [None]:
# Unlabeled dataset (Y=None): each item is a single feature tensor.
test_dataset = CustomDataset(X=test_input, Y=None)
# No shuffling, so predictions come out in the stored sample order.
test_loader = DataLoader(test_dataset, shuffle=False, num_workers=0, batch_size=CFG['BATCH_SIZE'])

def inference(model, test_loader, device):
    """Run ``model`` over ``test_loader`` and collect the outputs.

    The model is moved to ``device`` and switched to eval mode (and left in
    that mode), so Dropout is disabled regardless of the mode it arrived in.

    Args:
        model: Trained network.
        test_loader: Iterable yielding input batches only (no targets).
        device: Device on which the model and batches are placed.

    Returns:
        NumPy array stacking the outputs of every batch, in loader order.
    """
    model = model.to(device)
    # A freshly constructed or loaded module is in train mode; without eval()
    # the Dropout layer would randomly zero activations at prediction time.
    model.eval()
    predictions = []

    with torch.no_grad():
        for X in tqdm(iter(test_loader)):
            X = X.to(device)

            output = model(X)

            # Move the model output to the CPU and convert it to a NumPy array.
            output = output.cpu().numpy()

            predictions.extend(output)

    return np.array(predictions)

In [None]:
model = GRU()

# The freshly built model lives on the CPU, so map the checkpoint to the CPU on
# every platform. Without map_location, a checkpoint saved from a GPU fails to
# load on any CPU-only machine, not just on macOS.
model.load_state_dict(
    torch.load(os.getcwd() + '/GRU_2l_Bin.pth', map_location=torch.device('cpu'))
)

### Predictions to Submit

In [None]:
# NOTE(review): this predicts with `infer_model` (the model returned by train()),
# not with the `model` whose checkpoint was loaded in the previous cell — confirm
# which of the two is intended.
prediction = inference(infer_model, test_loader, device)
prediction.shape

In [None]:
def return_scaler(arr, min_ = min_, max_ = max_):
    """Undo min-max scaling row by row: ``arr[i] * (max_[i] - min_[i]) + min_[i]``.

    The input is left untouched and a new array is returned, so re-running the
    calling cell never rescales values that were already rescaled.

    Args:
        arr: Array of scaled values; row ``i`` is rescaled with entry ``i`` of
            ``min_`` / ``max_``.
        min_: Per-row minima (defaults to the array loaded when this function
            was defined).
        max_: Per-row maxima (same default binding as ``min_``).

    Returns:
        New array with the same shape and dtype as ``arr``.
    """
    denom = max_ - min_
    restored = np.array(arr, copy=True)
    for i in tqdm(range(len(restored))):
        restored[i] = (arr[i]*denom[i]) + min_[i]

    return restored

# Round the restored predictions to whole numbers.
prediction_unscale = np.around(return_scaler(prediction))
prediction_unscale

In [None]:
# Read the submission template (PATH already ends with '/', so the path contains
# a doubled slash) and overwrite every column after the first with the predictions.
submit = pd.read_csv(PATH + '/sample_submission.csv')
submit.iloc[:, 1:] = prediction_unscale
submit

In [None]:
SAVE_SUBMIT = os.getcwd() + '/data/GRU_2_layers_0821_version2.csv'
# index=False leaves the row index out of the file. index_label=False only
# blanks the index header and still writes the index values, giving every data
# row one more field than the header row.
submit.to_csv(SAVE_SUBMIT, index=False)

### 연습장 (Scratchpad)

In [None]:
# Scratch experiment: rebinds train_input / train_target to a second pair of
# arrays, replacing the ones loaded earlier in the notebook.
train_input = np.load(LOADPATH + 'train_input_2.npy')
train_target = np.load(LOADPATH + 'train_target_2.npy')
# allow_pickle=True unpickles arbitrary Python objects and can execute code:
# only load main_cats.npy from a trusted source.
# Presumably holds the per-category index groups consumed by PsfaLoss — confirm.
main_cats = np.load(os.getcwd() + '/main_cats.npy', allow_pickle=True)

In [None]:
# Same 80/20 tail split as before, rebuilt for the arrays loaded in the cell above.
data_len = len(train_input)
val_count = int(data_len*0.2)

train_dataset = CustomDataset(train_input[:-val_count], train_target[:-val_count])
val_dataset = CustomDataset(train_input[-val_count:], train_target[-val_count:])

train_loader = DataLoader(train_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=True, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=CFG['BATCH_SIZE'], shuffle=False, num_workers=0)

In [None]:
model = GRU()
optimizer = optim.Adam(params = model.parameters(), lr = CFG['LEARNING_RATE'])
# The new model is on the CPU; map_location lets a checkpoint saved from a GPU
# load on a machine without CUDA.
model.load_state_dict(
    torch.load(os.getcwd() + '/GRU_2l_1e_4.pth', map_location=torch.device('cpu'))
)

In [None]:
# Build the PSFA loss from the loaded category groups and keep the module on the CPU.
criterion = PsfaLoss(main_cats).cpu()

In [None]:
model = model.cpu()

# Take only the first training batch for inspection, then stop iterating.
for X, Y in train_loader:
    X, Y = X.cpu(), Y.cpu()
    break

In [None]:
# Inspect the sampled batch: print its shape, then display the tensor itself.
print(X.shape)
X