In [1]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import sys
import easydict
import numpy as np
import os
import wandb
import warnings
from glob import glob
warnings.filterwarnings("ignore")

sys.path.append('informer')
from inference import Predictor
from informer.models.model import Informer
from dataset import CustomDataset, load_data
from utils import seed_everything
import time

In [2]:
# Experiment configuration shared by every cell below.
args = easydict.EasyDict({
    "model": "Informer",
    # Prefer the first GPU, but fall back to CPU so the notebook still runs
    # on machines without CUDA instead of failing at the first `.to(device)`.
    "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
    "input_window": 112,    # passed to load_data as the input window length
    "target_window": 28,    # forecast length (Informer `out_len`)
    "label_len": 56,        # number of leading `y` steps fed to the decoder
    "target_n": 21,         # equals len(breed_list); not read in the visible cells
    "learning_rate": 1e-3,
    "batch_size": 128,
    "epochs": 100,
    "path": "../data/train.csv",
    "inference_sample": "../inference/sample_submission.csv",
    "inference_sliced": "../data/private/test/*.csv",
    "inference_result": "../inference/result.csv",
    "save_path": "../models",
    # Not read in the visible cells; `args` is handed to Predictor, which may
    # consume it -- TODO confirm.
    "use_best_model": False,
    "enc_in": 2,            # encoder input feature dim (price + volume)
    "dec_in": 1,            # decoder input feature dim
    "wandb": True,          # log metrics to Weights & Biases
    "randomseed": False,    # False disables seeding; otherwise the seed value
})

# Run name: model, hyper-parameters, dataset file stem and a start timestamp.
# time.strftime() formats the current local time when no time tuple is given.
NAME_ELEMENTS = [
    args.model,
    str(args.learning_rate),
    str(args.batch_size),
    str(args.epochs),
    args.path.split('/')[-1].split('.')[0],
    time.strftime("%m%d_%H%M"),
]
MODEL_NAME = '_'.join(NAME_ELEMENTS)

In [3]:
# `randomseed` is False (or None) when seeding is disabled. Test against those
# sentinels by identity rather than by truthiness, so that a seed of 0 -- which
# is falsy -- is still applied.
if args.randomseed is not None and args.randomseed is not False:
    seed_everything(args.randomseed)

In [4]:
def nmae(pred, true, scaler):
    """Normalized mean absolute error, computed on the original (unscaled) values.

    Both inputs are mapped back through ``scaler.inverse_transform`` and
    flattened, then ``|true - pred| / true`` is averaged.

    Entries whose unscaled target is exactly 0 are excluded: dividing by them
    would produce inf/NaN and make the whole batch score unusable.

    Args:
        pred: predictions in scaled space; must be accepted by
            ``scaler.inverse_transform``.
        true: targets in scaled space, same shape as ``pred``.
        scaler: object whose ``inverse_transform`` returns a NumPy array.

    Returns:
        A 0-dim tensor holding the score. It is NaN when every target is 0,
        because no entry is left to average.
    """
    pred = torch.from_numpy(scaler.inverse_transform(pred)).view(-1, 1)
    true = torch.from_numpy(scaler.inverse_transform(true)).view(-1, 1)

    # Keep only entries with a non-zero denominator.
    valid = true != 0
    score = torch.mean(torch.abs(true[valid] - pred[valid]) / true[valid])
    return score

# Training objective: L1 loss, applied directly to the model outputs
# (no inverse scaling, unlike nmae).
criterion = nn.L1Loss() # mae
# Inference helper built from the sample-submission path and the glob pattern
# of the sliced test files; used after each crop finishes training.
P = Predictor(args.inference_sample, args.inference_sliced)

In [None]:
# Crops to model; a separate Informer is trained and used for inference per entry.
breed_list = [
    '배추', '무', '양파', '건고추', '마늘',
    '대파', '얼갈이배추', '양배추', '깻잎',
    '시금치', '미나리', '당근',
    '파프리카', '새송이', '팽이버섯', '토마토',
    '청상추', '백다다기', '애호박', '캠벨얼리', '샤인마스캇'
]

# Zero-based positions inside the forecast window that are scored and trained
# on: the 7th, 14th and 28th forecast steps.
EVAL_STEPS = [6, 13, 27]


def _run_epoch(model, loader, scaler, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean L1 loss, mean NMAE)``.

    With an ``optimizer`` the pass trains the model (zero_grad / backward /
    step per batch); without one it only evaluates. The caller is responsible
    for ``model.train()`` / ``model.eval()`` and for ``torch.no_grad()``.

    Args:
        model: network called as ``model(x, x_mark, y, y_mark)``.
        loader: iterable of batches, each a dict with the keys 'price_std',
            'volume_std', 'x_mark', 'y_mark' and 'y'.
        scaler: price scaler handed to ``nmae`` to undo the scaling.
        optimizer: optimizer to step after every batch, or None to evaluate.
    """
    losses, scores = [], []
    for data in loader:
        price = data['price_std'].unsqueeze(2)
        volume = data['volume_std'].unsqueeze(2)
        x_mark = data['x_mark'].permute(0, 2, 1).to(args.device)
        y_mark = data['y_mark'].permute(0, 2, 1).to(args.device)
        y = data['y'].unsqueeze(2)
        # Ground truth is the trailing target_window steps of y.
        gt = y[:, -args.target_window:, :].to(args.device)

        # Encoder input: price and volume stacked as two channels.
        x = torch.cat([price, volume], dim=2).to(args.device)
        # Decoder input: the first label_len steps of y followed by zeros as
        # placeholders for the target_window steps to be predicted.
        dec_inp = torch.zeros([y.shape[0], args.target_window, 1])
        y = torch.cat([y[:, :args.label_len, :], dec_inp], dim=1).to(args.device)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(x, x_mark, y, y_mark)

        # Only the selected forecast steps contribute to loss and metric.
        gt = gt[:, EVAL_STEPS, :]
        outputs = outputs[:, EVAL_STEPS, :]

        loss = criterion(outputs, gt)
        score = nmae(outputs.detach().cpu(), gt.detach().cpu(), scaler)

        # Store plain floats so np.mean does not have to coerce 0-dim tensors.
        scores.append(float(score))
        losses.append(loss.item())

        if optimizer is not None:
            loss.backward()
            optimizer.step()
    return np.mean(losses), np.mean(scores)


run = None
if args.wandb:
    run = wandb.init(entity="yai_timeseries", project="nongsanmul", name=MODEL_NAME, config=args, reinit=True)

try:
    for breed in breed_list:
        model = Informer(
            enc_in=args.enc_in,
            dec_in=args.dec_in,
            c_out=1,
            out_len=args.target_window,
        ).to(args.device)
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer,
            T_0=10,
            T_mult=2,
            eta_min=1e-7,
        )

        train_loader, valid_loader, dataset = load_data(
            args.path,
            breed,
            args.input_window,
            args.target_window,
            args.label_len,
            batch_size=args.batch_size,
        )
        price_scaler = dataset.std_scaler
        volume_scaler = dataset.std_scaler_volume

        # Checkpoints for this crop live in <save_path>/<breed>/.
        os.makedirs(os.path.join(args.save_path, breed), exist_ok=True)

        # np.inf, not np.Inf: the capitalised alias was removed in NumPy 2.0.
        best_nmae = np.inf
        print("Training :", breed)
        for epoch in range(args.epochs):
            ### Training
            model.train()
            train_loss, train_nmae = _run_epoch(model, train_loader, price_scaler, optimizer)

            ### Validation
            model.eval()
            with torch.no_grad():
                val_loss, val_nmae = _run_epoch(model, valid_loader, price_scaler)

            print("Epoch: {:>2}/{} | Validation loss: {:.6f}".format(epoch+1, args.epochs, val_loss))
            print("       {:>2}/{} | Validation NMAE: {:.6f}".format(epoch+1, args.epochs, val_nmae))
            if val_nmae < best_nmae:
                print(f'Validation NMAE decreased ({best_nmae:.6f} --> {val_nmae:.6f}).  Saving model ...')
                # The checkpoint is named after its validation NMAE; the best
                # one is reloaded by that same name after training.
                path_dir = [args.save_path, breed, '{:.6f}.pt'.format(val_nmae)]
                torch.save(model.state_dict(), os.path.join(*path_dir))
                best_nmae = val_nmae

            if args.wandb:
                wandb.log({
                    breed+"/Train/Loss" : train_loss,
                    breed+"/Train/NMAE" : train_nmae,
                    breed+"/Val/Loss" : val_loss,
                    breed+"/Val/NMAE" : val_nmae
                })
            scheduler.step()

        print("Loading Best Model")
        best_model_dir = [args.save_path, breed, '{:.6f}.pt'.format(best_nmae)]
        model.load_state_dict(torch.load(os.path.join(*best_model_dir)))

        print("Inference :", breed)
        P.get_dataset(args, breed, price_scaler, volume_scaler)
        P.predict(args, model, breed, args.inference_result)
finally:
    # Close the W&B run even when training fails or is interrupted.
    if run is not None:
        run.finish()
print("Done!")
    
    

[34m[1mwandb[0m: Currently logged in as: [33mstephencha[0m (use `wandb login --relogin` to force relogin)


Training : 배추
Epoch:  1/100 | Validation loss: 0.737300
        1/100 | Validation NMAE: 0.338758
Validation NMAE decreased (inf --> 0.338758).  Saving model ...
Epoch:  2/100 | Validation loss: 0.778890
        2/100 | Validation NMAE: 0.320157
Validation NMAE decreased (0.338758 --> 0.320157).  Saving model ...
Epoch:  3/100 | Validation loss: 0.782489
        3/100 | Validation NMAE: 0.390974
Epoch:  4/100 | Validation loss: 0.752931
        4/100 | Validation NMAE: 0.362006
Epoch:  5/100 | Validation loss: 0.674865
        5/100 | Validation NMAE: 0.264575
Validation NMAE decreased (0.320157 --> 0.264575).  Saving model ...
Epoch:  6/100 | Validation loss: 0.673730
        6/100 | Validation NMAE: 0.276160
Epoch:  7/100 | Validation loss: 0.669608
        7/100 | Validation NMAE: 0.254289
Validation NMAE decreased (0.264575 --> 0.254289).  Saving model ...
Epoch:  8/100 | Validation loss: 0.659231
        8/100 | Validation NMAE: 0.287046
Epoch:  9/100 | Validation loss: 0.694915
 

In [None]:
from scoring import scoring
import pandas as pd

# Compare the generated submission with the public answer file and report the score.
ANSWER_PATH = "../data/public_test.csv"

answer_df = pd.read_csv(ANSWER_PATH)
submission_df = pd.read_csv(args.inference_result)

public_score = scoring(answer_df, submission_df)
print(f"Score: {public_score}")