In [None]:
import numpy as np
import pandas as pd
import pickle
import os
import random
import torch
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset
from torch.optim import Adam
import torch.nn as nn

from utils.tools import dotdict
from models.NLinear import Model as NLinear

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

# Number of rows in the reference test file. Despite the name this is a row
# count, not a day count: the forecasting loop later divides it by 24.
PRED_DAYS = pd.read_csv('data/공주test.csv').shape[0]

In [None]:
def seed_everything(seed: int = 302):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), exports ``PYTHONHASHSEED`` and switches cuDNN into
    deterministic mode.

    Args:
        seed: Value applied to all generators.
    """
    random.seed(seed)
    np.random.seed(seed)
    # Only takes effect for interpreters started after this point; the hash
    # seed of the already-running process cannot be changed.
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.cuda.manual_seed_all(seed)  # type: ignore  # cover multi-GPU setups too
    torch.backends.cudnn.deterministic = True  # type: ignore
    # Benchmark mode lets cuDNN pick convolution algorithms by timing them,
    # which can differ from run to run, so it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False  # type: ignore

In [None]:
# Fix all RNG seeds (default seed: 302) before any data is loaded or any model is built.
seed_everything()

In [None]:
# Station name (as used in the data file names) -> short ASCII code used in
# checkpoint file names.
loc_map = {
    '공주': 'GJ',
    '노은동': 'NED',
    '논산': 'NS',
    '대천2동': 'DCH',
    '독곶리': 'DGL',
    '동문동': 'DMD',
    '모종동': 'MJD',
    '문창동': 'MCHD',
    '성성동': 'SSD',
    '신방동': 'SBD',
    '신흥동': 'SHD',
    '아름동': 'ALD',
    '예산군': 'YSG',
    '읍내동': 'YND',
    '이원면': 'YWM',
    '정림동': 'JLD',
    '홍성읍': 'HSE',
}

# Feature column name in the CSV files -> English name used in checkpoint file names.
col_map = {
    '기온': 'temperature',
    '풍향': 'wind_direction',
    '풍속': 'wind_speed',
    '강수량': 'precipitation',
    '습도': 'humidity',
}

In [None]:
def columns_to_array(PATH):
    """Read a CSV file and split its feature columns into separate arrays.

    Args:
        PATH: Path of the CSV file. It must contain the columns
            '기온', '풍향', '풍속', '강수량' and '습도'.

    Returns:
        list: ``[temp, wind_d, wind_s, prec, humid]`` - one NumPy array per
        feature column, always in that order.
    """
    feature_columns = ('기온', '풍향', '풍속', '강수량', '습도')
    frame = pd.read_csv(PATH)
    return [np.array(frame[name]) for name in feature_columns]

def sliding_window_x(data, window_size):
    """Cut a series into overlapping input windows (stride 1).

    Args:
        data: NumPy array holding the series; its ``size`` sets the number of windows.
        window_size: Number of consecutive values in each window.

    Returns:
        np.ndarray: Stacked windows, one row per start position. The array is
        empty when the series is shorter than ``window_size``.
    """
    n_windows = data.size - window_size + 1
    windows = [data[start:start + window_size] for start in range(n_windows)]
    return np.array(windows)

def sliding_window_xy(data, window_size, forecast_size):
    """Cut a series into (input window, forecast target) pairs (stride 1).

    Each pair consists of ``window_size`` consecutive values and the
    ``forecast_size`` values that immediately follow them.

    Args:
        data: NumPy array holding the series; its ``size`` sets the number of pairs.
        window_size: Length of each input window.
        forecast_size: Length of each target segment.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(x, y)`` with matching first dimension.
        Both are empty when the series is shorter than
        ``window_size + forecast_size``.
    """
    n_samples = data.size - window_size - forecast_size + 1
    starts = range(n_samples)

    inputs = [data[s:s + window_size] for s in starts]
    targets = [data[s + window_size:s + window_size + forecast_size] for s in starts]

    return np.array(inputs), np.array(targets)

In [None]:
class Optimization:
    """Training, validation and inference helper around a single model.

    Keeps the per-epoch loss history, lowers the learning rate when the
    validation loss plateaus, and checkpoints the best weights to
    ``weights/<flag>_bestmodel.pth``.

    Attributes:
        flag: Identifier used in the checkpoint file name.
        model: The model being trained.
        loss_fn: Callable invoked as ``loss_fn(prediction, target)``.
        optimizer: Optimizer updating ``model``'s parameters.
        lr: Learning rate passed in by the caller (stored only).
        train_losses: Mean training loss of every finished epoch.
        val_losses: Mean validation loss of every finished epoch.
        scheduler: ``ReduceLROnPlateau`` scheduler driven by the validation loss.
    """

    def __init__(self, flag, model, loss_fn, optimizer, learning_rate):
        self.flag = flag
        self.model = model
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.lr = learning_rate
        self.train_losses = []
        self.val_losses = []
        # NOTE(review): `verbose` is reported as deprecated in recent PyTorch
        # releases - confirm against the installed version before upgrading.
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='min', factor=0.1, patience=10, threshold_mode='abs', min_lr=1e-8, verbose=True)

    def train_step(self, x, y):
        """Run one optimisation step on a single batch.

        Args:
            x: Batch of model inputs.
            y: Batch of targets matching the model output.

        Returns:
            float: Loss value of this batch.
        """
        self.model.train()

        yhat = self.model(x)

        # PyTorch losses expect (input, target); the order matters for any
        # loss that is not symmetric in its arguments.
        loss = self.loss_fn(yhat, y)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def train(self, train_loader, vali_loader, epochs=50):
        """Train for ``epochs`` epochs and checkpoint the best validation model.

        After each epoch the mean training and validation losses are recorded
        and printed, the scheduler is stepped with the validation loss, and the
        weights are saved whenever the validation loss improves.

        Args:
            train_loader: Iterable yielding ``(x, y)`` training batches.
            vali_loader: Iterable yielding ``(x, y)`` validation batches.
            epochs: Number of passes over ``train_loader``.
        """
        checkpoint_path = 'weights/{}_bestmodel.pth'.format(self.flag)
        # torch.save does not create missing directories.
        os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

        min_loss = float('inf')
        for epoch in range(1, epochs+1):
            batch_losses = [self.train_step(train_x, train_y) for train_x, train_y in train_loader]
            training_loss = np.mean(batch_losses)
            self.train_losses.append(training_loss)

            self.model.eval()
            with torch.no_grad():
                batch_val_losses = [
                    self.loss_fn(self.model(vali_x), vali_y).item()
                    for vali_x, vali_y in vali_loader
                ]
            validation_loss = np.mean(batch_val_losses)
            self.val_losses.append(validation_loss)

            print('EPOCH [{}/{}]: Training Loss: {}, Validation Loss: {}'.format(epoch, epochs, training_loss, validation_loss))

            self.scheduler.step(validation_loss)
            if min_loss > validation_loss:
                min_loss = validation_loss
                torch.save(self.model.state_dict(), checkpoint_path)
                print('MODEL SAVED')

    def predict(self, test_loader):
        """Run the model over every batch and return all predictions flattened.

        Args:
            test_loader: Iterable yielding input batches.

        Returns:
            np.ndarray: 1-D array with the predictions of all batches in
            iteration order; empty when ``test_loader`` yields nothing.
        """
        self.model.eval()
        preds = []
        with torch.no_grad():
            for test_x in test_loader:
                pred = self.model(test_x)
                # Flatten per batch so a smaller final batch cannot break stacking.
                preds.append(pred.detach().cpu().numpy().reshape(-1))

        if not preds:
            return np.array([])
        return np.concatenate(preds)

In [None]:
# Load the station collection that the training/forecasting loop iterates over.
# NOTE(review): pickle.load can execute arbitrary code - only load a
# 'loc_match.pkl' that comes from a trusted source.
with open('loc_match.pkl', 'rb') as f:
    loc_match = pickle.load(f)

window_size = 24*2 # input: 2 days (48 rows per input window)
forecast_size = 24*3 # output: 3 days (72 rows per forecast)
batch_size = 32

# Feature columns to model. The order must match the list returned by
# columns_to_array, because the loop below indexes that list by position.
cols = ['기온', '풍향', '풍속', '강수량', '습도']

In [None]:
def _to_model_tensor(values, steps):
    """Convert an array of windows into a float32 ``(n, steps, 1)`` tensor on ``device``."""
    return torch.tensor(values, dtype=torch.float32).reshape(-1, steps, 1).to(device)


# For every station and every feature column: train a univariate NLinear model,
# reload its best checkpoint, fill the forecast rows of the station's test file
# and write that file back to disk.
for pm_loc in loc_match:
    # One array per feature, in the same order as `cols`:
    # [temp, wind_d, wind_s, prec, humid]
    vals_of_cols_train = columns_to_array('data/{}train.csv'.format(pm_loc))

    for index, col in enumerate(cols):
        # --- Supervised windows; first 80% (in file order) train, rest validate ---
        x, y = sliding_window_xy(vals_of_cols_train[index], window_size, forecast_size)
        LEN = x.shape[0]
        train_size = int(LEN*0.8)

        train_x = _to_model_tensor(x[:train_size], window_size)
        train_y = _to_model_tensor(y[:train_size], forecast_size)
        vali_x = _to_model_tensor(x[train_size:], window_size)
        vali_y = _to_model_tensor(y[train_size:], forecast_size)
        train = TensorDataset(train_x, train_y)
        vali = TensorDataset(vali_x, vali_y)
        train_loader = DataLoader(train, batch_size, shuffle=True)
        vali_loader = DataLoader(vali, batch_size, shuffle=False)

        # --- Model: one input channel, window_size steps in, forecast_size steps out ---
        args_nlinear = dotdict()

        args_nlinear.seq_len = window_size
        args_nlinear.pred_len = forecast_size
        args_nlinear.enc_in = 1
        args_nlinear.individual = False

        model = NLinear(args_nlinear).to(device)

        learning_rate = 1e-05
        loss_fn = nn.MSELoss()
        optimizer = Adam(model.parameters(), lr=learning_rate, weight_decay=0.1)

        opt = Optimization('{}_{}'.format(loc_map[pm_loc], col_map[col]), model, loss_fn, optimizer, learning_rate)
        opt.train(train_loader, vali_loader)

        # --- Restore the best checkpoint written during training ---
        PATH = 'weights/{}_{}_bestmodel.pth'.format(loc_map[pm_loc], col_map[col])
        state_dict = torch.load(PATH, map_location=device)
        # load_state_dict updates the module in place and returns a report of
        # missing/unexpected keys, so its result must not replace `model`.
        model.load_state_dict(state_dict)

        # --- Forecast: blocks of 120 rows = 48 input rows + 72 rows to fill ---
        n = 5
        vals_of_cols_test = columns_to_array('data/{}test.csv'.format(pm_loc))
        # vals_of_cols = [temp, wind_d, wind_s, prec, humid]
        while n <= PRED_DAYS/24-2:
            x = vals_of_cols_test[index][24*(n-5):24*(n-3)]
            x = _to_model_tensor(x, window_size)
            test_loader = DataLoader(x, x.shape[0], shuffle=False)
            preds = opt.predict(test_loader)
            # Overwrite the 72 rows that follow the input window with the forecast.
            vals_of_cols_test[index][24*(n-3):24*n] = preds
            n+=5
        print('{} {} 예측 완료'.format(pm_loc, col))

        # NOTE(review): this overwrites the input test file in place, keeps only
        # the feature columns and adds an index column - confirm that downstream
        # consumers expect exactly this layout.
        df_test = pd.read_csv('data/{}test.csv'.format(pm_loc))
        df_test[col] = vals_of_cols_test[index]
        df_test[cols].to_csv('data/{}test.csv'.format(pm_loc))

EPOCH [1/50]: Training Loss: 0.019320429425294463, Validation Loss: 0.017081525474446548
MODEL SAVED
EPOCH [2/50]: Training Loss: 0.01779285360242771, Validation Loss: 0.01572029537959322
MODEL SAVED
EPOCH [3/50]: Training Loss: 0.016496127190012953, Validation Loss: 0.014559737697975141
MODEL SAVED
EPOCH [4/50]: Training Loss: 0.01538928035535856, Validation Loss: 0.013569006534744072
MODEL SAVED
EPOCH [5/50]: Training Loss: 0.014445636516279227, Validation Loss: 0.012729274702245651
MODEL SAVED
EPOCH [6/50]: Training Loss: 0.013644153885202358, Validation Loss: 0.012016573302090576
MODEL SAVED
EPOCH [7/50]: Training Loss: 0.012964958087402636, Validation Loss: 0.011412289274269587
MODEL SAVED
EPOCH [8/50]: Training Loss: 0.012394087261241143, Validation Loss: 0.010911092265258165
MODEL SAVED
EPOCH [9/50]: Training Loss: 0.011921569930381, Validation Loss: 0.010503974078716413
MODEL SAVED
EPOCH [10/50]: Training Loss: 0.01153452451302493, Validation Loss: 0.010152311886120672
MODEL SA