In [1]:
import pandas as pd

import pickle
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler

import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from torchmetrics.regression import R2Score, MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError

In [2]:
DATA_PATH = '../Data/'

electric_df = pd.read_csv(DATA_PATH + 'electric_df_clear_29_days.csv')
water_df = pd.read_csv(DATA_PATH + 'water_df_clear_29_days.csv')

In [3]:
# electric_df

In [4]:
water_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,30,120,210,410,32,184,180,260,35,145,...,95,46,139,204,198,53,162,210,150,51
1,120,210,410,32,184,180,260,35,145,203,...,46,139,204,198,53,162,210,150,51,169
2,210,410,32,184,180,260,35,145,203,216,...,139,204,198,53,162,210,150,51,169,204
3,410,32,184,180,260,35,145,203,216,43,...,204,198,53,162,210,150,51,169,204,169
4,32,184,180,260,35,145,203,216,43,136,...,198,53,162,210,150,51,169,204,169,38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285993,157,230,169,47,122,218,350,110,183,190,...,58,218,290,105,10,68,38,263,78,158
285994,230,169,47,122,218,350,110,183,190,230,...,218,290,105,10,68,38,263,78,158,290
285995,169,47,122,218,350,110,183,190,230,98,...,290,105,10,68,38,263,78,158,290,300
285996,47,122,218,350,110,183,190,230,98,143,...,105,10,68,38,263,78,158,290,300,280


In [5]:
# electric_features = electric_df[electric_df.columns[:-1]]
# electric_target = electric_df[electric_df.columns[-1:]]

# electric_X_train, electric_X_test, electric_y_train, electric_y_test = train_test_split(electric_features,
#                                                     electric_target,
#                                                     random_state = 42,
                                                    # test_size = 0.2)

water_features = water_df[water_df.columns[:-1]]
water_target = water_df[water_df.columns[-1:]]

water_X_train, water_X_test, water_y_train, water_y_test = train_test_split(water_features,
                                                                            water_target,
                                                                            random_state = 42,
                                                                            test_size = 0.2)

In [6]:
# electric_X_test

In [7]:
# electric_y_train.info()

In [8]:
class CustomDataset(Dataset):
    def __init__(self, featureDF, targetDF):
        self.featureDF = featureDF
        self.targetDF = targetDF
        self.n_rows = self.featureDF.shape[0]
        self.n_cols = self.featureDF.shape[1]

    def __len__(self):
        return self.n_rows
    
    def __getitem__(self, index):
        featureTS = torch.FloatTensor(self.featureDF.iloc[index].values)
        targetTS = torch.FloatTensor(self.targetDF.iloc[index].values)

        return featureTS, targetTS

In [9]:
class LSTMModel(nn.Module):
    def __init__(self, hidden_dim, input_size, n_layers, dropout,
                 bidirectional):
        super().__init__()

        self.model = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_dim,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = True
        )

        if bidirectional:
            self.linear = nn.Linear(hidden_dim * 2, 1)
        
        else:
            self.linear = nn.Linear(hidden_dim, 1)

        # 성능에 따라 추가
        self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        output, _ = self.model(inputs)
        logits = self.linear(output)

        return logits

In [10]:
# electric_mmscaler = MinMaxScaler().fit(electric_X_train)
water_rbscaler = RobustScaler().fit(water_X_train)

# with open('electric_min_max_scaler.pkl', 'wb') as f:
#     pickle.dump(electric_mmscaler, f)

with open('water_robust_scaler.pkl', 'wb') as f:
    pickle.dump(water_rbscaler, f)

In [11]:
# electric_X_train_scaled = electric_mmscaler.transform(electric_X_train)
# electric_X_test_scaled = electric_mmscaler.transform(electric_X_test)

water_X_train_scaled = water_rbscaler.transform(water_X_train)
water_X_test_scaled = water_rbscaler.transform(water_X_test)

# electric_X_train = pd.DataFrame(electric_X_train_scaled, columns = electric_X_train.columns)
# electric_X_test = pd.DataFrame(electric_X_test_scaled, columns = electric_X_test.columns)

water_X_train = pd.DataFrame(water_X_train_scaled, columns = water_X_train.columns)
water_X_test = pd.DataFrame(water_X_test_scaled, columns = water_X_test.columns)

In [12]:
EPOCH = 1000
BATCH_SIZE = 64
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
LR = 0.001

In [13]:
# electric_trainDS = CustomDataset(electric_X_train, electric_y_train)
water_trainDS = CustomDataset(water_X_train, water_y_train)

# electric_trainDL = DataLoader(electric_trainDS, batch_size = BATCH_SIZE)
water_trainDL = DataLoader(water_trainDS, batch_size = BATCH_SIZE)

In [14]:
input_size = 28
hidden_dim = 32
n_layers = 2
dropout = 0.5

lstm_model = LSTMModel(input_size = input_size, hidden_dim = hidden_dim,
                       n_layers = n_layers, dropout = 0.8, bidirectional = True).to(DEVICE)


In [17]:
MAEloss = MeanAbsoluteError()
MAPEloss = MeanAbsolutePercentageError()
MSEloss = MeanSquaredError()
R2score = R2Score()

optimizer = optim.RMSprop(lstm_model.parameters(), lr = LR)

scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', patience = 10, verbose = True)



In [18]:
def testing(featureDF, targetDF, model):
    featureTS = torch.FloatTensor(featureDF.values).to(DEVICE)
    targetTS = torch.FloatTensor(targetDF.values).to(DEVICE)

    model.dropout = nn.Dropout(0)
    
    model.eval()
    
    with torch.no_grad():
        pre_val = model(featureTS)
        mae_loss_val = MAEloss(pre_val, targetTS)
        mape_loss_val = MAPEloss(pre_val, targetTS)
        mse_loss_val = MSEloss(pre_val, targetTS)
        score_val = R2score(pre_val, targetTS)
    
    return mae_loss_val, mape_loss_val, mse_loss_val, score_val, pre_val

In [19]:


def training(testDF, testtargetDF, model, trainDL, test_value):
    SAVE_PATH = './saved_models/'
    os.makedirs(SAVE_PATH, exist_ok = True)
    
    BREAK_CNT_LOSS = 0
    BREAK_CNT_SCORE = 0
    LIMIT_VALUE = 10

    MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY, SCORE_HISTORY = [[], []], [[], []], [[], []], [[], []]

    for epoch in range(1, EPOCH + 1):
        SAVE_MODEL = os.path.join(SAVE_PATH, f'model_{epoch}.pth')
        SAVE_WEIGHT = os.path.join(SAVE_PATH, f'model_weights_{epoch}.pth')

        mae_loss_total, mape_loss_total, mse_loss_total, score_total = 0, 0, 0, 0

        for featureTS, targetTS in trainDL:
            pre_y = model(featureTS)

            mae_loss = MAEloss(pre_y, targetTS)
            mape_loss = MAPEloss(pre_y, targetTS)
            mse_loss = MSEloss(pre_y, targetTS)

            mae_loss_total += mae_loss.item()
            mape_loss_total += mape_loss.item()
            mse_loss_total += mse_loss.item()

            score = R2score(pre_y, targetTS)
            score_total += score.item()

            total_loss = mae_loss + mape_loss + mse_loss

            optimizer.zero_grad()
            
            total_loss.backward()

            optimizer.step()

        test_mae_loss, test_mape_loss, test_mse_loss, test_score, pre_val = testing(testDF, testtargetDF, model)

        MAE_LOSS_HISTORY[1].append(test_mae_loss)
        MAPE_LOSS_HISTORY[1].append(test_mape_loss)
        MSE_LOSS_HISTORY[1].append(test_mse_loss)
        SCORE_HISTORY[1].append(test_score)

        MAE_LOSS_HISTORY[0].append(mae_loss_total / len(trainDL))
        MAPE_LOSS_HISTORY[0].append(mape_loss_total / len(trainDL))
        MSE_LOSS_HISTORY[0].append(mse_loss_total / len(trainDL))
        SCORE_HISTORY[0].append(score_total / len(trainDL))

        print(f'pre_val : {pre_val.squeeze().tolist()[:10]}\ny_val : {test_value.values.squeeze()[:10]}\n')
        print(f'[{epoch} / {EPOCH}]\n- TRAIN MAE LOSS : {MAE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MAPE LOSS : {MAPE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MSE LOSS : {MSE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN R2 SCORE : {SCORE_HISTORY[0][-1]}')

        print(f'\n- TEST MAE LOSS : {MAE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MAPE LOSS : {MAPE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MSE LOSS : {MSE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST R2 SCORE : {SCORE_HISTORY[1][-1]}')

        scheduler.step(test_mae_loss)

        if len(MAE_LOSS_HISTORY[1]) >= 2:
            if MAE_LOSS_HISTORY[1][-1] >= MAE_LOSS_HISTORY[1][-2]: BREAK_CNT_LOSS += 1
        
        if len(MAE_LOSS_HISTORY[1]) == 1:
            torch.save(model.state_dict(), SAVE_WEIGHT)
            torch.save(model, SAVE_MODEL)

        else:
            if MAE_LOSS_HISTORY[1][-1] < min(MAE_LOSS_HISTORY[1][:-1]):
                torch.save(model.state_dict(), SAVE_WEIGHT)
                torch.save(model, SAVE_MODEL)

        if BREAK_CNT_LOSS > LIMIT_VALUE:
            print(f"성능 및 손실 개선이 없어서 {epoch} EPOCH에 학습 중단")
            # break

    return MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY, SCORE_HISTORY
        

In [20]:
water_y_test

Unnamed: 0,28
119333,89
236638,48
117513,247
209007,247
207409,0
...,...
43073,183
52402,138
277853,68
59613,169


In [21]:
mae_loss, mape_loss, mse_loss, r2 = training(water_X_test, water_y_test, lstm_model, water_trainDL, water_y_test)


pre_val : [-0.0005031281616538763, 0.0002174127148464322, -0.0006967181107029319, -0.002038670936599374, 0.0005717736785300076, 0.0012404473964124918, 0.0006877220585010946, 0.00014109385665506124, -0.0002617588033899665, -0.0001903116935864091]
y_val : [ 89  48 247 247   0  78 230  47 230 110]

[1 / 1000]
- TRAIN MAE LOSS : 148.4133433671431
- TRAIN MAPE LOSS : 56.276339930954514
- TRAIN MSE LOSS : 27965.38809112762
- TRAIN R2 SCORE : -3.917754272380909

- TEST MAE LOSS : 148.40951538085938
- TEST MAPE LOSS : 4.171219825744629
- TEST MSE LOSS : 27992.443359375
- TEST R2 SCORE : -3.691161632537842
pre_val : [0.2561482787132263, 0.786186933517456, 8.731070518493652, 8.226560592651367, -0.027132399380207062, -0.028904732316732407, -0.016484947875142097, 0.26703178882598877, 5.556141376495361, 9.656758308410645]
y_val : [ 89  48 247 247   0  78 230  47 230 110]

[2 / 1000]
- TRAIN MAE LOSS : 147.0631425849541
- TRAIN MAPE LOSS : 393.19455222089806
- TRAIN MSE LOSS : 27496.623267045456
- T

KeyboardInterrupt: 

In [None]:
# mae loss = 8.3237
state_dict = torch.load('./saved_models/model_weights_30.pth', map_location=torch.device('cpu'))
lstm_model.load_state_dict(state_dict)
lstm_model.eval()

featureDF = water_X_test
targetDF = water_y_test

featureTS = torch.FloatTensor(featureDF.values).to(DEVICE)
targetTS = torch.FloatTensor(targetDF.values).to(DEVICE)

pre_y = lstm_model(featureTS)
mae_loss = MAEloss(pre_y, targetTS)
print(mae_loss)

  state_dict = torch.load('./saved_models/model_weights_30.pth', map_location=torch.device('cpu'))


tensor(8.3237, grad_fn=<CloneBackward0>)
