In [5]:
import pandas as pd

import pickle
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler

import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler

from torchmetrics.regression import R2Score, MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError

In [6]:
DATA_PATH = '../Data/'

# electric_df = pd.read_=8csv(DATA_PATH + 'electric_df.csv', index_col = 0)
water_df = pd.read_csv(DATA_PATH + 'water_df.csv', index_col = 0)

In [7]:
# electric_df

In [8]:
water_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,30,120,210,410,32,184,180,260,35,145,...,95,46,139,204,198,53,162,210,150,51
1,120,210,410,32,184,180,260,35,145,203,...,46,139,204,198,53,162,210,150,51,169
2,210,410,32,184,180,260,35,145,203,216,...,139,204,198,53,162,210,150,51,169,204
3,410,32,184,180,260,35,145,203,216,43,...,204,198,53,162,210,150,51,169,204,169
4,32,184,180,260,35,145,203,216,43,136,...,198,53,162,210,150,51,169,204,169,38
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
339995,157,230,169,47,122,218,350,110,183,190,...,58,218,290,105,10,68,38,263,78,158
339996,230,169,47,122,218,350,110,183,190,230,...,218,290,105,10,68,38,263,78,158,290
339997,169,47,122,218,350,110,183,190,230,98,...,290,105,10,68,38,263,78,158,290,300
339998,47,122,218,350,110,183,190,230,98,143,...,105,10,68,38,263,78,158,290,300,280


In [9]:
# electric_features = electric_df[electric_df.columns[:-1]]
# electric_target = electric_df[electric_df.columns[-1:]]

# electric_X_train, electric_X_test, electric_y_train, electric_y_test = train_test_split(electric_features,
#                                                     electric_target,
#                                                     random_state = 42,
                                                    # test_size = 0.2)

water_features = water_df[water_df.columns[:-1]]
water_target = water_df[water_df.columns[-1:]]

water_X_train, water_X_test, water_y_train, water_y_test = train_test_split(water_features,
                                                                            water_target,
                                                                            random_state = 42,
                                                                            test_size = 0.2)

In [10]:
# electric_X_test

In [11]:
# electric_y_train.info()

In [12]:
class CustomDataset(Dataset):
    def __init__(self, featureDF, targetDF):
        self.featureDF = featureDF
        self.targetDF = targetDF
        self.n_rows = self.featureDF.shape[0]
        self.n_cols = self.featureDF.shape[1]

    def __len__(self):
        return self.n_rows
    
    def __getitem__(self, index):
        featureTS = torch.FloatTensor(self.featureDF.iloc[index].values)
        targetTS = torch.FloatTensor(self.targetDF.iloc[index].values)

        return featureTS, targetTS

In [13]:
class LSTMModel(nn.Module):
    def __init__(self, hidden_dim, input_size, n_layers, dropout,
                 bidirectional):
        super().__init__()

        self.model = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_dim,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = True
        )

        if bidirectional:
            self.linear = nn.Linear(hidden_dim * 2, 1)
        
        else:
            self.linear = nn.Linear(hidden_dim, 1)

        # 성능에 따라 추가
        # self.dropout = nn.Dropout(dropout)

    def forward(self, inputs):
        output, _ = self.model(inputs)
        logits = self.linear(output)

        return logits

In [14]:
# electric_mmscaler = MinMaxScaler().fit(electric_X_train)
water_rbscaler = RobustScaler().fit(water_X_train)

# with open('electric_min_max_scaler.pkl', 'wb') as f:
#     pickle.dump(electric_mmscaler, f)

with open('water_robust_scaler.pkl', 'wb') as f:
    pickle.dump(water_rbscaler, f)

In [15]:
# electric_X_train_scaled = electric_mmscaler.transform(electric_X_train)
# electric_X_test_scaled = electric_mmscaler.transform(electric_X_test)

water_X_train_scaled = water_rbscaler.transform(water_X_train)
water_X_test_scaled = water_rbscaler.transform(water_X_test)

# electric_X_train = pd.DataFrame(electric_X_train_scaled, columns = electric_X_train.columns)
# electric_X_test = pd.DataFrame(electric_X_test_scaled, columns = electric_X_test.columns)

water_X_train = pd.DataFrame(water_X_train_scaled, columns = water_X_train.columns)
water_X_test = pd.DataFrame(water_X_test_scaled, columns = water_X_test.columns)

In [16]:
EPOCH = 100
BATCH_SIZE = 32
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
LR = 0.001

In [17]:
# electric_trainDS = CustomDataset(electric_X_train, electric_y_train)
water_trainDS = CustomDataset(water_X_train, water_y_train)

# electric_trainDL = DataLoader(electric_trainDS, batch_size = BATCH_SIZE)
water_trainDL = DataLoader(water_trainDS, batch_size = BATCH_SIZE)

In [18]:
input_size = 28
hidden_dim = 128
n_layers = 2
dropout = 0.5

lstm_model = LSTMModel(input_size = input_size, hidden_dim = hidden_dim,
                       n_layers = n_layers, dropout = 0.9, bidirectional = True).to(DEVICE)


In [19]:
MAEloss = MeanAbsoluteError()
MAPEloss = MeanAbsolutePercentageError()
MSEloss = MeanSquaredError()
R2score = R2Score()

optimizer = optim.Adam(lstm_model.parameters(), lr = LR)

scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', patience = 100, verbose = True)



In [20]:
def testing(featureDF, targetDF, model):
    featureTS = torch.FloatTensor(featureDF.values).to(DEVICE)
    targetTS = torch.FloatTensor(targetDF.values).to(DEVICE)

    model.dropout = nn.Dropout(0)
    
    model.eval()
    
    with torch.no_grad():
        pre_val = model(featureTS)
        mae_loss_val = MAEloss(pre_val, targetTS)
        mape_loss_val = MAPEloss(pre_val, targetTS)
        mse_loss_val = MSEloss(pre_val, targetTS)
        score_val = R2score(pre_val, targetTS)
    
    return mae_loss_val, mape_loss_val, mse_loss_val, score_val, pre_val

In [21]:
SAVE_PATH = './saved_models/'
os.makedirs(SAVE_PATH, exist_ok = True)

def training(testDF, testtargetDF, model, trainDL, test_value):
    BREAK_CNT_LOSS = 0
    BREAK_CNT_SCORE = 0
    LIMIT_VALUE = 10

    MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY, SCORE_HISTORY = [[], []], [[], []], [[], []], [[], []]

    for epoch in range(1, EPOCH + 1):
        SAVE_MODEL = os.path.join(SAVE_PATH, f'model_{epoch}.pth')
        SAVE_WEIGHT = os.path.join(SAVE_PATH, f'model_weights_{epoch}.pth')

        mae_loss_total, mape_loss_total, mse_loss_total, score_total = 0, 0, 0, 0

        for featureTS, targetTS in trainDL:
            pre_y = model(featureTS)

            mae_loss = MAEloss(pre_y, targetTS)
            mape_loss = MAPEloss(pre_y, targetTS)
            mse_loss = MSEloss(pre_y, targetTS)

            mae_loss_total += mae_loss.item()
            mape_loss_total += mape_loss.item()
            mse_loss_total += mse_loss.item()

            score = R2score(pre_y, targetTS)
            score_total += score.item()

            total_loss = mae_loss + mape_loss + mse_loss

            optimizer.zero_grad()
            
            total_loss.backward()

            optimizer.step()

        test_mae_loss, test_mape_loss, test_mse_loss, test_score, pre_val = testing(testDF, testtargetDF, model)

        MAE_LOSS_HISTORY[1].append(test_mae_loss)
        MAPE_LOSS_HISTORY[1].append(test_mape_loss)
        MSE_LOSS_HISTORY[1].append(test_mse_loss)
        SCORE_HISTORY[1].append(test_score)

        MAE_LOSS_HISTORY[0].append(mae_loss_total / len(trainDL))
        MAPE_LOSS_HISTORY[0].append(mape_loss_total / len(trainDL))
        MSE_LOSS_HISTORY[0].append(mse_loss_total / len(trainDL))
        SCORE_HISTORY[0].append(score_total / len(trainDL))

        print(f'pre_val : {pre_val.squeeze().tolist()[:10]}\ny_val : {test_value.values.squeeze()[:10]}\n')
        print(f'[{epoch} / {EPOCH}]\n- TRAIN MAE LOSS : {MAE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MAPE LOSS : {MAPE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MSE LOSS : {MSE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN R2 SCORE : {SCORE_HISTORY[0][-1]}')

        print(f'\n- TEST MAE LOSS : {MAE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MAPE LOSS : {MAPE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MSE LOSS : {MSE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST R2 SCORE : {SCORE_HISTORY[1][-1]}')

        scheduler.step(test_mae_loss)

        if len(MAE_LOSS_HISTORY[1]) >= 2:
            if MAE_LOSS_HISTORY[1][-1] >= MAE_LOSS_HISTORY[1][-2]: BREAK_CNT_LOSS += 1
        
        if len(MAE_LOSS_HISTORY[1]) == 1:
            torch.save(model.state_dict(), SAVE_WEIGHT)
            torch.save(model, SAVE_MODEL)

        else:
            if MAE_LOSS_HISTORY[1][-1] < min(MAE_LOSS_HISTORY[1][:-1]):
                torch.save(model.state_dict(), SAVE_WEIGHT)
                torch.save(model, SAVE_MODEL)

        if BREAK_CNT_LOSS > LIMIT_VALUE:
            print(f"성능 및 손실 개선이 없어서 {epoch} EPOCH에 학습 중단")
            # break

    return MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY, SCORE_HISTORY
        

In [22]:
water_y_test

Unnamed: 0,28
289500,58
26491,218
134099,84
87950,210
165405,169
...,...
211567,68
36707,350
226226,55
338315,143


In [23]:
mae_loss, mape_loss, mse_loss, r2 = training(water_X_test, water_y_test, lstm_model, water_trainDL, water_y_test)


pre_val : [-8.991474169306457e-05, 0.0017274414421990514, 0.033869680017232895, 0.004415200091898441, 0.0014199650613591075, 0.024644186720252037, 0.007711134385317564, 0.00305841863155365, 0.013776338659226894, 0.007394302636384964]
y_val : [ 58 218  84 210 169 138 150 183 147 260]

[1 / 100]
- TRAIN MAE LOSS : 148.34912220135857
- TRAIN MAPE LOSS : 192.16441621757255
- TRAIN MSE LOSS : 27962.96738683364
- TRAIN R2 SCORE : -4.112658343230977

- TEST MAE LOSS : 148.71746826171875
- TEST MAPE LOSS : 2.5231847763061523
- TEST MSE LOSS : 28082.29296875
- TEST R2 SCORE : -3.707451343536377


KeyboardInterrupt: 

In [None]:
# mae loss = 8.3237
state_dict = torch.load('./saved_models/model_weights_30.pth', map_location=torch.device('cpu'))
lstm_model.load_state_dict(state_dict)
lstm_model.eval()

featureDF = water_X_test
targetDF = water_y_test

featureTS = torch.FloatTensor(featureDF.values).to(DEVICE)
targetTS = torch.FloatTensor(targetDF.values).to(DEVICE)

pre_y = lstm_model(featureTS)
mae_loss = MAEloss(pre_y, targetTS)
print(mae_loss)

  state_dict = torch.load('./saved_models/model_weights_30.pth', map_location=torch.device('cpu'))


tensor(8.3237, grad_fn=<CloneBackward0>)
