In [140]:
import sys
import os

from AutoEncoderModule import AutoEncoderDataset

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import pickle
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchmetrics.regression import R2Score, MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError


In [99]:
# Directory (relative to this notebook) that holds the CSV inputs.
DATA_PATH = '../Data/'

# One frame per utility; both are read with pandas' default CSV options.
electric_df = pd.read_csv(f'{DATA_PATH}electric_df_clear.csv')
water_df = pd.read_csv(f'{DATA_PATH}water_df_clear.csv')

In [100]:
# Show the first rows of both frames, separated by a single blank line.
print(electric_df.head(), water_df.head(), sep = '\n\n')

      0     1     2     3     4     5     6     7     8     9  ...    19  \
0  1.23  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  ...  1.68   
1  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  ...  1.20   
2  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  ...  1.42   
3  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  ...  1.88   
4  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  1.39  ...  1.67   

     20    21    22    23    24    25    26    27    28  
0  1.20  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  
1  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  
2  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  
3  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  1.59  
4  1.24  1.39  1.67  1.60  1.26  1.41  1.68  1.59  1.24  

[5 rows x 29 columns]

     0    1    2    3    4    5    6    7    8    9  ...   19   20   21   22  \
0   30  120  210  410   32  184  180  260   35  145  ...   95   46  139  204   
1  120  210

In [101]:
# Every column except the last is a model input; the last column is kept
# as a one-column DataFrame target.
electric_features = electric_df.iloc[:, :-1]
electric_target = electric_df.iloc[:, -1:]

(electric_X_train, electric_X_test,
 electric_y_train, electric_y_test) = train_test_split(
    electric_features,
    electric_target,
    test_size = 0.2,
    random_state = 42,
)

water_features = water_df.iloc[:, :-1]
water_target = water_df.iloc[:, -1:]

# Same 80/20 split and seed as the electric data.
(water_X_train, water_X_test,
 water_y_train, water_y_test) = train_test_split(
    water_features,
    water_target,
    test_size = 0.2,
    random_state = 42,
)

In [102]:
# Fit one RobustScaler per dataset on the training features only, so no
# statistics from the test split leak into the scaling.
electric_rbscaler = RobustScaler()
electric_rbscaler.fit(electric_X_train)

water_rbscaler = RobustScaler()
water_rbscaler.fit(water_X_train)

# Persist the fitted scalers in the current working directory.
# NOTE(review): the electric file is named "min_max" although the object
# stored in it is a RobustScaler; the name is kept because loaders outside
# this notebook may rely on it - confirm before renaming.
with open('electric_min_max_scaler.pkl', 'wb') as f:
    pickle.dump(electric_rbscaler, f)

with open('water_robust_scaler.pkl', 'wb') as f:
    pickle.dump(water_rbscaler, f)

In [103]:
# Scale each split with the scaler fitted on the matching training data,
# then wrap the resulting array back into a DataFrame that reuses the
# original column labels.
# NOTE(review): the rebuilt frames get a fresh default row index, and the
# *_X_train / *_X_test names are overwritten, so running this cell a second
# time would scale data that is already scaled.

# --- electric ---
electric_X_train_scaled = electric_rbscaler.transform(electric_X_train)
electric_X_train = pd.DataFrame(electric_X_train_scaled,
                                columns = electric_X_train.columns)

electric_X_test_scaled = electric_rbscaler.transform(electric_X_test)
electric_X_test = pd.DataFrame(electric_X_test_scaled,
                               columns = electric_X_test.columns)

# --- water ---
water_X_train_scaled = water_rbscaler.transform(water_X_train)
water_X_train = pd.DataFrame(water_X_train_scaled,
                             columns = water_X_train.columns)

water_X_test_scaled = water_rbscaler.transform(water_X_test)
water_X_test = pd.DataFrame(water_X_test_scaled,
                            columns = water_X_test.columns)

In [104]:
# Training configuration shared by the cells below.
EPOCH = 100_000        # upper bound on the number of training epochs
BATCH_SIZE = 32        # mini-batch size handed to the DataLoaders
LR = 1e-3              # optimizer learning rate

# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [127]:
# Wrap the scaled training frames in the project's dataset class ...
electric_trainDS, water_trainDS = (AutoEncoderDataset(electric_X_train),
                                   AutoEncoderDataset(water_X_train))

# ... and batch them. No shuffle flag is passed, so batches follow the
# dataset order.
electric_trainDL = DataLoader(dataset = electric_trainDS, batch_size = BATCH_SIZE)
water_trainDL = DataLoader(dataset = water_trainDS, batch_size = BATCH_SIZE)

In [162]:
class RecurrentAutoencoder(nn.Module):
    """LSTM autoencoder that compresses a sequence into one latent vector.

    The encoder LSTM reads the input, the final hidden state of its last
    layer is projected to ``latent_dim`` by ``fc``, expanded back by
    ``reverse_fc``, repeated once per time step and decoded by a second
    (always unidirectional) LSTM whose hidden size equals ``input_size``.

    Args:
        input_size: Number of features per time step.
        hidden_dim: Hidden size of the encoder LSTM (per direction).
        latent_dim: Size of the bottleneck vector.
        n_layers: Number of stacked layers in both LSTMs.
        dropout: Dropout value forwarded to both LSTMs.
        bidirectional: Whether the encoder LSTM is bidirectional.
    """

    def __init__(self, input_size, hidden_dim, latent_dim, n_layers, dropout,
                 bidirectional):
        super().__init__()

        # Size of the encoder summary: forward and backward states are
        # concatenated when the encoder is bidirectional.
        summary_dim = hidden_dim * 2 if bidirectional else hidden_dim

        self.encoder = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_dim,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = True,
        )

        # Bottleneck projection and its mirror image.
        self.fc = nn.Linear(summary_dim, latent_dim)
        self.reverse_fc = nn.Linear(latent_dim, summary_dim)

        # NOTE(review): the decoder's LSTM output is returned directly as the
        # reconstruction; LSTM hidden outputs are bounded to (-1, 1), so
        # targets outside that range cannot be matched - confirm intended.
        self.decoder = nn.LSTM(
            input_size = summary_dim,
            hidden_size = input_size,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = False,
            batch_first = True,
        )

    def forward(self, inputs):
        """Encode ``inputs`` and reconstruct them.

        Args:
            inputs: Batch-first tensor; dimension 1 is used as the number of
                time steps to reconstruct.

        Returns:
            Tuple ``(reconstructed, fc)``: the decoder output sequence and
            the latent vector produced by ``self.fc``.
        """
        _, (h_n, _) = self.encoder(inputs)

        # Summary of the last encoder layer: the two trailing entries are
        # concatenated for a bidirectional encoder, otherwise the last one
        # is used on its own.
        if not self.encoder.bidirectional:
            summary = h_n[-1]
        else:
            summary = torch.cat((h_n[-2], h_n[-1]), dim=-1)

        latent = self.fc(summary)

        # Feed the same expanded latent vector to the decoder at every step.
        n_steps = inputs.size(1)
        expanded = self.reverse_fc(latent)
        decoder_in = expanded.unsqueeze(1).repeat(1, n_steps, 1)

        reconstructed, _ = self.decoder(decoder_in)
        return reconstructed, latent


In [163]:
def testing(featureDF, targetDF, model, DEVICE):
    """Evaluate ``model`` once on a full feature/target frame pair.

    Both frames are converted to float tensors, given a length-1 axis at
    position 1 and moved to ``DEVICE``. The model is switched to eval mode
    (and left in it) and run without gradient tracking.

    Args:
        featureDF: DataFrame of model inputs (``.values`` is read).
        targetDF: DataFrame the reconstruction is compared against.
        model: Module returning ``(reconstructed, latent)`` when called.
        DEVICE: Device the tensors and metric objects are placed on.

    Returns:
        Tuple ``(mae, mape, mse, reconstructed)`` where the first three are
        the torchmetrics results and the last is the model output.
    """
    featureTS = torch.FloatTensor(featureDF.values).unsqueeze(1).to(DEVICE)
    targetTS = torch.FloatTensor(targetDF.values).unsqueeze(1).to(DEVICE)

    model.eval()

    with torch.no_grad():
        reconstructed, _ = model(featureTS)
        reconstructed = reconstructed.contiguous()
        targetTS = targetTS.contiguous()

        # Metric state lives on the CPU by default; move each metric to the
        # same device as its inputs. Each metric is computed exactly once.
        mae_loss_val = MeanAbsoluteError().to(DEVICE)(reconstructed, targetTS)
        mape_loss_val = MeanAbsolutePercentageError().to(DEVICE)(reconstructed, targetTS)
        mse_loss_val = MeanSquaredError().to(DEVICE)(reconstructed, targetTS)

    return mae_loss_val, mape_loss_val, mse_loss_val, reconstructed


In [164]:


def training(testDF, testtargetDF, model, trainDL,
              optimizer, EPOCH, scheduler, DEVICE):
    """Train ``model`` and evaluate it on the test frames after every epoch.

    Each batch is optimised on the sum of MAE, MAPE and MSE between the
    reconstruction and the batch target. After every epoch the model is
    evaluated with ``testing``, the scheduler is stepped on the test MAE,
    and the model (weights and full object) is saved under
    ``./saved_models/`` whenever the test MAE reaches a new minimum.

    Training stops early once more than ``LIMIT_VALUE`` epochs have shown a
    test MAE that is not lower than the previous epoch's. The counter is
    cumulative: it is never reset when the loss improves again.

    Args:
        testDF: Feature DataFrame forwarded to ``testing``.
        testtargetDF: Target DataFrame forwarded to ``testing``.
        model: Module returning ``(reconstructed, latent)`` when called.
        trainDL: Iterable yielding ``(feature, target)`` tensor batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        EPOCH: Maximum number of epochs.
        scheduler: Scheduler whose ``step`` accepts the test MAE.
        DEVICE: Device the batches and metric objects are placed on.

    Returns:
        ``(MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY)``; each is
        ``[train_values, test_values]``. Train values are per-epoch batch
        means (floats); test values are whatever ``testing`` returned.
    """
    SAVE_PATH = './saved_models/'
    os.makedirs(SAVE_PATH, exist_ok = True)

    BREAK_CNT_LOSS = 0
    LIMIT_VALUE = 10

    MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY = [[], []], [[], []], [[], []]

    # Lowest test MAE seen so far; tracked incrementally so the whole
    # history does not have to be rescanned every epoch.
    best_test_mae = None

    for epoch in range(1, EPOCH + 1):
        model.train()

        SAVE_MODEL = os.path.join(SAVE_PATH, f'model_{epoch}.pth')
        SAVE_WEIGHT = os.path.join(SAVE_PATH, f'model_weights_{epoch}.pth')

        mae_loss_total, mape_loss_total, mse_loss_total = 0, 0, 0

        for featureTS, targetTS in trainDL:
            # Add a length-1 axis at position 1 and move the batch to DEVICE.
            featureTS, targetTS = featureTS.unsqueeze(1).to(DEVICE), targetTS.unsqueeze(1).to(DEVICE)

            reconstructed, _ = model(featureTS)

            # A fresh metric object per batch keeps every value batch-local;
            # .to(DEVICE) puts the metric state on the same device as the
            # tensors it is fed.
            mae_loss = MeanAbsoluteError().to(DEVICE)(reconstructed, targetTS)
            mape_loss = MeanAbsolutePercentageError().to(DEVICE)(reconstructed, targetTS)
            mse_loss = MeanSquaredError().to(DEVICE)(reconstructed, targetTS)

            mae_loss_total += mae_loss.item()
            mape_loss_total += mape_loss.item()
            mse_loss_total += mse_loss.item()

            total_loss = mae_loss + mape_loss + mse_loss

            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        test_mae_loss, test_mape_loss, test_mse_loss, _ = testing(testDF, testtargetDF, model, DEVICE)

        MAE_LOSS_HISTORY[1].append(test_mae_loss)
        MAPE_LOSS_HISTORY[1].append(test_mape_loss)
        MSE_LOSS_HISTORY[1].append(test_mse_loss)

        MAE_LOSS_HISTORY[0].append(mae_loss_total / len(trainDL))
        MAPE_LOSS_HISTORY[0].append(mape_loss_total / len(trainDL))
        MSE_LOSS_HISTORY[0].append(mse_loss_total / len(trainDL))

        print(f'[{epoch} / {EPOCH}]\n- TRAIN MAE LOSS : {MAE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MAPE LOSS : {MAPE_LOSS_HISTORY[0][-1]}')
        print(f'- TRAIN MSE LOSS : {MSE_LOSS_HISTORY[0][-1]}')

        print(f'\n- TEST MAE LOSS : {MAE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MAPE LOSS : {MAPE_LOSS_HISTORY[1][-1]}')
        print(f'- TEST MSE LOSS : {MSE_LOSS_HISTORY[1][-1]}')

        scheduler.step(test_mae_loss)

        # Count epochs whose test MAE failed to drop below the previous one.
        if len(MAE_LOSS_HISTORY[1]) >= 2 and MAE_LOSS_HISTORY[1][-1] >= MAE_LOSS_HISTORY[1][-2]:
            BREAK_CNT_LOSS += 1

        # Checkpoint on the first epoch and on every new best test MAE.
        if best_test_mae is None or test_mae_loss < best_test_mae:
            best_test_mae = test_mae_loss
            torch.save(model.state_dict(), SAVE_WEIGHT)
            torch.save(model, SAVE_MODEL)

        if BREAK_CNT_LOSS > LIMIT_VALUE:
            print(f"성능 및 손실 개선이 없어서 {epoch} EPOCH에 학습 중단")
            # The message announces that training stops, so actually stop.
            break

    return MAE_LOSS_HISTORY, MAPE_LOSS_HISTORY, MSE_LOSS_HISTORY


In [173]:
# Model hyper-parameters.
input_size = 28          # features per time step fed to the encoder
hidden_dim = 16
latent_dim = 8
n_layers = 2
dropout = 0.1
bidirectional = False

LR = 0.001

DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# The training and evaluation code moves every batch to DEVICE, so the model
# has to live there as well. It is moved before the optimizer is created so
# the optimizer is built from the parameters that are actually trained.
recurrent_autoencoder = RecurrentAutoencoder(input_size = input_size,
                                             hidden_dim = hidden_dim,
                                             latent_dim = latent_dim,
                                             n_layers = n_layers,
                                             dropout = dropout,
                                             bidirectional = bidirectional
                                             ).to(DEVICE)
optimizer = optim.Adam(recurrent_autoencoder.parameters(), lr = LR)
# NOTE(review): `verbose` is deprecated in recent PyTorch releases - confirm
# against the installed version before upgrading.
scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', patience = 100, verbose = True)




In [174]:
# Train on the water data. The scaled test features are passed as both the
# evaluation input and the evaluation target.
mae_loss, mape_loss, mse_loss = training(
    testDF = water_X_test,
    testtargetDF = water_X_test,
    model = recurrent_autoencoder,
    trainDL = water_trainDL,
    optimizer = optimizer,
    EPOCH = EPOCH,
    scheduler = scheduler,
    DEVICE = DEVICE,
)

[1 / 100000]
- TRAIN MAE LOSS : 0.5417304342651701
- TRAIN MAPE LOSS : 8.526283950372175
- TRAIN MSE LOSS : 0.48373930311286367

- TEST MAE LOSS : 0.5422823429107666
- TEST MAPE LOSS : 1.4446171522140503
- TEST MSE LOSS : 0.4848843216896057
[2 / 100000]
- TRAIN MAE LOSS : 0.5417200657704493
- TRAIN MAPE LOSS : 1.076484737521285
- TRAIN MSE LOSS : 0.48373673799154643

- TEST MAE LOSS : 0.5422818064689636
- TEST MAPE LOSS : 0.9858613014221191
- TEST MSE LOSS : 0.4848841428756714
[3 / 100000]
- TRAIN MAE LOSS : 0.541719948691088
- TRAIN MAPE LOSS : 0.9789327878301793
- TRAIN MSE LOSS : 0.4837367375497218

- TEST MAE LOSS : 0.5422818064689636
- TEST MAPE LOSS : 0.9764484763145447
- TEST MSE LOSS : 0.4848841428756714
[4 / 100000]
- TRAIN MAE LOSS : 0.5417199521840036
- TRAIN MAPE LOSS : 0.9760711270112258
- TRAIN MSE LOSS : 0.483736741113496

- TEST MAE LOSS : 0.5422818064689636
- TEST MAPE LOSS : 0.9760800004005432
- TEST MSE LOSS : 0.4848841428756714
[5 / 100000]
- TRAIN MAE LOSS : 0.5417

KeyboardInterrupt: 