In [66]:
from Module.ModelModule import CustomDataset

import os
import torch
import torch.nn as nn
import pickle
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchsummary import summary
from torchmetrics.regression import R2Score, MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError


In [67]:
DATA_PATH = '../Data/'

electric_df = pd.read_csv(DATA_PATH + 'electric_df.csv', index_col = 0)
water_df = pd.read_csv(DATA_PATH + 'water_df.csv', index_col = 0)

In [68]:
print(electric_df.head())
print()
print(water_df.head())

      0     1     2     3     4     5     6     7     8     9  ...    19  \
0  1.23  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  ...  1.68   
1  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  ...  1.20   
2  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  ...  1.42   
3  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  ...  1.88   
4  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  1.39  ...  1.67   

     20    21    22    23    24    25    26    27    28  
0  1.20  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  
1  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  
2  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  
3  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  1.59  
4  1.24  1.39  1.67  1.60  1.26  1.41  1.68  1.59  1.24  

[5 rows x 29 columns]

     0    1    2    3    4    5    6    7    8    9  ...   19   20   21   22  \
0   30  120  210  410   32  184  180  260   35  145  ...   95   46  139  204   
1  120  210

In [69]:
electric_features = electric_df[electric_df.columns[:-1]]
electric_target = electric_df[electric_df.columns[-1:]]

electric_X_train, electric_X_test, electric_y_train, electric_y_test = train_test_split(electric_features,
                                                    electric_target,
                                                    random_state = 42,
                                                    test_size = 0.2)

water_features = water_df[water_df.columns[:-1]]
water_target = water_df[water_df.columns[-1:]]

water_X_train, water_X_test, water_y_train, water_y_test = train_test_split(water_features,
                                                                            water_target,
                                                                            random_state = 42,
                                                                            test_size = 0.2)

In [70]:
electric_rbscaler = RobustScaler().fit(electric_X_train)
water_rbscaler = RobustScaler().fit(water_X_train)

with open('electric_min_max_scaler.pkl', 'wb') as f:
    pickle.dump(electric_rbscaler, f)

with open('water_robust_scaler.pkl', 'wb') as f:
    pickle.dump(water_rbscaler, f)

In [71]:
electric_X_train_scaled = electric_rbscaler.transform(electric_X_train)
electric_X_test_scaled = electric_rbscaler.transform(electric_X_test)

water_X_train_scaled = water_rbscaler.transform(water_X_train)
water_X_test_scaled = water_rbscaler.transform(water_X_test)

electric_X_train = pd.DataFrame(electric_X_train_scaled, columns = electric_X_train.columns)
electric_X_test = pd.DataFrame(electric_X_test_scaled, columns = electric_X_test.columns)

water_X_train = pd.DataFrame(water_X_train_scaled, columns = water_X_train.columns)
water_X_test = pd.DataFrame(water_X_test_scaled, columns = water_X_test.columns)

In [72]:
EPOCH = 100000
BATCH_SIZE = 32
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
LR = 0.001

In [73]:
electric_trainDS = CustomDataset(electric_X_train, electric_y_train)
water_trainDS = CustomDataset(water_X_train, water_y_train)

electric_trainDL = DataLoader(electric_trainDS, batch_size = BATCH_SIZE)
water_trainDL = DataLoader(water_trainDS, batch_size = BATCH_SIZE)

In [74]:
class VAEModel(nn.Module):
    def __init__(self, input_size, hidden_dim, latent_dim, n_layers, dropout,
                 bidirectional):
        super().__init__()

        self.encoder = nn.GRU(
            input_size = input_size,
            hidden_size = hidden_dim,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = True
        )

        self.mu = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, latent_dim)
        self.logvar = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, latent_dim)

        self.decoder = nn.GRU(
            input_size = latent_dim,
            hidden_size = hidden_dim,
            num_layers = n_layers,
            dropout = dropout,
            bidirectional = bidirectional,
            batch_first = True
        )
        self.output = nn.Linear(hidden_dim * 2 if bidirectional else hidden_dim, 1)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        
        return mu + eps * std
    
    def forward(self, inputs):
        encoder, hidden = self.encoder(inputs)
        
        if self.encoder.bidirectional:
            hidden = torch.cat([hidden[-2], hidden[-1]], dim=-1)
        else:
            hidden = hidden[-1]

        mu = self.mu(hidden)
        logvar = self.logvar(hidden)
        
        reparameter = self.reparameterize(mu, logvar)
        reparameter = reparameter.unsqueeze(1)

        decoder, _ = self.decoder(reparameter)

        reconstruction = self.output(decoder)

        return reconstruction, mu, logvar
    


In [75]:
def VAE_loss(reconstruction, target, mu, logvar):
    MAE_loss = nn.L1Loss(reduction = 'mean')
    loss_val = MAE_loss(reconstruction, target)

    kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    return loss_val + kl_loss

In [76]:
def testing(featureDF, targetDF, model, DEVICE):
    featureTS = torch.FloatTensor(featureDF.values).unsqueeze(1).to(DEVICE)
    targetTS = torch.FloatTensor(targetDF.values).unsqueeze(1).to(DEVICE)
    
    model.eval()
    
    with torch.no_grad():
        reconstruction, mu, logvar = model(featureTS)
        vae_loss_val = VAE_loss(reconstruction, targetTS, mu, logvar)

    return vae_loss_val


In [77]:
def training(testDF, testtargetDF, model, trainDL,
              optimizer, EPOCH, scheduler, DEVICE, accumulation_steps):
    SAVE_PATH = './saved_models/'
    os.makedirs(SAVE_PATH, exist_ok = True)

    BREAK_CNT_LOSS = 0
    BREAK_CNT_SCORE = 0
    LIMIT_VALUE = 10

    VAE_LOSS_HISTORY = [[], []]

    for epoch in range(1, EPOCH + 1):
        SAVE_MODEL = os.path.join(SAVE_PATH, f'model_{epoch}.pth')
        SAVE_WEIGHT = os.path.join(SAVE_PATH, f'model_weights_{epoch}.pth')

        vae_loss_total = 0

        for step, (featureTS, targetTS) in enumerate(trainDL):
            featureTS = featureTS.unsqueeze(1).to(DEVICE)
            targetTS = targetTS.unsqueeze(1).to(DEVICE)

            # Forward pass
            reconstruction, mu, logvar = model(featureTS)
            vae_loss = VAE_loss(reconstruction, targetTS, mu, logvar)

            # Loss 누적 및 Backward pass
            vae_loss = vae_loss / accumulation_steps  # 누적 단계로 나눔
            vae_loss.backward()
            vae_loss_total += vae_loss.item() * accumulation_steps

            # Accumulation 단계마다 가중치 업데이트
            if (step + 1) % accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()
        

        test_vae_loss = testing(testDF, testtargetDF, model, DEVICE)

        VAE_LOSS_HISTORY[1].append(test_vae_loss)
        VAE_LOSS_HISTORY[0].append(vae_loss_total / len(trainDL))

        print(f'[{epoch} / {EPOCH}]\n- TRAIN VAE LOSS : {VAE_LOSS_HISTORY[0][-1]}')
        print(f'\n- TEST VAE LOSS : {VAE_LOSS_HISTORY[1][-1]}')
        scheduler.step(test_vae_loss)

        if len(VAE_LOSS_HISTORY[1]) >= 2:
            if VAE_LOSS_HISTORY[1][-1] >= VAE_LOSS_HISTORY[1][-2]: BREAK_CNT_LOSS += 1
        
        if len(VAE_LOSS_HISTORY[1]) == 1:
            torch.save(model.state_dict(), SAVE_WEIGHT)
            torch.save(model, SAVE_MODEL)

        else:
            if VAE_LOSS_HISTORY[1][-1] < min(VAE_LOSS_HISTORY[1][:-1]):
                torch.save(model.state_dict(), SAVE_WEIGHT)
                torch.save(model, SAVE_MODEL)

        if BREAK_CNT_LOSS > LIMIT_VALUE:
            print(f"성능 및 손실 개선이 없어서 {epoch} EPOCH에 학습 중단")
            # break

    return VAE_LOSS_HISTORY

In [78]:
input_size = 28
hidden_dim = 64
latent_dim = 32
n_layers = 2
dropout = 0.5
bidirectional = True
accumulation_steps = 2


vae_model = VAEModel(input_size = input_size, hidden_dim = hidden_dim,
                     latent_dim = latent_dim, n_layers = n_layers,
                     dropout = dropout, bidirectional = bidirectional)

optimizer = optim.Adam(vae_model.parameters(), lr = LR)

scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode = 'min', patience = 100, verbose = True)




In [None]:
vae_loss = training(water_X_test, water_y_test, vae_model, water_trainDL,
                    optimizer, EPOCH, scheduler, DEVICE, accumulation_steps)

[1 / 100000]
- TRAIN VAE LOSS : 68.03102227985158

- TEST VAE LOSS : 60.31974411010742
[2 / 100000]
- TRAIN VAE LOSS : 60.23587191951976

- TEST VAE LOSS : 60.29950714111328
[3 / 100000]
- TRAIN VAE LOSS : 60.23522606546739

- TEST VAE LOSS : 60.277217864990234
[4 / 100000]
- TRAIN VAE LOSS : 60.23519761545518

- TEST VAE LOSS : 60.27719497680664
[5 / 100000]
- TRAIN VAE LOSS : 60.236816028370576

- TEST VAE LOSS : 60.27898025512695
[6 / 100000]
- TRAIN VAE LOSS : 60.23571530196246

- TEST VAE LOSS : 60.277366638183594
[7 / 100000]
- TRAIN VAE LOSS : 60.2351417240816

- TEST VAE LOSS : 60.27705764770508
[8 / 100000]
- TRAIN VAE LOSS : 60.235180975072524

- TEST VAE LOSS : 60.27873992919922
[9 / 100000]
- TRAIN VAE LOSS : 60.23514332490809

- TEST VAE LOSS : 60.27680587768555
[10 / 100000]
- TRAIN VAE LOSS : 60.23519342826395

- TEST VAE LOSS : 60.2785530090332
[11 / 100000]
- TRAIN VAE LOSS : 60.23517090786205

- TEST VAE LOSS : 60.27680206298828
[12 / 100000]
- TRAIN VAE LOSS : 60.238