In [1]:
import sys
import os


from VAEModule import *

import torch
import torch.nn as nn
import pickle
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, RobustScaler
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchsummary import summary
from torchmetrics.regression import R2Score, MeanAbsoluteError, MeanAbsolutePercentageError, MeanSquaredError


In [2]:
# Directory (relative to this notebook) that holds the pre-cleaned CSV exports.
DATA_PATH = '../Data/'

# Read both utility datasets in one pass; the file names are appended to
# DATA_PATH exactly as plain string concatenation would do.
electric_df, water_df = (
    pd.read_csv(f'{DATA_PATH}{csv_name}')
    for csv_name in ('electric_df_clear.csv', 'water_df_clear.csv')
)

In [3]:
# Show the first rows of both frames, separated by a single blank line.
print(electric_df.head(), water_df.head(), sep = '\n\n')

      0     1     2     3     4     5     6     7     8     9  ...    18  \
0  1.23  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  ...  1.77   
1  1.45  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  ...  1.68   
2  1.73  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  ...  1.20   
3  1.62  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  ...  1.42   
4  1.31  1.48  1.74  1.71  1.30  1.49  1.94  1.68  1.26  1.39  ...  1.88   

     19    20    21    22    23    24    25    26    27  
0  1.68  1.20  1.42  1.88  1.67  1.24  1.39  1.67  1.60  
1  1.20  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  
2  1.42  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  
3  1.88  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  
4  1.67  1.24  1.39  1.67  1.60  1.26  1.41  1.68  1.59  

[5 rows x 28 columns]

     0    1    2    3    4    5    6    7    8    9  ...   18   19   20   21  \
0   30  120  210  410   32  184  180  260   35  145  ...  188   95   46  139   
1  120  210

In [4]:
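# NOTE: this train/test split is disabled. The cells that follow fit and apply
# the scalers on the full electric_df / water_df frames instead. As written, the
# disabled code would also keep the target column (the last one) among the features.
# electric_features = electric_df[electric_df.columns[:]]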
# electric_target = electric_df[electric_df.columns[-1:]]

# electric_X_train, electric_X_test, electric_y_train, electric_y_test = train_test_split(electric_features,
#                                                     electric_target,
#                                                     random_state = 42,
#                                                     test_size = 0.2)

# water_features = water_df[water_df.columns[:]]
# water_target = water_df[water_df.columns[-1:]]

# water_X_train, water_X_test, water_y_train, water_y_test = train_test_split(water_features,
#                                                                             water_target,
#                                                                             random_state = 42,
#                                                                             test_size = 0.2)

In [5]:
# Fit one RobustScaler per utility on its complete frame (no train/test split
# is active in this notebook, so every row contributes to the statistics).
electric_rbscaler = RobustScaler()
electric_rbscaler.fit(electric_df)

water_rbscaler = RobustScaler()
water_rbscaler.fit(water_df)

# Persist the fitted scalers next to the notebook so they can be reloaded later.
# NOTE(review): the electric file name says "min_max" although a RobustScaler is
# stored in it. The name is kept unchanged because code outside this notebook may
# load the file by this exact name -- confirm before renaming.
with open('electric_min_max_scaler.pkl', mode = 'wb') as f:
    pickle.dump(electric_rbscaler, file = f)

with open('water_robust_scaler.pkl', mode = 'wb') as f:
    pickle.dump(water_rbscaler, file = f)

In [6]:
# Apply each fitted scaler to the frame it was fitted on.
# There are no *_test counterparts here: the train/test split is commented out
# earlier in the notebook, so the whole dataset is used for training.
electric_X_train_scaled = electric_rbscaler.transform(electric_df)
water_X_train_scaled = water_rbscaler.transform(water_df)

# Re-wrap the scaled values as DataFrames, restoring the original column labels.
electric_X_train = pd.DataFrame(data = electric_X_train_scaled,
                                columns = electric_df.columns)
water_X_train = pd.DataFrame(data = water_X_train_scaled,
                             columns = water_df.columns)

In [15]:
# Training hyper-parameters shared by the cells below.
EPOCH = 100_000      # upper bound on the number of training epochs
BATCH_SIZE = 64      # samples per mini-batch
LR = 1e-4            # initial learning rate handed to the optimizer

# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [16]:
# Wrap the scaled frames in CustomDataset (not defined in this notebook;
# presumably provided by the star import from VAEModule -- TODO confirm).
electric_trainDS = CustomDataset(electric_X_train)
water_trainDS = CustomDataset(water_X_train)

# shuffle=True: the previewed rows are overlapping sliding windows (each row is
# the previous one shifted by one step), so reading them in file order would
# fill every mini-batch with near-duplicate samples. Reshuffling each epoch
# gives the optimizer less correlated batches.
electric_trainDL = DataLoader(electric_trainDS, batch_size = BATCH_SIZE, shuffle = True)
water_trainDL = DataLoader(water_trainDS, batch_size = BATCH_SIZE, shuffle = True)

In [17]:
# Layer widths of the VAE: input features -> hidden units -> latent units.
input_size, hidden_dim, latent_dim = 28, 32, 16

# VAEModel is not defined in this notebook (presumably provided by the star
# import from VAEModule -- TODO confirm).
vae_model = VAEModel(
    input_size = input_size,
    hidden_dim = hidden_dim,
    latent_dim = latent_dim,
)

# Adam over all model parameters, starting from the configured learning rate.
optimizer = optim.Adam(params = vae_model.parameters(), lr = LR)

# Lower the learning rate once the monitored value stops decreasing for
# `patience` scheduler steps.
# NOTE(review): newer PyTorch releases deprecate the `verbose` argument of LR
# schedulers -- confirm against the installed version before upgrading.
scheduler = lr_scheduler.ReduceLROnPlateau(
    optimizer = optimizer,
    mode = 'min',
    patience = 10,
    verbose = True,
)


In [18]:
# Print a layer-by-layer summary of the model. The input width is taken from
# `input_size` (set where the model is built) instead of repeating the literal
# 28, so the summary cannot drift from the model if that width changes.
summary(vae_model, input_size = (input_size, ))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 32]             928
              ReLU-2                   [-1, 32]               0
            Linear-3                   [-1, 32]           1,056
              ReLU-4                   [-1, 32]               0
            Linear-5                   [-1, 16]             528
            Linear-6                   [-1, 16]             528
            Linear-7                   [-1, 32]             544
              ReLU-8                   [-1, 32]               0
            Linear-9                   [-1, 32]           1,056
             ReLU-10                   [-1, 32]               0
           Linear-11                   [-1, 28]             924
Total params: 5,564
Trainable params: 5,564
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/back

In [19]:
# Train the VAE on the water loader only; the electric loader is not used by
# this call. `training` is not defined in this notebook (presumably provided
# by the star import from VAEModule -- TODO confirm what it returns).
vae_loss = training(
    vae_model,
    water_trainDL,
    optimizer,
    EPOCH,
    scheduler,
    DEVICE,
)

[1 / 100000]
- TRAIN VAE LOSS : 784.380608842728
[2 / 100000]
- TRAIN VAE LOSS : 668.8245817346776
[3 / 100000]
- TRAIN VAE LOSS : 640.5379619666026
[4 / 100000]
- TRAIN VAE LOSS : 629.826969559311
[5 / 100000]
- TRAIN VAE LOSS : 625.8965149940328
[6 / 100000]
- TRAIN VAE LOSS : 623.4543589395835
[7 / 100000]
- TRAIN VAE LOSS : 621.4725039664735
[8 / 100000]
- TRAIN VAE LOSS : 619.9473355881711
[9 / 100000]
- TRAIN VAE LOSS : 618.7874473612359
[10 / 100000]
- TRAIN VAE LOSS : 617.907567118922
[11 / 100000]
- TRAIN VAE LOSS : 616.5758763644712
[12 / 100000]
- TRAIN VAE LOSS : 615.6952581743822
[13 / 100000]
- TRAIN VAE LOSS : 615.2770204408795
[14 / 100000]
- TRAIN VAE LOSS : 614.6463448003674
[15 / 100000]
- TRAIN VAE LOSS : 613.5410642488629
[16 / 100000]
- TRAIN VAE LOSS : 612.8759715709281
[17 / 100000]
- TRAIN VAE LOSS : 611.928552309672
[18 / 100000]
- TRAIN VAE LOSS : 611.1235104756997
[19 / 100000]
- TRAIN VAE LOSS : 610.1686020641462
[20 / 100000]
- TRAIN VAE LOSS : 609.1404191

KeyboardInterrupt: 