In [1]:
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [2]:
# Preprocess the raw training CSV: derive time features, tag every building with its
# energy-usage group, and write one CSV per group for the later cells to read back.

data_path = "./data/train.csv"
# Directory that receives one "<group letter>.csv" per energy group.
output_dir = Path("Refined_Data/Grouped_Data")

# Building numbers (column "num") partitioned into five groups. group[i] is labelled
# str(i + 1) in the "energy_group" column and is saved under the name group_name[i].
group = [
    [34, 40, 42, 41, 4, 10, 11, 12],
    [35, 6, 48, 27, 57, 8, 25, 56, 26, 55, 47, 13, 53, 18, 7, 17, 46],
    [31, 33, 9, 3, 1, 32],
    [29, 38, 43, 58, 15, 22, 39, 54, 23, 44, 45, 37, 52, 2, 14],
    [21, 19, 50, 49, 20, 51, 30, 36, 28, 59, 5, 60, 16, 24]
]
group_name = ["A", "B", "C", "D", "E"]

# Original (Korean) CSV headers -> short ASCII names used throughout the notebook.
column_aliases = {
    '전력사용량(kWh)': 'kWH',
    '기온(°C)': 'C',
    '풍속(m/s)': 'm/s',
    '습도(%)': 'wet',
    '강수량(mm)': 'mm',
    '일조(hr)': 'hr',
    '비전기냉방설비운영': "non_electric_aircondition",
    "태양광보유": "sunlight",
}

data = pd.read_csv(data_path, parse_dates=["date_time"])

# Building number -> group label ("1".."5"), built once instead of five .isin() passes.
num_to_group = {
    building: str(label)
    for label, members in enumerate(group, start=1)
    for building in members
}

timestamps = data["date_time"]

# Whole hours elapsed since the earliest timestamp in the file. For June-August data within
# one year this equals the previous hand-rolled "hour + 24 * (day + month offset)" formula,
# but it no longer depends on hard-coded cumulative-day offsets for specific months.
data["time_idx"] = (timestamps - timestamps.min()) // pd.Timedelta(hours=1)

# Buildings missing from every group get "0" so the column stays string-typed throughout
# (the previous code left an integer 0 placeholder mixed in with string labels).
data["energy_group"] = data["num"].map(num_to_group).fillna("0")

# Day-of-week code with Sunday = 0, Monday = 1, ..., Saturday = 6. pandas' dayofweek uses
# Monday = 0, hence the shift. Stored as a string because it is treated as a category.
data["Week"] = ((timestamps.dt.dayofweek + 1) % 7).astype(str)

# Hour of day (0-23), also stored as a string category.
data["24Hour"] = timestamps.dt.hour.astype(str)

data = data.rename(columns=column_aliases)

data = data.astype({'non_electric_aircondition': int, 'sunlight': int})
# The building id is an identifier, not a quantity, so keep it as a string.
data['num'] = data['num'].astype(str)

# One independent copy per energy group, in the same order as group_name.
data_bag = [
    data.loc[data["energy_group"] == str(label)].copy()
    for label in range(1, len(group) + 1)
]

# to_csv does not create missing directories, so make sure the target exists first.
output_dir.mkdir(parents=True, exist_ok=True)

for name, group_frame in zip(group_name, data_bag):
    group_frame.to_csv(output_dir / f"{name}.csv", mode='w')

In [3]:
# Read each per-group CSV back from disk and print its first rows as a quick sanity check.
# index_col=0 restores the original row index that to_csv wrote as the first column.
for label in group_name:
    csv_file = f"Refined_Data/Grouped_Data/{label}.csv"
    dataset = pd.read_csv(csv_file, index_col=0)
    print(dataset.head())

      num            date_time      kWH     C  m/s   wet   mm   hr  \
6120    4  2020-06-01 00:00:00  339.552  22.6  0.7  62.0  0.0  0.0   
6121    4  2020-06-01 01:00:00  387.504  21.7  1.7  67.0  0.0  0.0   
6122    4  2020-06-01 02:00:00  203.472  21.4  2.0  69.0  0.0  0.0   
6123    4  2020-06-01 03:00:00  186.192  20.9  1.7  71.0  0.0  0.0   
6124    4  2020-06-01 04:00:00  185.760  20.7  1.8  73.0  0.0  0.0   

      non_electric_aircondition  sunlight  time_idx  energy_group  Week  \
6120                          1         1         0             1     1   
6121                          1         1         1             1     1   
6122                          1         1         2             1     1   
6123                          1         1         3             1     1   
6124                          1         1         4             1     1   

      24Hour  
6120       0  
6121       1  
6122       2  
6123       3  
6124       4  
       num            date_time     kW