In [1]:
import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import math

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

In [2]:
# One label per cluster; presumably positional, matching the order of `group` below
# (TODO confirm -- nothing in this part of the notebook reads it).
group_name = list("ABCDE")

# Values of the `num` column that make up each cluster. The position of a list
# inside `group` is the cluster index that gets written to `energy_group`.
group = [
    # index 0
    [34, 40, 42, 41, 4, 10, 11, 12],
    # index 1
    [35, 6, 48, 27, 57, 8, 25, 56, 26, 55, 47, 13, 53, 18, 7, 17, 46],
    # index 2
    [31, 33, 9, 3, 1, 32],
    # index 3
    [29, 38, 43, 58, 15, 22, 39, 54, 23, 44, 45, 37, 52, 2, 14],
    # index 4
    [21, 19, 50, 49, 20, 51, 30, 36, 28, 59, 5, 60, 16, 24],
]

# Where the feature-engineered table is written as CSV.
save_path = "./Refined_Data/Grouped_Data/Input_Data1.csv"

In [3]:
def get_pow(series):
    """Raise the input to the power 0.16.

    Parameters
    ----------
    series : scalar, numpy.ndarray, pandas.Series or other array-like
        Value(s) to transform. Scalars (anything with zero dimensions) take
        the ``math.pow`` path, so a plain ``.apply(get_pow)`` keeps returning
        Python floats and keeps raising the same errors as before. Anything
        with one or more dimensions is transformed element-wise.

    Returns
    -------
    float for scalar input; for array-like input, whatever ``np.power``
    returns for that container (a Series for a Series, an ndarray otherwise).

    Raises
    ------
    ValueError
        For a negative scalar (fractional power of a negative number).
    TypeError
        For a scalar that cannot be converted to float.
    """
    exponent = 0.16
    if np.ndim(series) == 0:
        return math.pow(series, exponent)
    # math.pow cannot take a whole Series/array; np.power applies the same
    # transform element-wise (negative entries become NaN instead of raising).
    return np.power(series, exponent)

In [4]:
data_path = "./data/train.csv"

# Load the raw table; `date_time` is parsed so the `.dt` accessors below work.
data = pd.read_csv(data_path, parse_dates=["date_time"])

# Reserve the engineered columns up front so their order in the saved CSV is fixed.
# `energy_group` starts as the *string* "0": it later receives string labels, and
# writing strings into an integer column is a deprecated dtype upcast in pandas.
# NOTE(review): a `num` that appears in no list of `group` therefore stays in
# group "0", which is also what the saved CSV contained before.
data = data.assign(
    **{
        "time_idx": 0,
        "month": 0,
        "energy_group": "0",
        "Week": 0,
        "day_of_Week": 0,
        "day_of_month": 0,
        "day": 0,
        "24Hour": 0,
        "holiday": 0,
        "Weekend": 0,
        "hour_cat": 0,
    }
)

timestamps = data["date_time"]

# Label each row with the index (as a string) of the `group` list containing its `num`.
for group_index, members in enumerate(group):
    data.loc[data["num"].isin(members), "energy_group"] = str(group_index)

# `month` temporarily holds the number of days between June 1 and the first day
# of the row's month (June has 30 days, June + July have 61). Other months keep 0.
DAYS_BEFORE_MONTH = {6: 0, 7: 30, 8: 61}
for month_number, day_offset in DAYS_BEFORE_MONTH.items():
    data.loc[timestamps.dt.month == month_number, "month"] = day_offset

# August 17 is the only date flagged as a holiday.
data.loc[(timestamps.dt.month == 8) & (timestamps.dt.day == 17), "holiday"] = 1

# Hour counter, shifted so the earliest row in the table has index 0.
data["time_idx"] = timestamps.dt.hour + timestamps.dt.day * 24 + data["month"] * 24
data["time_idx"] = data["time_idx"] - data["time_idx"].min()

data["Week"] = timestamps.dt.isocalendar().week - 23
# Day counter modulo 7; the values 0 and 6 are the ones treated as weekend below.
# NOTE(review): this only lines up with real weekdays if June 1 is a Monday -- confirm
# against the dataset's year.
data["day_of_Week"] = (timestamps.dt.day + data["month"]) % 7
data["day_of_month"] = timestamps.dt.day
data["day"] = timestamps.dt.day + data["month"]
data["24Hour"] = timestamps.dt.hour

data.loc[data["day_of_Week"].isin([0, 6]), "Weekend"] = 1

# Daily-routine classification (`hour_cat`), keyed by hour of day.
#   0 = sleep, 1 = getting ready, 2 = work, 3 = meals, 4 = leisure
# The late-night "rest" hours (23 on working days; 0 and 23 on days off) are
# folded into the sleep category.
WORKDAY_HOURS_BY_CATEGORY = {
    0: [0, 1, 2, 3, 4, 5, 6, 23],
    1: [7, 8],
    2: [9, 10, 11, 14, 15, 16, 17],
    3: [12, 13, 18, 19],
    4: [20, 21, 22],
}
DAY_OFF_HOURS_BY_CATEGORY = {
    0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 23],
    3: [12, 13, 18, 19],
    4: [10, 11, 14, 15, 16, 17, 20, 21, 22],
}

is_workday = (data["Weekend"] == 0) & (data["holiday"] == 0)
is_day_off = (data["Weekend"] == 1) | (data["holiday"] == 1)
hour_of_day = data["24Hour"]

# The hour lists within each table are disjoint, so assignment order is irrelevant.
for category, hours in WORKDAY_HOURS_BY_CATEGORY.items():
    data.loc[is_workday & hour_of_day.isin(hours), "hour_cat"] = category
for category, hours in DAY_OFF_HOURS_BY_CATEGORY.items():
    data.loc[is_day_off & hour_of_day.isin(hours), "hour_cat"] = category

# `month` was only a helper for the day/hour arithmetic above.
data = data.drop("month", axis="columns")

data = data.rename(
    columns={
        '전력사용량(kWh)': 'kWH',
        '기온(°C)': 'C',
        '풍속(m/s)': 'm/s',
        '습도(%)': 'wet',
        '강수량(mm)': 'mm',
        '일조(hr)': 'hr',
        '비전기냉방설비운영': "non_electric_aircondition",
        "태양광보유": "sunlight",
    }
)

# Discomfort index computed from temperature `C` and humidity `wet` (in percent),
# then bucketed into four levels: <68 -> 0, [68, 75) -> 1, [75, 80) -> 2, >=80 -> 3.
# Rows where the index is NaN match no bucket and keep NaN.
discomfort_real = (
    (1.8 * data['C'])
    - 0.55 * (1 - (data['wet'] * 0.01)) * (1.8 * data['C'] - 26)
    + 32
)
discomfort_buckets = [
    (discomfort_real < 68, 0),
    ((discomfort_real >= 68) & (discomfort_real < 75), 1),
    ((discomfort_real >= 75) & (discomfort_real < 80), 2),
    (discomfort_real >= 80, 3),
]
for in_bucket, level in discomfort_buckets:
    data.loc[in_bucket, 'discomfort_index'] = level

data = data.astype({'non_electric_aircondition': int, 'sunlight': int})

# These columns are stored as strings rather than numbers (presumably so they
# can serve as categorical inputs later -- confirm against the consumer).
string_columns = [
    'num', 'Week', 'day_of_Week', 'day_of_month',
    'day', '24Hour', 'holiday', 'Weekend',
]
for column in string_columns:
    data[column] = data[column].astype(str)

# to_csv does not create missing directories, so make sure the target exists.
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
data.to_csv(save_path, mode='w')

print('complete')

complete
