### Code Hist.

 - CODE  
    &ensp; : Crawling - 특일 정보 조회 (KASI)

  - DATE  
    &ensp; 2024-02-22 Created  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 1) 사용량을 Cluster별로 분류하기 위해 생성  
    &ensp; 2024-02-25 Updated  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 1) 날씨 데이터 Merge 부분을 분리 및 제거 (intergration_Weather)  
    
 - DESC  
    &ensp; : 전처리 - 한국지역난방공사 열판매량/열공급량   
    &emsp; 1) 결측치가 없어서, 그대로 사용  
    &emsp;&ensp;&ensp; 
    &emsp;&ensp;&ensp; (Crawl Code 없음)   

 - DATA  
    &emsp; <"Input">  
    1) None (Input Dataset)  
    &emsp;- Period :   
    &emsp;- Interval : 

    &emsp; <"Output">  
    1) Daily (관측소/년도별 출력)  
    &nbsp;df_data_cal.to_csv(data_dir + 'KASI_DATE_D_Final.csv', index = False, encoding='utf-8-sig')  
    &emsp;- Columns : ['YEAR', 'MONTH', 'DAY'  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;, 'dateKind', 'code_day_of_the_week', 'day_of_the_week'  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;, 'rest_YN', 'name_of_holiday', 'dist_from_holiday']
    &emsp;- Period :   
    &emsp;- Interval :  
    
    2) Hourly (관측소/년도별 출력)  
    &nbsp;df_data_cal_24.to_csv(data_dir + 'KASI_DATE_H_Final.csv', index = False, encoding='utf-8-sig')  
    &emsp;- Columns : ['locdate', 'YEAR', 'MONTH', 'DAY'  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;, 'dateKind', 'code_day_of_the_week', 'day_of_the_week'  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;, 'rest_YN', 'name_of_holiday', 'dist_from_holiday'  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;, 'HOUR', 'MINUTE']
    &emsp;- Period :   
    &emsp;- Interval :  
    
    

 - Related Link  
    &ensp; : 

# 01. Code

## 01-01. Init

### 01-01-01. Init_Module Import

In [None]:
#region Basic_Import
## Basic
import os, sys, warnings
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.path.dirname(os.path.abspath('./__file__'))
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('./__file__'))))
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
from pandas import DataFrame, Series
pd.options.display.float_format = '{:.10f}'.format

import math, random

## Datetime
import time, datetime as dt
from datetime import datetime, date, timedelta

## glob
import glob, requests, json
from glob import glob

## 시각화
import matplotlib.pyplot as plt, seaborn as sns
# %matplotlib inline
plt.rcParams['figure.figsize'] = [10, 8]

from scipy import stats

## Split, 정규화
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# K-Means 알고리즘
from sklearn.cluster import KMeans, MiniBatchKMeans

# Clustering 알고리즘의 성능 평가 측도
from sklearn import metrics
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score, adjusted_rand_score, silhouette_score, rand_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.metrics.cluster import contingency_matrix

## For Web
import urllib
from urllib.request import urlopen
from urllib.parse import urlencode, unquote, quote_plus
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
#endregion Basic_Import

In [None]:
## Import_DL
## Pick the deep-learning backend to import; accepted keywords are "torch" and "tf".
str_tar = "tf"
if str_tar == "tf":
    ## For TF
    import tensorflow as tf, tensorflow_addons as tfa
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from keras.models import Sequential, load_model
    from keras_flops import get_flops
    print("Tensorflow Imported")
elif str_tar == "torch":
    ## For Torch
    import torch, torch.nn as nn
    from torch.nn.utils import weight_norm
    print("Torch Imported")
else:
    ## Unknown keyword: nothing is imported, only the accepted values are reported
    print("Error : Cannot be used except for Keywords", " : torch / tf", sep = "\n")

In [None]:
## Import_Local
from Src_Dev_Common import Data_Datetime as com_date, KMA_Weather as com_KMA, KECO_AirKor as com_KECO, KASI_Holiday as com_Holi, KIER_Usage_M02 as com_KIER_M02

### 01-01-02. Config (Directory, Params)

In [None]:
## Init_config
## Seed every random number generator used by the notebook with one shared value.
SEED = 42

random.seed(SEED)
np.random.seed(SEED)
## Requires the "tf" backend to have been imported (name `tf` must exist).
tf.random.set_seed(SEED)

## NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
## setting it here presumably only affects child processes - confirm intent.
os.environ["PYTHONHASHSEED"] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = "1"

In [None]:
## Define Todate str
## `pd.datetime` was deprecated in pandas 1.0 and removed in pandas 2.0, so the
## standard-library `datetime` class (imported in the first cell) is used instead.
## A single snapshot is taken so that date, hour and minute always describe the
## same instant (separate now() calls could straddle a minute/midnight boundary).
ts_now = datetime.now()

str_now_ymd = ts_now.date()
str_now_y, str_now_m, str_now_d = ts_now.year, ts_now.month, ts_now.day
str_now_hr, str_now_min = ts_now.hour, ts_now.minute

print(ts_now)
print(f"{str_now_y} / {str_now_m} / {str_now_d}")
print(f"{str_now_hr} : {str_now_min}")

In [None]:
## Dict_Domain
## {0:"ELEC", 1:"HEAT", 2:"WATER", 3:"HOT_HEAT", 4:"HOT_FLOW", 99:"GAS"}
## {'10min', '1H', '1D', '1W', '1M'}
int_domain = 0
int_interval = 4

## Domain name plus the ACCU / INST column names, resolved by the project helper
(str_domain, str_col_accu, str_col_inst) = com_KIER_M02.create_domain_str(int_domain)

## Directory roots for the chosen domain
(str_dirData
 , str_dir_raw
 , str_dir_cleansed
 , str_dirName_bld
 , str_dirName_h) = com_KIER_M02.create_dir_str(str_domain)

## Interval label and target file names for the chosen domain / interval
(str_interval
 , str_fileRaw
 , str_fileRaw_hList
 , str_file) = com_KIER_M02.create_file_str(str_domain, int_interval)

## File holding the cluster label assigned to each household
str_file_clustering = "".join(['KIER_', str(str_domain), '_Labeled_', str_interval, '.csv'])

## Show what is available in the data root and in the household directory
print(f"{os.listdir(str_dirData)}\n")
print(os.listdir(str_dirName_h))

## 01-02. Data Load (df_raw)

### 01-02-01. KIER Energy Usage (Integrated)

In [None]:
## Load the combined usage + weather table for the selected domain / interval.
## The first CSV column is used as the DataFrame index.
str_file = "".join(['KIER_', str_domain, '_INST_Weather_ALL_', str_interval, '.csv'])
df_kier_raw = pd.read_csv(str_dirName_h + str_file, index_col = 0)
df_kier_raw

In [None]:
## Split df_kier_raw column-wise by position:
##   [0:5] -> date columns, [5:22] -> weather columns, [22:] -> usage columns.
cols_raw = df_kier_raw.columns
## Date Data
list_col_date = cols_raw[:5].tolist()
## Weather Data
list_col_weather = cols_raw[5:22].tolist()
## Usage Data
list_kier_h_all = cols_raw[22:].tolist()

## Weather frame, with METER_DATE put in front as the join key
df_kier_raw_weather = df_kier_raw[list_col_weather]
df_kier_raw_weather.insert(loc = 0, column = 'METER_DATE', value = df_kier_raw['METER_DATE'])

## - Previous-time data: usage columns, with METER_DATE put in front as the join key
df_kier_raw_prev = df_kier_raw[list_kier_h_all]
df_kier_raw_prev.insert(loc = 0, column = 'METER_DATE', value = df_kier_raw['METER_DATE'])
df_kier_raw_prev.head(5)

In [None]:
## Stamp each usage row with the METER_DATE of the row below it, so that a later
## join on METER_DATE attaches the preceding row's readings. The last row's
## METER_DATE becomes NaN.
## NOTE(review): running this cell more than once shifts the dates again.
df_kier_raw_prev['METER_DATE'] = df_kier_raw_prev['METER_DATE'].shift(periods = -1)
df_kier_raw_prev.head(n = 5)

In [None]:
## Load the cluster label of every household and collect the household ids per cluster.
str_col_target = 'target_' + str_domain
df_kier_h_cluster = (
    pd.read_csv(str_dirName_h + str_file_clustering, index_col = 0)
    .rename(columns = {'index' : 'h_index'})
    [['h_index', str_col_target]]
)

## NOTE: despite the `list_` prefix these are pandas Series of `h_index` values.
list_kier_h_c0 = df_kier_h_cluster.loc[df_kier_h_cluster[str_col_target] == 0, 'h_index']
print(len(list_kier_h_c0))
list_kier_h_c1 = df_kier_h_cluster.loc[df_kier_h_cluster[str_col_target] == 1, 'h_index']
print(len(list_kier_h_c1))
list_kier_h_c2 = df_kier_h_cluster.loc[df_kier_h_cluster[str_col_target] == 2, 'h_index']
print(len(list_kier_h_c2))

In [None]:
## Usage totals: one table for all households and one per cluster (C0 / C1 / C2).
def _sum_usage_with_prev(df_weather, df_usage, df_prev, cols_usage, str_col_sum):
    """Build a weather table with a row-wise usage total and the shifted readings.

    Args:
        df_weather: frame holding 'METER_DATE' and the weather columns; copied,
            never modified.
        df_usage: frame that contains the usage columns named in ``cols_usage``.
        df_prev: frame with a 'METER_DATE' key that is left-joined onto the result.
        cols_usage: column labels of ``df_usage`` to add up per row.
        str_col_sum: name of the new total column.

    Returns:
        ``df_weather`` plus ``str_col_sum`` plus every column of ``df_prev``,
        with all rows that contain a missing value removed.
    """
    df_out = df_weather.copy()
    ## Sum the usage columns only. The string METER_DATE column must stay out of
    ## the row-wise sum: older pandas silently skipped it, while pandas >= 2.0 can
    ## raise TypeError when adding numbers and strings.
    df_out[str_col_sum] = df_usage[cols_usage].sum(axis = 1)
    df_out = pd.merge(df_out, df_prev, how = 'left', on = 'METER_DATE')
    ## Drops rows without a match in df_prev as well as any row that already had a NaN.
    return df_out.dropna()

## NOTE(review): the 'ELEC' prefix of the total columns is fixed even though the
## domain is configurable (str_domain) - confirm this is intended for other domains.
## NOTE(review): every table is joined with df_kier_raw_prev as a whole, so the
## cluster tables also carry the shifted readings of households outside the cluster.

## Total usage of all households
df_kier_h_all = _sum_usage_with_prev(df_kier_raw_weather, df_kier_raw, df_kier_raw_prev
                                     , list_kier_h_all, 'ELEC_INST_SUM')

## Total usage per cluster
## - C00
df_kier_h_c0 = _sum_usage_with_prev(df_kier_raw_weather, df_kier_raw, df_kier_raw_prev
                                    , list_kier_h_c0, 'ELEC_INST_SUM_C0')

## - C01
df_kier_h_c1 = _sum_usage_with_prev(df_kier_raw_weather, df_kier_raw, df_kier_raw_prev
                                    , list_kier_h_c1, 'ELEC_INST_SUM_C1')

## - C02
df_kier_h_c2 = _sum_usage_with_prev(df_kier_raw_weather, df_kier_raw, df_kier_raw_prev
                                    , list_kier_h_c2, 'ELEC_INST_SUM_C2')

In [None]:
## Show the cluster-2 table for visual inspection (last expression of the cell)
df_kier_h_c2

In [None]:
# ## 전체 사용량 합계
# df_kier_h_all = df_kier_raw[list_kier_h_all]
# df_kier_h_all['ELEC_INST_SUM'] = df_kier_h_all.sum(axis = 1)
# df_kier_h_all.insert(0, 'METER_DATE', df_kier_raw['METER_DATE'])
# df_kier_h_all = pd.merge(df_kier_raw_weather, df_kier_h_all
#                          , how = 'left', on = ['METER_DATE'])
# list_kier_h_all = ['METER_DATE'] + df_kier_raw_weather + ['ELEC_INST_SUM']
# df_kier_h_all = df_kier_h_all[list_kier_h_all]

# ## Cluster별 사용량 합계
# ## ■ C00
# df_kier_h_c0 = df_kier_raw[list_kier_h_c0]
# df_kier_h_c0['ELEC_INST_SUM_C0'] = df_kier_h_c0.sum(axis = 1)
# df_kier_h_c0.insert(0, 'METER_DATE', df_kier_raw['METER_DATE'])
# ## 합계행만 남기기 (필요시 세대별 사용량 복원 가능)
# df_kier_h_c0 = df_kier_h_c0[['METER_DATE', 'ELEC_INST_SUM_C0']]

# ## ■ C01
# df_kier_h_c1 = df_kier_raw[list_kier_h_c1]
# df_kier_h_c1['ELEC_INST_SUM_C1'] = df_kier_h_c1.sum(axis = 1)
# df_kier_h_c1.insert(0, 'METER_DATE', df_kier_raw['METER_DATE'])
# ## 합계행만 남기기 (필요시 세대별 사용량 복원 가능)
# df_kier_h_c1 = df_kier_h_c1[['METER_DATE', 'ELEC_INST_SUM_C1']]

# ## ■ C02
# df_kier_h_c2 = df_kier_raw[list_kier_h_c2]
# df_kier_h_c2['ELEC_INST_SUM_C2'] = df_kier_h_c2.sum(axis = 1)
# df_kier_h_c2.insert(0, 'METER_DATE', df_kier_raw['METER_DATE'])
# ## 합계행만 남기기 (필요시 세대별 사용량 복원 가능)
# df_kier_h_c2 = df_kier_h_c2[['METER_DATE', 'ELEC_INST_SUM_C2']]

In [None]:
## Show the all-household table for visual inspection (last expression of the cell)
df_kier_h_all

## 01-03. Data Integration

In [None]:
## Save the all-household table.
## NOTE(review): this is the same file name df_kier_raw is loaded from earlier in
## the notebook, so the input file is overwritten with a table that has a different
## column layout; re-running the notebook would then split columns by position
## incorrectly. Confirm whether a distinct output name is wanted.
str_file = 'KIER_' + str_domain + '_INST_Weather_ALL_' + str_interval + '.csv'
df_kier_h_all.to_csv(str_dirName_h + str_file)
## DataFrame.info() prints its summary itself and returns None, so it is not
## wrapped in print() (that only added a stray "None" line).
df_kier_h_all.info()
df_kier_h_all

In [None]:
## Save the cluster-0 table next to the other household-level files.
str_file = 'KIER_' + str_domain + '_INST_Weather_C0_' + str_interval + '.csv'
df_kier_h_c0.to_csv(str_dirName_h + str_file)
## DataFrame.info() prints its summary itself and returns None, so it is not
## wrapped in print() (that only added a stray "None" line).
df_kier_h_c0.info()
df_kier_h_c0

In [None]:
## Save the cluster-1 table next to the other household-level files.
str_file = 'KIER_' + str_domain + '_INST_Weather_C1_' + str_interval + '.csv'
df_kier_h_c1.to_csv(str_dirName_h + str_file)
## DataFrame.info() prints its summary itself and returns None, so it is not
## wrapped in print() (that only added a stray "None" line).
df_kier_h_c1.info()
df_kier_h_c1

In [None]:
## Save the cluster-2 table next to the other household-level files.
str_file = 'KIER_' + str_domain + '_INST_Weather_C2_' + str_interval + '.csv'
df_kier_h_c2.to_csv(str_dirName_h + str_file)
## DataFrame.info() prints its summary itself and returns None, so it is not
## wrapped in print() (that only added a stray "None" line).
df_kier_h_c2.info()
df_kier_h_c2