### Code Hist.

 - CODE : Model - KIER Method 02(Clustering)  
 - DESC : 각 군집별 Model Analysis 및 Evaluation  
 - DATE  
   &ensp; 2024-08-20 Created : "M02-03_Model_ML-01_Single.ipynb"에 Clustering 및 각 군집화 Case별 Cross Validation 적용  

# 01. Code

## 01-01. Init

### 01-01-01. Init_Module Import

In [1]:
#region Basic_Import
## Basic
import os, sys, warnings
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.path.dirname(os.path.abspath('./__file__'))
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('./__file__'))))
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
from pandas import DataFrame, Series
pd.options.display.float_format = '{:.10f}'.format

import math, random

## Datetime
import time, datetime as dt
from datetime import datetime, date, timedelta

## glob
import glob, requests, json
from glob import glob

## 시각화
import matplotlib.pyplot as plt, seaborn as sns
# %matplotlib inline
plt.rcParams['figure.figsize'] = [10, 8]

from scipy import stats

## Split, 정규화
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# K-Means 알고리즘
from sklearn.cluster import KMeans, MiniBatchKMeans

# Clustering 알고리즘의 성능 평가 측도
from sklearn import metrics
from sklearn.metrics import homogeneity_score, completeness_score, v_measure_score, adjusted_rand_score, silhouette_score, rand_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.metrics.cluster import contingency_matrix

## Modeling, Model Training
from sklearn.model_selection import train_test_split, KFold, GridSearchCV

## Grid Search
# kfold = KFold(n_splits = 5, shuffle = False, random_state = None)

## For Web
import urllib
from urllib.request import urlopen
from urllib.parse import urlencode, unquote, quote_plus
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
#endregion Basic_Import

In [2]:
## Import_DL
str_tar = "tf"
## For Torch
if str_tar == "torch":
    import torch, torch.nn as nn
    from torch.nn.utils import weight_norm
    print("Torch Imported")
## For TF
elif str_tar == "tf":
    import tensorflow as tf, tensorflow_addons as tfa
    from keras.callbacks import EarlyStopping, ModelCheckpoint
    from keras.models import Sequential, load_model
    from keras_flops import get_flops
    print("Tensorflow Imported")
else:
    print("Error : Cannot be used except for Keywords")
    print(" : torch / tf")

Tensorflow Imported


In [3]:
## Import_Local
from Src_Dev_Common import Common_Model_DL as com_DL, Data_Datetime as com_date, KMA_Weather as com_KMA, KECO_AirKor as com_KECO, KASI_Holiday as com_Holi, KIER_Usage_M02 as com_KIER_M02, Data_Analysis as com_Analysis, Data_Clustering as com_clustering

### 01-01-02. Config (Directory, Params)

In [4]:
## Init_config
SEED = 42

np.random.seed(SEED)
tf.random.set_seed(SEED)
random.seed(SEED)
os.environ["PYTHONHASHSEED"], os.environ['TF_DETERMINISTIC_OPS'] = str(SEED), "1"

In [5]:
## Define Todate str
str_now_ymd = pd.datetime.now().date()
str_now_y, str_now_m, str_now_d = pd.datetime.now().year, pd.datetime.now().month, pd.datetime.now().day
str_now_hr, str_now_min = pd.datetime.now().hour, pd.datetime.now().minute

print(pd.datetime.now())
print(str(str_now_y) + " / " + str(str_now_m)  + " / " + str(str_now_d))
print(str(str_now_hr) + " : " + str(str_now_min))

2024-09-25 13:38:15.297243
2024 / 9 / 25
13 : 38


## 01-01. 군집화 부분 함수화

### 01-01-01. KIER (Energy Usage)

In [6]:
def cluster_label():
    df_kier_raw = pd.read_csv(str_dirName_h + str_file, index_col = 0)
    df_kier_raw['METER_DATE'] = pd.to_datetime(df_kier_raw['METER_DATE'])

    ## 호실별 순시 사용량 컬럼만 가져오기
    list_col_tar = list(df_kier_raw.columns[1:])
    df_kier_h = df_kier_raw.set_index('METER_DATE')

    # ## Error Log : "[5:-2]" 부분을 추가하여 연월일시 및 평균합계 부분을 제거해주지 않으면, 군집화 계수가 제대로 도출되지 못함.
    # df_kier_summary_total = df_kier_h.transpose().reset_index()[5:-2]
    # ## 또는, 가장 깔끔하게 이렇게 처리해도 좋다
    df_kier_summary_total = df_kier_h[list_col_tar].transpose().reset_index()

    ## 세대 번호의 컬럼명이 'index'로 지정되어 오류 발생
    df_kier_summary_total['h_index'] = df_kier_summary_total['index']
    df_kier_summary_total = df_kier_summary_total.drop(columns = ['index'])

    X = df_kier_summary_total.drop(columns = 'h_index')
    y = df_kier_summary_total['h_index']

    # 변수 표준화
    scaler = StandardScaler() # 변수 표준화 클래스
    scaler.fit(X)  # 표준화를 위해 변수별 파라미터(평균, 표준편차) 계산
    X_std = scaler.transform(X)  # 훈련자료 표준화 변환

    ## 최종 군집에 대한 Labeled Data 저장
    km = KMeans(n_clusters = K, init="k-means++", max_iter=300, n_init=1).fit(X_std)
    list_size_cluster = com_clustering.get_cluster_sizes(km, X_std) ## 최종 군집화에 대한 군집 크기
    df_kier_summary_total['target_'+str_domain] = 0
    for i in range(0, len(df_kier_summary_total)) : df_kier_summary_total['target_'+str_domain].iloc[i] = km.labels_[i]

    str_file_labeled = str_dirName_h + 'KIER_' + str(str_domain) + '_Labeled_' + str_interval + '_K' + str(K) + '.csv'
    df_kier_summary_total = df_kier_summary_total[['h_index', 'target_'+str_domain]]
    df_kier_summary_total.to_csv(str_file_labeled)

    return df_kier_summary_total, list_size_cluster

## 01-02. Data Load 및 준비 부분 함수화

### 01-02-01. KMA ASOS

In [7]:
def load_dataset_Not_cluster():
    ## ▶ Dataset 불러오기
    ## 1. Interpolate / Filled ASOS Data
    str_file = '../data_Energy_KIER/KMA_ASOS_119_2010_2023_1st_to CSV.csv'
    df_ASOS = pd.read_csv(str_file, index_col = 0).reset_index()

    try : df_ASOS['METER_DATE'] = pd.to_datetime(df_ASOS['METER_DATE'])
    except KeyError : df_kier_raw = com_date.create_col_datetime(df_ASOS, 'METER_DATE', 'YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE').drop(labels = ['None'], axis = 1)

    ## 3. 1시간 단위 사용량 Data Load
    str_file = 'KIER_' + str_domain + '_INST_1H_Resampled.csv'
    df_raw = pd.read_csv(str_dirName_h + str_file, index_col = 0)

    ## ▶ h_index에 따라 Dataset 분리
    ## 1. 각 index별 house 목록 생성
    list_kier_h_all = df_kier_h_cluster['h_index']

    ## 2. 전체 사용량 합계 구하기
    df_kier_h_all = df_raw.copy()
    df_kier_h_all['METER_DATE'] = pd.to_datetime(df_kier_h_all['METER_DATE'])
    df_kier_h_tmp = df_raw[list_kier_h_all]
    df_kier_h_all[str_domain + '_INST_SUM_ALL'] = df_kier_h_tmp.sum(axis = 1)
    ## 시점을 밀어서, 세대별 사용량을 과거 사용량으로 사용
    df_kier_h_all[str_domain + '_INST_SUM_ALL'] = df_kier_h_all[str_domain + '_INST_SUM_ALL'].shift(1)
    df_kier_h_all.dropna()

    ## 4. 날씨 데이터 추가
    df_kier_h_all = pd.merge(df_kier_h_all, df_ASOS, how = 'left', on = ['METER_DATE'])
    df_kier_h_all = com_KMA.Interpolate_KMA_ASOS(df_kier_h_all)
    df_kier_h_all = com_date.create_col_ymdhm(df_kier_h_all, 'METER_DATE')

    # str_col_tar = str_domain + '_INST_SUM_' + dict_grp[int_grp]
    str_col_tar = str_domain + '_INST_SUM_ALL'
    df_tar_res = df_kier_h_all.drop(columns = ['METER_DATE', 'DAY']).dropna()

    return df_tar_res, str_col_tar

In [8]:
def load_dataset_cluster(int_grp):
    ## ▶ Dataset 불러오기
    ## 1. Interpolate / Filled ASOS Data
    str_file = '../data_Energy_KIER/KMA_ASOS_119_2010_2023_1st_to CSV.csv'
    df_ASOS = pd.read_csv(str_file, index_col = 0).reset_index()

    try : df_ASOS['METER_DATE'] = pd.to_datetime(df_ASOS['METER_DATE'])
    except KeyError : df_kier_raw = com_date.create_col_datetime(df_ASOS, 'METER_DATE', 'YEAR', 'MONTH', 'DAY', 'HOUR', 'MINUTE').drop(labels = ['None'], axis = 1)

    ## 2. Labeled Data Load
    ## Cluster 기준 Interval
    str_file_clustering = 'KIER_' + str(str_domain) + '_Labeled_' + str_interval + '_K' + str(K) + '.csv'
    df_kier_h_cluster = pd.read_csv(str_dirName_h + str_file_clustering
                                    , index_col = 0).rename(columns = {'index' : 'h_index'})[['h_index', 'target_' + str_domain]]
    # print(str_interval)
    # print(df_kier_h_cluster['target_' + str_domain].drop_duplicates())
    # df_kier_h_cluster

    ## 3. 1시간 단위 사용량 Data Load
    str_file = 'KIER_' + str_domain + '_INST_1H_Resampled.csv'
    df_raw = pd.read_csv(str_dirName_h + str_file, index_col = 0)



    ## ▶ h_index에 따라 Dataset 분리
    ## 1. 각 index별 house 목록 생성
    list_kier_h_all = df_kier_h_cluster['h_index']
    # print(len(list_kier_h_all))
    list_kier_h_c0 = df_kier_h_cluster[df_kier_h_cluster['target_' + str_domain] == 0]['h_index']
    # print(len(list_kier_h_c0))
    list_kier_h_c1 = df_kier_h_cluster[df_kier_h_cluster['target_' + str_domain] == 1]['h_index']
    # print(len(list_kier_h_c1))

    if K == 3 : list_kier_h_c2 = df_kier_h_cluster[df_kier_h_cluster['target_' + str_domain] == 2]['h_index']
    # print(len(list_kier_h_c2))

    ## 2. 전체 사용량 합계 구하기
    df_kier_h_all = df_raw.copy()
    df_kier_h_all['METER_DATE'] = pd.to_datetime(df_kier_h_all['METER_DATE'])
    df_kier_h_tmp = df_raw[list_kier_h_all]
    df_kier_h_all[str_domain + '_INST_SUM_ALL'] = df_kier_h_tmp.sum(axis = 1)
    ## 시점을 밀어서, 세대별 사용량을 과거 사용량으로 사용
    df_kier_h_all[str_domain + '_INST_SUM_ALL'] = df_kier_h_all[str_domain + '_INST_SUM_ALL'].shift(1)
    df_kier_h_all.dropna()

    ## 3. Cluster별 사용량 합계 산출
    ## ■ C00
    df_kier_h_c0 = df_raw.copy()[list_kier_h_c0]
    df_kier_h_c0['METER_DATE'] = pd.to_datetime(df_kier_h_all['METER_DATE'])
    df_kier_h_tmp = df_raw[list_kier_h_c0]
    df_kier_h_c0[str_domain + '_INST_SUM_C0'] = df_kier_h_tmp.sum(axis = 1)
    ## 시점을 밀어서, 세대별 사용량을 과거 사용량으로 사용
    df_kier_h_c0[str_domain + '_INST_SUM_C0'] = df_kier_h_c0[str_domain + '_INST_SUM_C0'].shift(1)
    df_kier_h_c0.dropna()

    ## ■ C01
    df_kier_h_c1 = df_raw.copy()[list_kier_h_c1]
    df_kier_h_c1['METER_DATE'] = pd.to_datetime(df_kier_h_all['METER_DATE'])
    df_kier_h_tmp = df_raw[list_kier_h_c1]
    df_kier_h_c1[str_domain + '_INST_SUM_C1'] = df_kier_h_tmp.sum(axis = 1)
    ## 시점을 밀어서, 세대별 사용량을 과거 사용량으로 사용
    df_kier_h_c1[str_domain + '_INST_SUM_C1'] = df_kier_h_c1[str_domain + '_INST_SUM_C1'].shift(1)
    df_kier_h_c1.dropna()

    if K == 3:
        ## ■ C02
        df_kier_h_c2 = df_raw.copy()[list_kier_h_c2]
        df_kier_h_c2['METER_DATE'] = pd.to_datetime(df_kier_h_all['METER_DATE'])
        df_kier_h_tmp = df_raw[list_kier_h_c2]
        df_kier_h_c2[str_domain + '_INST_SUM_C2'] = df_kier_h_tmp.sum(axis = 1)
        ## 시점을 밀어서, 세대별 사용량을 과거 사용량으로 사용
        df_kier_h_c2[str_domain + '_INST_SUM_C2'] = df_kier_h_c2[str_domain + '_INST_SUM_C2'].shift(1)
        df_kier_h_c2.dropna()

    ## 4. 날씨 데이터 추가
    df_kier_h_all = pd.merge(df_kier_h_all, df_ASOS, how = 'left', on = ['METER_DATE'])
    df_kier_h_all = com_KMA.Interpolate_KMA_ASOS(df_kier_h_all)
    df_kier_h_all = com_date.create_col_ymdhm(df_kier_h_all, 'METER_DATE')

    df_kier_h_c0 = pd.merge(df_kier_h_c0, df_ASOS, how = 'left', on = ['METER_DATE'])
    df_kier_h_c0 = com_KMA.Interpolate_KMA_ASOS(df_kier_h_c0)
    df_kier_h_c0 = com_date.create_col_ymdhm(df_kier_h_c0, 'METER_DATE')

    df_kier_h_c1 = pd.merge(df_kier_h_c1, df_ASOS, how = 'left', on = ['METER_DATE'])
    df_kier_h_c1 = com_KMA.Interpolate_KMA_ASOS(df_kier_h_c1)
    df_kier_h_c1 = com_date.create_col_ymdhm(df_kier_h_c1, 'METER_DATE')

    if K == 3:
        df_kier_h_c2 = pd.merge(df_kier_h_c2, df_ASOS, how = 'left', on = ['METER_DATE'])
        df_kier_h_c2 = com_KMA.Interpolate_KMA_ASOS(df_kier_h_c2)
        df_kier_h_c2 = com_date.create_col_ymdhm(df_kier_h_c2, 'METER_DATE')

    ## 모든 세대
    if int_grp == 0 : df_tar_res = df_kier_h_all.drop(columns = ['METER_DATE', 'DAY']).dropna()
    ## 군집 C0
    elif int_grp == 1 : df_tar_res = df_kier_h_c0.drop(columns = ['METER_DATE', 'DAY']).dropna()
    ## 군집 C1
    elif int_grp == 2 : df_tar_res = df_kier_h_c1.drop(columns = ['METER_DATE', 'DAY']).dropna()
    ## 군집 C0
    elif int_grp == 3 : df_tar_res = df_kier_h_c2.drop(columns = ['METER_DATE', 'DAY']).dropna()

    str_col_tar = str_domain + '_INST_SUM_' + dict_grp[int_grp]

    return df_tar_res, str_col_tar

In [9]:
## Build Dataset
def buildDataSet(traindata, testdata, seqLength):
    xdata = []
    ydata = []

    for i in range(len(traindata)-seqLength+1):
        tx = traindata.iloc[i:i+seqLength]
        ty = testdata.iloc[i+seqLength-1]
        xdata.append(tx)
        ydata.append(ty)

    return np.array(xdata), np.array(ydata)

In [10]:
def seq2seq_model(input_shape):
    model_input = tf.keras.layers.Input(shape=input_shape)

    # for feature extracting
    conv1 = tf.keras.layers.Conv1D(64, 1, activation='swish')(model_input)
    pool1 = tf.keras.layers.MaxPool1D(pool_size=2, strides=1, padding='same')(conv1)
    bat01 = tf.keras.layers.BatchNormalization()(pool1)
    conv2 = tf.keras.layers.Conv1D(32, 1, activation='swish')(bat01)
    pool2 = tf.keras.layers.MaxPool1D(pool_size=2, strides=1, padding='same')(conv2)
    bat02 = tf.keras.layers.BatchNormalization()(pool2)

    # 인코더 - 디코더 선언
    encoder_lstm1 = tf.keras.layers.LSTM(16, return_sequences=True, activation='swish')
    encoder_lstm2 = tf.keras.layers.LSTM(32, return_sequences=True, activation='swish')
    encoder_lstm3 = tf.keras.layers.LSTM(64, return_state=True, return_sequences=True, activation='swish')

    decoder_lstm1 = tf.keras.layers.LSTM(64, return_sequences=True, activation='swish')
    decoder_lstm2 = tf.keras.layers.LSTM(32, return_sequences=True, activation='swish')
    decoder_lstm3 = tf.keras.layers.LSTM(16, return_sequences=True, activation='swish')

    # 인코더
    encoder_output_lstm1 = encoder_lstm1(bat02)
    encoder_output_lstm2 = encoder_lstm2(bat01)
    encoder_output_lstm4, state_h, state_c = encoder_lstm3(encoder_output_lstm2)

    #디코더
    decoder_lstm1_output = decoder_lstm1(encoder_output_lstm4, initial_state=[state_h, state_c])
    decoder_lstm2_output = decoder_lstm2(decoder_lstm1_output)
    decoder_lstm3_output = decoder_lstm3(decoder_lstm2_output)

    flatten = tf.keras.layers.Flatten()(decoder_lstm3_output)
    model_output = tf.keras.layers.Dense(1)(flatten)
    
    model = tf.keras.models.Model(model_input, model_output)
    
    return model

## Proceed

In [11]:
import sys
from sklearn.model_selection import KFold, TimeSeriesSplit 
np.set_printoptions(threshold=np.inf, linewidth=np.inf)

float_rate = 0.3
# test_size = round(len(df_tar) * float_rate)
int_fold = 10

## Dict_Domain
## {0:"ELEC", 1:"HEAT", 2:"WATER", 3:"HOT_HEAT", 4:"HOT_FLOW", 99:"GAS"}
## K : 2 or 3
## {0 : '10MIN', 1 : '1H', 2 : '1D', 3 : '1W', 4 : '1M'}
## {0 : 'ALL', 1 : 'C0', 2 : 'C1', 3 : 'C2'}
# dict_ml_model = {0 : 'CB', 1 : 'DT', 2 : 'LGBM', 3 : 'RF', 4 : 'XGB'}
dict_dl_model = {0 : '1D-CNN_LSTM', 1 : ''}
dict_interval = {0 : '10MIN', 1 : '1H', 2 : '1D', 3 : '1W', 4 : '1M'}
dict_grp = {0 : 'ALL', 1 : 'C0', 2 : 'C1', 3 : 'C2'}
int_domain, int_grp = 0, 1

K = 3 ## 2, 3
int_interval = 3 ## 3, 4
int_model = 0 ## 0, 1, 2, 3, 4

## Domain, ACCU/INST Column
str_domain, str_col_accu, str_col_inst = com_KIER_M02.create_domain_str(int_domain)
## Directory Root
str_dirData, str_dir_raw, str_dir_cleansed, str_dirName_bld, str_dirName_h = com_KIER_M02.create_dir_str(str_domain)
## Interval, Target File
str_interval, str_fileRaw, str_fileRaw_hList, str_file = com_KIER_M02.create_file_str(str_domain, int_interval)

# print(str(os.listdir(str_dirData)) + "\n")
# print(os.listdir(str_dirName_h))

str_file_clustering = 'KIER_' + str(str_domain) + '_Labeled_' + str_interval + '_K' + str(K) + '.csv'
df_kier_h_cluster = pd.read_csv(str_dirName_h + str_file_clustering
                                , index_col = 0).rename(columns = {'index' : 'h_index'})[['h_index', 'target_' + str_domain]]
df_kier_h_cluster

0 : ELEC
str_fileRaw : KIER_RAW_ELEC_2024-06-07.csv
str_fileRaw_hList : KIER_RAW_ELEC_2024-06-07.csv
str_file : KIER_ELEC_INST_1W_Resampled.csv


Unnamed: 0,h_index,target_ELEC
0,ELEC_INST_EFF_561-1-1,2
1,ELEC_INST_EFF_561-1-2,2
2,ELEC_INST_EFF_561-1-3,1
3,ELEC_INST_EFF_561-1-4,2
4,ELEC_INST_EFF_561-2-1,2
...,...,...
343,ELEC_INST_EFF_563-23-2,2
344,ELEC_INST_EFF_563-23-3,1
345,ELEC_INST_EFF_563-23-4,0
346,ELEC_INST_EFF_563-24-1,1


In [12]:
## 비군집화 데이터셋에 대한 별도 처리 (비교군)
sys.stdout.flush() ## flush
df_tar, str_col_tar = load_dataset_Not_cluster()
seqLength = 24

In [13]:
## Data Split
trainSet_Origin, testSet_Origin = train_test_split(df_tar, test_size = float_rate, shuffle = False)

trainSet, testSet = trainSet_Origin, testSet_Origin

## Input / Target Split
trainXX, trainYY = trainSet.drop([str_col_tar],axis=1), trainSet[[str_col_tar]]
testXX, testYY = testSet.drop([str_col_tar],axis=1), testSet[[str_col_tar]]

trainXXindex, trainYYindex = trainXX.index, trainYY.index
trainXXcolumns, trainYYcolumns = trainXX.columns, trainYY.columns

testXXindex, testYYindex = testXX.index, testYY.index
testXXcolumns, testYYcolumns = testXX.columns, testYY.columns

d_trainXX, d_trainYY = pd.DataFrame(trainXX, index=trainXXindex, columns=trainXXcolumns), trainYY

d_testXX, d_testYY = pd.DataFrame(testXX, index=testXXindex, columns=testXXcolumns), testYY

## Build Dataset
trainX, trainY = buildDataSet(trainXX, trainYY, seqLength)
testX, testY = buildDataSet(testXX, testYY, seqLength)

In [14]:
trainXXcolumns = trainXX.columns
int_len_col_input = len(trainXXcolumns)

In [15]:
## SeqLength 초기값 : 24
str_model, model = com_DL.buildModel_KIERM02_1DCNNSeq2Seq(input_shape=(seqLength, int_len_col_input))
# d_actual, model_preds, tm_code = com_DL.model_dl_predict_KIERM02(trainX, trainY, testX, testY, model)



In [16]:
# ## 임시조치
# ## ValueError: Mean Squared Logarithmic Error cannot be used when targets contain negative values.
# cnt_negative = 0
# for i in range(0, len(model_preds)) : 
#     if model_preds[i] < 0 : 
#         model_preds[i] = model_preds[i] * -1
#         cnt_negative = cnt_negative + 1

# for i in range(0, len(model_preds)) : 
#     if model_preds[i] < 0 : print(model_preds[i])

# if cnt_negative != 0 : print(cnt_negative)

In [17]:
# list_scores = com_DL.model_sk_metrics(d_actual, model_preds)
# list_scores.append(tm_code)

# print(list_scores)

In [18]:
## 비군집화 데이터셋에 대한 별도 처리 (비교군)
sys.stdout.flush() ## flush
df_tar, str_col_tar = load_dataset_Not_cluster()
## Non-Shuffle
# list_res, list_hists = com_DL.model_dl_analysis_with_KFold(df_tar, float_rate, model, str_col_tar, int_fold, seqLength)
## Shuffle
list_res, list_hists = com_DL.model_dl_analysis_with_KFold(df_tar, float_rate, model, str_col_tar, int_fold, str_shuffle = True, seqLength = seqLength)

## list_res 저장
str_txt = '../kf_result_include_Clustering_' + str_model + '/kf_result_' + str(dict_interval[int_interval]) + '_ALL_' + dict_grp[int_grp] + '_' + str_model + '_CV' + str(int_fold) + '.txt'
file_txt = open(str_txt, 'w')
print('- Interval = ' + dict_interval[int_interval] + '\n'
        + '- K = 0' + '\n'
        + '- grp = ALL' + '\n'
        + '- model = ' + str_model + '\n'
        + '- Case = ALL' + ',' + ' size_cluster = ' + str(348) + '\n'
        + '- Size = ' + str(df_tar.shape) + '\n'
        + '- Columns = ' + str(df_tar.columns) + '\n', file = file_txt)
print(list_res, file = file_txt)

## list_hist 저장
str_txt = '../kf_hist_include_Clustering_' + str_model + '/kf_hist_' + str(dict_interval[int_interval]) + '_ALL_' + dict_grp[int_grp] + '_' + str_model + '_CV' + str(int_fold) + '.txt'
file_txt = open(str_txt, 'w')
print('- Interval = ' + dict_interval[int_interval] + '\n'
        + '- K = 0' + '\n'
        + '- grp = ALL' + '\n'
        + '- model = ' + str_model + '\n'
        + '- Case = ALL' + ',' + ' size_cluster = ' + str(348) + '\n'
        + '- Size = ' + str(df_tar.shape) + '\n'
        + '- Columns = ' + str(df_tar.columns) + '\n', file = file_txt)
print(list_hists, file = file_txt)

## open 후 다른 것을 open하면 자동으로 close되어 저장되지만,
## 마지막 파일은 반드시 close를 통해 종료해야만 저장이 완료됨
file_txt.close()

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500

In [18]:
## 군집화 데이터셋에 대한 별도 처리
for i in range (0, 3): ## 각 기간별 N번의 Clustering을 병행
        sys.stdout.flush() ## flush
        df_kier_summary_total, list_size_cluster = cluster_label()
        print(list_size_cluster)
        # df_kier_summary_total

        for int_grp in range(1, K + 1): ## 군집 형성된 데이터셋만 분석
                print('■ ' + str(int_grp))
                df_tar, str_col_tar = load_dataset_cluster(int_grp) ## 해당 군집에 대한 데이터셋 및 Target Column
                # print(df_tar.columns)
                # print(df_tar.shape)
                
                ## 임시 (더 적절한 변수로 지정해야함 + 함수화 필요)
                ## Data Split
                trainSet_Origin, testSet_Origin = train_test_split(df_tar, test_size = float_rate, shuffle = False)

                trainSet, testSet = trainSet_Origin, testSet_Origin

                ## Input / Target Split
                trainXX, trainYY = trainSet.drop([str_col_tar],axis=1), trainSet[[str_col_tar]]
                testXX, testYY = testSet.drop([str_col_tar],axis=1), testSet[[str_col_tar]]

                trainXXindex, trainYYindex = trainXX.index, trainYY.index
                trainXXcolumns, trainYYcolumns = trainXX.columns, trainYY.columns

                testXXindex, testYYindex = testXX.index, testYY.index
                testXXcolumns, testYYcolumns = testXX.columns, testYY.columns

                d_trainXX, d_trainYY = pd.DataFrame(trainXX, index=trainXXindex, columns=trainXXcolumns), trainYY

                d_testXX, d_testYY = pd.DataFrame(testXX, index=testXXindex, columns=testXXcolumns), testYY

                ## Build Dataset
                trainX, trainY = buildDataSet(trainXX, trainYY, seqLength)
                testX, testY = buildDataSet(testXX, testYY, seqLength)
                
                trainXXcolumns = trainXX.columns
                int_len_col_input = len(trainXXcolumns)

                # str_model, model = com_DL.buildModel_KIERM02_1DCNNLSTM(int_len_col_input)
                str_model, model = com_DL.buildModel_KIERM02_1DCNNSeq2Seq(input_shape=(seqLength, int_len_col_input))

                ## Not Shuffle
                # list_res, list_hists = com_DL.model_dl_analysis_with_KFold(df_tar, float_rate, model, str_col_tar, int_fold, str_shuffle = False, seqLength)
                ## Shuffle
                list_res, list_hists = com_DL.model_dl_analysis_with_KFold(df_tar, float_rate, model, str_col_tar, int_fold, str_shuffle = True, seqLength = seqLength)

                ## list_res 저장
                str_txt = '../kf_result_include_Clustering_' + str_model + '/kf_result_' + str(dict_interval[int_interval]) + '_K'  + str(K)  + '_Case0' + str(i) + '_' + dict_grp[int_grp] + '_' + str_model + '_CV' + str(int_fold) + '.txt'
                file_txt = open(str_txt, 'w')
                print('- Interval = ' + dict_interval[int_interval] + '\n'
                        + '- K = ' + str(K) + '\n'
                        + '- grp = C0' + str(int_grp) + '\n'
                        + '- model = ' + str_model + '\n'
                        + '- Case = 0' + str(i) + ',' + ' size_cluster = ' + str(list_size_cluster) + '\n'
                        + '- Size = ' + str(df_tar.shape) + '\n'
                        + '- Columns = ' + str(df_tar.columns) + '\n', file = file_txt)
                print(list_res, file = file_txt)

                ## list_hist 저장
                str_txt = '../kf_hist_include_Clustering_' + str_model + '/kf_hist_' + str(dict_interval[int_interval]) + '_K'  + str(K)  + '_Case0' + str(i) + '_' + dict_grp[int_grp] + '_' + str_model + '_CV' + str(int_fold) + '.txt'
                file_txt = open(str_txt, 'w')
                print('- Interval = ' + dict_interval[int_interval] + '\n'
                        + '- K = ' + str(K) + '\n'
                        + '- grp = C0' + str(int_grp) + '\n'
                        + '- model = ' + str_model + '\n'
                        + '- Case = 0' + str(i) + ',' + ' size_cluster = ' + str(list_size_cluster) + '\n'
                        + '- Size = ' + str(df_tar.shape) + '\n'
                        + '- Columns = ' + str(df_tar.columns) + '\n', file = file_txt)
                print(list_hists, file = file_txt)

                ## open 후 다른 것을 open하면 자동으로 close되어 저장되지만,
                ## 마지막 파일은 반드시 close를 통해 종료해야만 저장이 완료됨           
                file_txt.close()

[103, 78, 167]
■ 1
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Ep

FileNotFoundError: [Errno 2] No such file or directory: '../kf_hist_include_Clustering_1D-CNN_Seq2Seq/kf_hist_1W_K3_Case00_C0_1D-CNN_Seq2Seq_CV10.txt'