# 1. 환경설정

In [None]:
import numpy as np
import tensorflow as tf
import random
import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import keras
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import re
import matplotlib.pyplot as plt
import keras_tuner as kt
import matplotlib.dates as mdates

# Font used for Korean text in plots. The original note says this targets Colab, where the
# NanumBarunGothic font could not be installed properly, so Malgun Gothic is configured instead.
plt.rc('font', family='Malgun Gothic')

In [None]:
# Fix the NumPy and TensorFlow random seeds
np.random.seed(42)
tf.random.set_seed(42)

# Helper that seeds every random number source used in this file
def set_all_seeds(seed=42):
    """Seed NumPy, TensorFlow and the stdlib ``random`` module, then export PYTHONHASHSEED.

    Args:
        seed: Integer seed handed to each generator (default 42).
    """
    for apply_seed in (np.random.seed, tf.random.set_seed, random.seed):
        apply_seed(seed)
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so this
    # assignment presumably only matters for child processes — confirm if relied upon.
    os.environ['PYTHONHASHSEED'] = str(seed)

# 2. 데이터 불러오기 및 전처리

In [None]:
# Load the high-water-level event data from the Excel workbook
Rain_WL = pd.read_excel('고수위_이벤트_데이터.xlsx')
Rain_WL.head()

In [None]:
# Separate min-max scalers: scaler_X1 is fitted on the input features and
# scaler_Y on the target further down in this file.
scaler_X1 = MinMaxScaler()
scaler_Y = MinMaxScaler()

In [None]:
# Sequence builder (with lead time)
def create_sequences_with_lead(X, y, sequence_length=1, lead_time=1):
    """Build sliding input windows and their lead-time-shifted targets.

    Window ``i`` is ``X[i:i + sequence_length]`` and its target is
    ``y[i + sequence_length + lead_time - 1]``, i.e. the value ``lead_time``
    steps after the last row of the window.

    Args:
        X: Indexable sequence of input rows, sliced along its first axis.
        y: Indexable sequence of targets aligned row-for-row with ``X``.
        sequence_length: Number of consecutive rows in each input window.
        lead_time: Number of steps between the end of a window and its target.

    Returns:
        Tuple ``(X_seq, Y_seq)`` of NumPy arrays with one entry per window.
        Both are empty when the series is too short for a single window.
    """
    # The last usable start index is len(X) - sequence_length - lead_time (its
    # target is y[len(X) - 1]), so the count of windows needs the "+ 1";
    # without it the final sample of every series was silently dropped.
    n_windows = len(X) - sequence_length - lead_time + 1

    X_seq, Y_seq = [], []
    for i in range(n_windows):  # range() of a non-positive count is empty
        X_seq.append(X[i:i + sequence_length])
        Y_seq.append(y[i + sequence_length + lead_time - 1])
    return np.array(X_seq), np.array(Y_seq)

In [None]:
# Select the input features (X), the target (Y) and the per-row event IDs
X = Rain_WL[['금곡교 수위', '요천대교 수위', '금곡교 강우량', '요천대교 강우량']]
Y = Rain_WL['고달교 수위']
event_ids = Rain_WL['이벤트 번호']

In [None]:
# Function to plot water levels and rainfall for each event
def plot_event_data(data, feature_names, event_ids, event_name):
    """Draw one subplot per feature for the rows belonging to a single event.

    Args:
        data: Observations indexable by a boolean mask along the first axis.
            Either a single series (1-D) or a 2-D table with one column per
            entry of ``feature_names``.
        feature_names: Names used for subplot titles, y-labels and legends.
        event_ids: Per-row event identifiers, aligned with ``data``.
        event_name: Identifier of the event to plot.
    """
    plt.figure(figsize=(15, 10))

    num_features = len(feature_names)
    event_data = np.asarray(data[event_ids == event_name])

    time_steps = range(event_data.shape[0])

    # With one column per feature, each subplot must show only its own column;
    # previously every subplot drew the whole array. Any other layout
    # (e.g. a 1-D series) is plotted whole, exactly as before.
    one_column_per_feature = event_data.ndim == 2 and event_data.shape[1] == num_features

    for i, feature_name in enumerate(feature_names):
        plt.subplot(num_features, 1, i+1)
        series = event_data[:, i] if one_column_per_feature else event_data
        plt.plot(time_steps, series, label=f'{feature_name}')
        plt.title(f'{event_name} - {feature_name}', fontsize=15)
        plt.xlabel('Time Step', fontsize=12)
        plt.ylabel(feature_name, fontsize=12)
        plt.legend()

    plt.tight_layout()
    plt.show()

# Feature shown in the per-event plots: only the target column is listed here
feature_names = ['고달교 수위']

# Convert the DataFrame/Series objects to NumPy arrays for easier boolean indexing
X_array = X.values
Y_array = Y.values
event_ids_array = event_ids.values

# Distinct event names (np.unique returns them sorted)
unique_events = np.unique(event_ids_array)

# Draw the target series of every event, one figure per event
for event_name in unique_events:
    plot_event_data(Y_array, feature_names, event_ids_array, event_name)

In [None]:
# Normalise the data: each scaler is fitted on the full table.
# Y is reshaped to a single column because the scaler expects 2-D input.
X_normalized = scaler_X1.fit_transform(X)
Y_normalized = scaler_Y.fit_transform(Y.values.reshape(-1, 1))

In [None]:
# Collect the distinct event IDs (a numerically sorted copy is built a few lines below)
event_ids_unique = Rain_WL['이벤트 번호'].unique()
def extract_event_number(event_id):
    """Return the first run of digits found in ``event_id`` as an ``int``.

    The identifier is passed through ``str()`` first, so numeric IDs are
    accepted as well as labels such as ``'Event#8'``.

    Args:
        event_id: Event identifier (string or number).

    Returns:
        The integer value of the first digit run.

    Raises:
        ValueError: If the identifier contains no digits.
    """
    match = re.search(r'\d+', str(event_id))
    if match is None:
        # Fail with a message naming the offending ID instead of an
        # AttributeError on ``None.group()``.
        raise ValueError(f"no event number found in {event_id!r}")
    return int(match.group())
# Event IDs ordered by their numeric part.
# NOTE(review): in the visible part of this file, split_data indexes event_ids_unique
# rather than this sorted list — confirm which ordering is intended.
event_ids_unique_sorted = sorted(event_ids_unique, key=extract_event_number)

In [None]:
def split_data(sequence_length, lead_time):
    """Build per-event sequences and split them into train / validation / two test sets.

    Hold-out events are chosen by position in the module-level
    ``event_ids_unique``: the last two entries are the two test events and the
    third-from-last is the validation event. All remaining events go to
    training. Events with fewer than ``sequence_length`` rows are skipped.

    Args:
        sequence_length: Number of rows in each input window.
        lead_time: Offset between the end of a window and its target.

    Returns:
        An 11-tuple ``(X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1,
        X_test_2, Y_test_2, assigned_train, assigned_val, assigned_test)``;
        the first eight items are NumPy arrays, the last three are lists of
        the event IDs that ended up in each split.
    """
    set_all_seeds(42)  # fix the random seeds

    train_X, train_Y, train_events = [], [], []
    val_X, val_Y, val_events = [], [], []
    test1_X, test1_Y = [], []
    test2_X, test2_Y = [], []
    test_events = []

    # Second-to-last and last events are the two test events ...
    first_test_event = event_ids_unique[-2]
    second_test_event = event_ids_unique[-1]
    # ... and the third-from-last event is the validation event.
    validation_event = event_ids_unique[-3]

    for event_id in event_ids_unique:
        set_all_seeds(42)
        # Rows of the normalised tables that belong to this event
        rows = np.where(event_ids == event_id)[0]
        features = X_normalized[rows]
        targets = Y_normalized[rows]

        # Not enough rows for even one window: nothing to add for this event
        if len(features) < sequence_length:
            continue

        seq_X, seq_Y = create_sequences_with_lead(features, targets, sequence_length, lead_time)

        # Pick the destination lists for this event
        if event_id == first_test_event:
            dest_X, dest_Y, dest_events = test1_X, test1_Y, test_events
        elif event_id == second_test_event:
            dest_X, dest_Y, dest_events = test2_X, test2_Y, test_events
        elif event_id == validation_event:
            dest_X, dest_Y, dest_events = val_X, val_Y, val_events
        else:
            dest_X, dest_Y, dest_events = train_X, train_Y, train_events

        dest_X.extend(seq_X)
        dest_Y.extend(seq_Y)
        dest_events.append(event_id)

    # Convert the accumulated lists to NumPy arrays
    return (
        np.array(train_X), np.array(train_Y),
        np.array(val_X), np.array(val_Y),
        np.array(test1_X), np.array(test1_Y),
        np.array(test2_X), np.array(test2_Y),
        train_events, val_events, test_events,
    )

# 3. 케라스 튜너 사용

In [None]:
from tensorflow.keras import Input, Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import keras_tuner as kt
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# HyperModel that tunes the network and the input window length together
class MyHyperModel(kt.HyperModel):
    """Two-layer LSTM hypermodel whose training data depends on a tuned window length.

    ``build`` registers the hyperparameters and returns a compiled model;
    ``fit`` regenerates the train/validation arrays with ``split_data`` for
    the trial's ``sequence_length`` before training.
    """

    def build(self, hp):
        """Create and compile the model for one set of hyperparameters.

        Args:
            hp: Keras Tuner ``HyperParameters`` object.

        Returns:
            A compiled ``Sequential`` model with a single-unit output layer
            and mean-squared-error loss.
        """
        sequence_length = hp.Int('sequence_length', min_value=6, max_value=24, step=6)
        
        model = Sequential()
        # Input is (sequence_length, number of feature columns in the global X)
        model.add(Input(shape=(sequence_length, X.shape[1])))
    
        # Both LSTM layers request the hyperparameter under the same name 'units'
        model.add(LSTM(units=hp.Int('units', min_value=32, max_value=192, step=32), return_sequences=True))
        model.add(LSTM(units=hp.Int('units', min_value=32, max_value=192, step=32)))
    
        model.add(Dropout(rate=hp.Float('dropout', min_value=0.0, max_value=0.5, step=0.1)))
    
        model.add(Dense(units=1))
    
        model.compile(
            optimizer=Adam(learning_rate=hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6])),
            loss='mean_squared_error'
        )
        return model
    
    def fit(self, hp, model, *args, **kwargs):
        """Train ``model`` on data rebuilt for this trial's window length.

        The positional data and ``validation_data`` given to ``tuner.search``
        are not used: the arrays are regenerated here because their shape
        depends on the tuned ``sequence_length``.

        Args:
            hp: Keras Tuner ``HyperParameters`` for the current trial.
            model: Model returned by ``build``.
            *args: Ignored (see above).
            **kwargs: Keyword arguments forwarded by the tuner; ``epochs``
                (default 100) and ``callbacks`` are honoured.

        Returns:
            The ``History`` object returned by ``model.fit``.
        """
        sequence_length = hp.get('sequence_length')
        lead_time = 1

        # Only the train/validation arrays are needed while tuning
        X_train, Y_train, X_val, Y_val, *_ = split_data(sequence_length, lead_time)
        
        callbacks = [
            EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
            ModelCheckpoint("best_model_dir_adjusted_event8,9.h5", monitor='val_loss', save_best_only=True)
        ]
        # Keras Tuner passes its own callbacks (checkpointing, logging and other
        # tuning utilities) through kwargs; they have to reach model.fit() for
        # the search to work correctly, so append them instead of dropping them.
        callbacks.extend(kwargs.get('callbacks') or [])
        
        return model.fit(
            X_train, Y_train,
            validation_data=(X_val, Y_val),
            epochs=kwargs.get('epochs', 100),
            batch_size=hp.Int('batch_size', min_value=16, max_value=64, step=16),
            callbacks=callbacks
        )

# Directory in which Keras Tuner stores its trial results (machine-specific path)
tuner_dir = r'C:\Users\HydroLab\OneDrive\hp_tuner_use'

# Configure the hyperparameter tuner: random search minimising validation loss
tuner = kt.RandomSearch(
    MyHyperModel(),
    objective='val_loss',
    max_trials=20,
    executions_per_trial=3,
    directory=tuner_dir,
    project_name='dir_adjusted_event8,9.keras_2'
)

# Initial dataset split (window length 6, lead time 1)
sequence_length_initial = 6
lead_time = 1
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, assigned_train, assigned_val, assigned_test = split_data(sequence_length_initial, lead_time)

# Report the size of each split and the events assigned to it
print('Train set:')
print('X_train의 크기:', X_train.shape)
print('Y_train의 크기:', Y_train.shape)
print('Assigned Train Events:', assigned_train)
print('\nValidation set:')
print('X_val의 크기:', X_val.shape)
print('Y_val의 크기:', Y_val.shape)
print('Assigned Validation Events:', assigned_val)
print('\nTest set:')
print('X_test_1의 크기:', X_test_1.shape)
print('Y_test_1의 크기:', Y_test_1.shape)
print('X_test_2의 크기:', X_test_2.shape)
print('Y_test_2의 크기:', Y_test_2.shape)
print('Assigned Test Events:', assigned_test)

# Run the search.
# NOTE: MyHyperModel.fit rebuilds its own train/validation arrays from the tuned
# sequence_length via split_data, so it does not train on the arrays passed here.
tuner.search(X_train, Y_train, epochs=100, validation_data=(X_val, Y_val))

# Print the best hyperparameters found
best_hps = tuner.get_best_hyperparameters()[0]

print(f"""
The optimal number of units in the LSTM layer is {best_hps.get('units')}.
The optimal dropout rate is {best_hps.get('dropout')}.
The optimal learning rate for the optimizer is {best_hps.get('learning_rate')}.
The optimal sequence length is {best_hps.get('sequence_length')}.
The optimal batch size is {best_hps.get('batch_size')}.
""")


# 4. 예측시간 별 모델 학습 및 예측

## 4.0 전체 lead time 동시 모델 학습 및 예측 수행

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are flattened before comparison, so a column vector of shape
    ``(n, 1)`` and a flat array of shape ``(n,)`` are compared element by
    element instead of being broadcast to an ``(n, n)`` matrix. Entries whose
    observed value is exactly zero are left out, because their percentage
    error is undefined.

    Args:
        y_true: Observed values (array-like).
        y_pred: Predicted values (array-like) with the same number of
            elements as ``y_true``, or a single value.

    Returns:
        MAPE in percent, or ``nan`` if no observation is non-zero.

    Raises:
        ValueError: If the flattened inputs cannot be matched element-wise.
    """
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float).ravel(),
        np.asarray(y_pred, dtype=float).ravel(),
    )
    nonzero = y_true != 0
    if not nonzero.any():
        return np.nan
    relative_error = (y_true[nonzero] - y_pred[nonzero]) / y_true[nonzero]
    return np.mean(np.abs(relative_error)) * 100

def evaluate_and_plot(Y_test_actual, Y_pred_actual, event_label, lead_time):
    """Compute error metrics for one event and show three figures.

    The metrics (MSE, MAE, R², RMSE, peak error QER in percent, MAPE) are
    computed on the arrays exactly as passed in. For the time-series and
    scatter figures the prediction drops its first ``lead_time`` samples and
    is drawn against the same number of leading observations.

    Args:
        Y_test_actual: Observed values; must support ``.flatten()``.
        Y_pred_actual: Predicted values; must support ``.flatten()``.
        event_label: Event identifier, used for the time axis lookup and titles.
        lead_time: Lead time shown in the titles and used for the trimming.

    Returns:
        Tuple ``(mse, mae, r2, rmse, qer, mape)``.
    """
    mse = mean_squared_error(Y_test_actual, Y_pred_actual)
    mae = mean_absolute_error(Y_test_actual, Y_pred_actual)
    r2 = r2_score(Y_test_actual, Y_pred_actual)
    rmse = np.sqrt(mse)
    
    # Peak values of the predicted and observed series
    predicted_peak = np.max(Y_pred_actual)
    real_peak = np.max(Y_test_actual)
    
    # QER: relative error of the predicted peak, in percent
    qer = ((predicted_peak - real_peak) / real_peak) * 100
    
    # MAPE
    mape = mean_absolute_percentage_error(Y_test_actual, Y_pred_actual)

    metrics = {'MSE': mse, 'MAE': mae, 'R²': r2, 'RMSE': rmse, 'QER': qer, 'MAPE': mape}

    # Show the metrics as large text, one metric per panel of a 3x2 grid
    fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 8))
    for (metric, value), ax in zip(metrics.items(), axes.flatten()):
        ax.text(0.5, 0.5, f'{metric}\n{value:.6f}', fontsize=40, ha='center', va='center')
        ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False, 
                       labelbottom=False, labelleft=False)
    plt.tight_layout()
    plt.show()

    # Time axis for the observed-vs-predicted line plot
    time_index = get_time_index(event_label, Y_test_actual)

    # Drop the first lead_time predictions ...
    trimmed_pred_actual = Y_pred_actual[lead_time:].flatten()

    # ... and cut the observations and time axis to the same length
    trimmed_test_actual = Y_test_actual[:len(trimmed_pred_actual)].flatten()
    trimmed_time_index = time_index[:len(trimmed_pred_actual)]
    
    plt.figure(figsize=(10, 5))
    plt.plot(trimmed_time_index, trimmed_test_actual, label='Actual', color='blue', linewidth=3)
    plt.plot(trimmed_time_index, trimmed_pred_actual, label='Predicted', color='red', linewidth=3)
    plt.title(f'Godal Bridge Water Level\nActual Vs Predicted\n({event_label}, lead time = {lead_time}hr)', fontsize=30, fontweight='bold')
    plt.xlabel('Date', fontsize=25, fontweight='bold')
    plt.ylabel('Water Level', fontsize=25, fontweight='bold')
    plt.xticks(rotation=45, fontsize=20)
    plt.yticks(fontsize=20)
    plt.legend(fontsize=18, loc='upper right')
    plt.show()

    # Scatter plot of the trimmed series with a y = x reference line
    plt.figure(figsize=(8, 8))
    plt.scatter(trimmed_test_actual, trimmed_pred_actual, color='blue')
    plt.plot([trimmed_test_actual.min(), trimmed_test_actual.max()], [trimmed_test_actual.min(), trimmed_test_actual.max()], color='red', lw=2)
    plt.title(f'Godal Bridge Water Level\nActual Vs Predicted\n({event_label}, lead time = {lead_time}hr)', fontsize=30, fontweight = 'bold')
    plt.xlabel('Actual', fontsize=25, fontweight = 'bold')
    plt.ylabel('Predicted', fontsize=25, fontweight = 'bold')
    plt.xticks(fontsize = 20)
    plt.yticks(fontsize = 20)
    plt.show()

    return mse, mae, r2, rmse, qer, mape

In [None]:
# Lead times for which a separate model is trained and evaluated below
lead_times = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 24]

# Look up the timestamps that go with one event's test targets
def get_time_index(event_label, Y_test_actual):
    """Return the trailing timestamps of an event, one per test target.

    Args:
        event_label: Value of the '이벤트 번호' column identifying the event.
        Y_test_actual: Sized collection of targets; only its length is used.

    Returns:
        A Series holding the last ``len(Y_test_actual)`` values of the event's
        '관측 일시' column, re-indexed from 0. Empty when there are no targets.
    """
    event_time_data = Rain_WL[Rain_WL['이벤트 번호'] == event_label]['관측 일시']
    n_targets = len(Y_test_actual)
    # iloc[-0:] selects every row, so an empty target set needs an explicit empty slice
    tail = event_time_data.iloc[-n_targets:] if n_targets else event_time_data.iloc[:0]
    return tail.reset_index(drop=True)


# Write one event's metrics into a worksheet of its own
def save_metrics_to_excel(wb, event_name, lead_times, mse_list, mae_list, nse_list, rmse_list):
    """Add a sheet named ``event_name`` to ``wb`` and fill it with per-lead-time metrics.

    Args:
        wb: Workbook-like object exposing ``create_sheet(title=...)``; the
            returned sheet must expose ``append(row)``.
        event_name: Title of the new sheet.
        lead_times: Lead times, one per data row.
        mse_list, mae_list, nse_list, rmse_list: Metric values indexed in
            step with ``lead_times``.
    """
    sheet = wb.create_sheet(title=event_name)

    # Header row
    header = ["Lead Time (hours)", "MSE", "MAE", "NSE", "RMSE"]
    sheet.append(header)

    # One data row per lead time, metrics in the same order as the header
    metric_columns = (mse_list, mae_list, nse_list, rmse_list)
    for row_idx, hours in enumerate(lead_times):
        sheet.append([hours] + [column[row_idx] for column in metric_columns])

# Per-event accumulators: one value is appended per lead time in the loop below
mse_all_lead_times_event_1 = []
mae_all_lead_times_event_1 = []
rmse_all_lead_times_event_1 = []
nse_all_lead_times_event_1 = []
qer_all_lead_times_event_1 = []
mape_all_lead_times_event_1 = []

mse_all_lead_times_event_2 = []
mae_all_lead_times_event_2 = []
rmse_all_lead_times_event_2 = []
nse_all_lead_times_event_2 = []
qer_all_lead_times_event_2 = []
mape_all_lead_times_event_2 = []

# Dictionaries keyed by lead time that keep the lead-time 1-3 results
# (observed values, predictions, time index) for the two test events
y_true_by_lt_event1, y_pred_by_lt_event1, t_by_lt_event1 = {}, {}, {}
y_true_by_lt_event2, y_pred_by_lt_event2, t_by_lt_event2 = {}, {}, {}

# Train one model per lead time and store its metrics
for lead_time in lead_times:
    set_all_seeds(42)

    # Best hyperparameters found by the tuner
    best_sequence_length = best_hps.get('sequence_length')
    best_units = best_hps.get('units')
    best_learning_rate = best_hps.get('learning_rate')
    best_dropout = best_hps.get('dropout')
    best_batch_size = best_hps.get('batch_size')

    # Rebuild the splits for this lead time
    X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)

    # Build a fresh model from the best hyperparameters
    hypermodel = MyHyperModel()
    best_model = hypermodel.build(best_hps)

    # Train the model
    history = best_model.fit(X_train, Y_train, 
                             validation_data=(X_val, Y_val), 
                             epochs=100, 
                             batch_size=best_batch_size, 
                             callbacks=[
                                 EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
                                 ModelCheckpoint(f"best_model_with_tuning(직접예측, 예측선행시간적용X)_{lead_time}시간.keras", monitor='val_loss', save_best_only=True)
                             ])

    # NOTE(review): this writes to the same path as the ModelCheckpoint above, so the
    # checkpointed file is replaced by the model as it stands after fit() — confirm intended.
    best_model.save(f'best_model_with_tuning(직접예측, 예측선행시간적용X)_{lead_time}시간.keras')
    
    # Predict both test events
    Y_pred_best_1 = best_model.predict(X_test_1)
    Y_pred_best_2 = best_model.predict(X_test_2)

    # Undo the target normalisation
    Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
    Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
    Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
    Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
    
    # --------- (added) keep the results for lead times 1-3 only ----------
    if lead_time in [1, 2, 3]:
        # First test event (assigned_test[0]): time index and flattened series
        t_idx_1 = get_time_index(assigned_test[0], Y_test_actual_1)
        y_true_by_lt_event1[lead_time] = Y_test_actual_1.flatten()
        y_pred_by_lt_event1[lead_time] = Y_pred_actual_1.flatten()
        t_by_lt_event1[lead_time]      = t_idx_1

        # Second test event (assigned_test[1]): time index and flattened series
        t_idx_2 = get_time_index(assigned_test[1], Y_test_actual_2)
        y_true_by_lt_event2[lead_time] = Y_test_actual_2.flatten()
        y_pred_by_lt_event2[lead_time] = Y_pred_actual_2.flatten()
        t_by_lt_event2[lead_time]      = t_idx_2
    # --------- (end of addition) ----------
    
    # Evaluate the first test event (assigned_test[0]) and store its metrics;
    # the R² returned by evaluate_and_plot is stored in the NSE list
    mse, mae, r2, rmse, qer, mape = evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[0]}', lead_time)
    mse_all_lead_times_event_1.append(mse)
    mae_all_lead_times_event_1.append(mae)
    nse_all_lead_times_event_1.append(r2)
    rmse_all_lead_times_event_1.append(rmse)
    qer_all_lead_times_event_1.append(qer)
    mape_all_lead_times_event_1.append(mape)
    

    # Evaluate the second test event (assigned_test[1]) and store its metrics
    mse, mae, r2, rmse, qer, mape = evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)
    mse_all_lead_times_event_2.append(mse)
    mae_all_lead_times_event_2.append(mae)
    nse_all_lead_times_event_2.append(r2)
    rmse_all_lead_times_event_2.append(rmse)
    qer_all_lead_times_event_2.append(qer)
    mape_all_lead_times_event_2.append(mape)

In [None]:
# Plot a single metric against lead time for one event
def plot_metric_vs_lead_time_event(lead_times, metric_list_event, metric_name, event_label, color):
    """Show a line chart of ``metric_list_event`` over ``lead_times``.

    Args:
        lead_times: X values (lead times in hours).
        metric_list_event: Metric values, one per lead time.
        metric_name: Name used for the title, y-label and line label.
        event_label: Event identifier shown in the title.
        color: Matplotlib colour of the line.
    """
    curve_label = f'{metric_name} vs Lead Time'
    heading = f'{metric_name} by Lead time - {event_label}'

    plt.figure(figsize=(10, 6))
    plt.plot(lead_times, metric_list_event, marker='o', color=color, label=curve_label)
    plt.title(heading, fontsize=30, fontweight='bold')
    plt.xlabel('Lead Time (hours)', fontsize=25)
    plt.ylabel(metric_name, fontsize=25)
    # Same tick font size on both axes
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(fontsize=20)
    plt.grid(True)
    plt.show()

lead_time_modified = list(range(1, 13))

# (metric name, event-1 values, event-2 values, line colour), in plotting order
_metric_series = [
    ('MSE', mse_all_lead_times_event_1, mse_all_lead_times_event_2, 'blue'),
    ('MAE', mae_all_lead_times_event_1, mae_all_lead_times_event_2, 'green'),
    ('RMSE', rmse_all_lead_times_event_1, rmse_all_lead_times_event_2, 'red'),
    ('NSE', nse_all_lead_times_event_1, nse_all_lead_times_event_2, 'purple'),
    ('QER', qer_all_lead_times_event_1, qer_all_lead_times_event_2, 'orange'),
    ('MAPE', mape_all_lead_times_event_1, mape_all_lead_times_event_2, 'gray'),
]

# Event 1: first 12 lead times of every metric
for _name, _values_1, _, _colour in _metric_series:
    plot_metric_vs_lead_time_event(lead_time_modified, _values_1[:12], _name, assigned_test[0], _colour)

# Event 2: first 12 lead times of every metric
for _name, _, _values_2, _colour in _metric_series:
    plot_metric_vs_lead_time_event(lead_time_modified, _values_2[:12], _name, assigned_test[1], _colour)

In [None]:
def plot_lt13_ts_and_scatter(
    y_true_by_lt: dict,      # {lead_time: array}
    y_pred_by_lt: dict,      # {lead_time: array}
    t_by_lt: dict,           # {lead_time: DatetimeIndex/Series}
    station_name: str = "Godal Bridge",
    event_label: str = "Event#8",
    scenario_label: str = "Direct Prediction (no forecasted rainfall)",
    save_path: str = None,
    day_interval: int = 1,
    custom_nse: dict = None  # optional override, e.g. {1:0.87, 2:0.81, 3:0.79}
):
    """Draw a 2x3 figure: time series on the top row, scatter plots with NSE below.

    Up to three lead times are shown: the three smallest keys present in both
    ``y_true_by_lt`` and ``y_pred_by_lt``. All open figures are closed first.

    Args:
        y_true_by_lt: Observed values per lead time.
        y_pred_by_lt: Predicted values per lead time.
        t_by_lt: Time axis per lead time. When an entry is missing or its
            length differs from the data, an hourly range starting at
            2000-01-01 is used instead.
        station_name: Station name shown in the figure title.
        event_label: Event label shown in the title; if the first number in it
            is 8 or 9, fixed axis limits are applied.
        scenario_label: Scenario text shown in the title.
        save_path: If given, the figure is saved there at 300 dpi.
        day_interval: Spacing in days of the x-axis date ticks.
        custom_nse: Optional ``{lead_time: value}``; a value given here is
            displayed instead of the NSE computed from the data.

    Raises:
        ValueError: If the two dictionaries share no lead-time key, or the
            observed and predicted arrays of a lead time differ in length.
    """
    # --- NSE helper ---
    def _nse(y_true, y_pred):
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        # Use only positions where both values are finite
        mask = np.isfinite(y_true) & np.isfinite(y_pred)
        y_true = y_true[mask]; y_pred = y_pred[mask]
        denom = np.sum((y_true - np.mean(y_true))**2)
        if denom == 0:
            return np.nan
        return 1.0 - np.sum((y_true - y_pred)**2) / denom

    # --- Per-event y-axis range ---
    y_limits = None
    m = re.search(r'(\d+)', str(event_label))
    evnum = int(m.group(1)) if m else None
    if evnum == 8:
        y_limits = (44.7, 45.8)
    elif evnum == 9:
        y_limits = (44.5, 48)
    # (other events are auto-scaled: y_limits = None)

    # 1) Choose the lead times to draw (at most three from the common keys)
    lt_keys = sorted(set(y_true_by_lt.keys()) & set(y_pred_by_lt.keys()))
    if not lt_keys:
        raise ValueError("y_true_by_lt와 y_pred_by_lt의 공통 lead time 키가 없습니다.")
    lead_times = lt_keys[:3]

    # 2) Validate the inputs and settle the time axis of each lead time
    t_fixed = {}
    for lt in lead_times:
        yt = np.asarray(y_true_by_lt[lt]).reshape(-1)
        yp = np.asarray(y_pred_by_lt[lt]).reshape(-1)
        if len(yt) != len(yp):
            raise ValueError(f"lead time {lt}: 실제/예측 길이가 다릅니다. ({len(yt)} vs {len(yp)})")
        t = (t_by_lt or {}).get(lt, None)
        if t is None or len(t) != len(yt):
            # Fall back to a synthetic hourly axis
            t = pd.date_range("2000-01-01", periods=len(yt), freq="H")
        t_fixed[lt] = pd.to_datetime(t)

    # 3) Draw
    plt.close('all')
    fig = plt.figure(figsize=(20, 15))
    fig.suptitle(
        f'{station_name} - {event_label}: Predicted Vs Actual\n'
        f'{scenario_label} (Lead time = {", ".join(map(str, lead_times))} hr)',
        fontsize=45, fontweight='bold', y=0.98
    )

    # --- First row: time series ---
    for i, lt in enumerate(lead_times, start=1):
        ax = fig.add_subplot(2, 3, i)
        tt = t_fixed[lt]
        yt = np.asarray(y_true_by_lt[lt]).reshape(-1)
        yp = np.asarray(y_pred_by_lt[lt]).reshape(-1)

        ax.plot(tt, yt, label='Actual',  linewidth=2, color='blue')
        ax.plot(tt, yp, label='Predicted', linewidth=2, color='red')

        ax.set_title(f'Lead Time = {lt} hr', fontsize=35, fontweight='bold', pad=10)
        ax.set_xlabel('Date', fontsize=30, fontweight='bold')
        ax.set_ylabel('Water Level(EL.m)', fontsize=30, fontweight='bold')

        ax.xaxis.set_major_locator(mdates.DayLocator(interval=day_interval))
        ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))

        if y_limits is not None:
            ax.set_ylim(*y_limits)

        ax.tick_params(axis='x', labelsize=25, rotation=45)
        ax.tick_params(axis='y', labelsize=25)
        ax.grid(True)

        # Legend on the first panel only
        if i == 1:
            ax.legend(fontsize=18)

    # --- Second row: scatter plot + NSE ---
    for j, lt in enumerate(lead_times, start=4):
        ax = fig.add_subplot(2, 3, j)
        yt = np.asarray(y_true_by_lt[lt]).reshape(-1)
        yp = np.asarray(y_pred_by_lt[lt]).reshape(-1)

        ax.scatter(yt, yp, s=18, alpha=0.8, color='blue')

        # y = x reference line and axis range
        if y_limits is not None:
            lo, hi = y_limits
        else:
            lo = float(np.nanmin([yt.min(), yp.min()]))
            hi = float(np.nanmax([yt.max(), yp.max()]))

        ax.plot([lo, hi], [lo, hi], linewidth=2, color='red')  # y=x
        ax.set_ylim(lo, hi)
        ax.set_xlim(lo, hi)

        # Regression line (currently disabled)
#         if np.isfinite(yt).all() and np.isfinite(yp).all() and len(yt) >= 2:
#             m, b = np.polyfit(yt, yp, 1)
#             xr = np.linspace(lo, hi, 100)
#             ax.plot(xr, m * xr + b, linewidth=2)

        # NSE annotation (a caller-supplied value replaces the computed one)
        if custom_nse and lt in custom_nse:
            nse_val = custom_nse[lt]
        else:
            nse_val = _nse(yt, yp)

        ax.text(
            0.95, 0.05, f'NSE: {nse_val:.4f}',
            transform=ax.transAxes,
            fontsize=25, fontweight='bold',
            va='bottom', ha='right',
            bbox=dict(facecolor='white', alpha=0.7, edgecolor='none')
        )

        ax.set_xlabel('Actual(EL.m)', fontsize=30, fontweight='bold')
        ax.set_ylabel('Predicted(EL.m)', fontsize=30, fontweight='bold')
        ax.tick_params(axis='x', labelsize=25)
        ax.tick_params(axis='y', labelsize=25)
        ax.grid(True)

    # Leave room for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# ==== Example inputs: replace with your own variable names ====
# Observed / predicted / time-index data for lead times 1-3 of each test event
scenario_label = "Direct Prediction (without Forecasted rainfall)"
# e.g. DP, not used: "Direct Prediction (Observed rainfall only)"
# e.g. RP, used:     "Recursive Prediction (with Forecasted Rainfall)"
# e.g. RP, not used: "Recursive Prediction (Observed rainfall only)"

# NOTE(review): these hard-coded values are displayed in place of the NSE that
# plot_lt13_ts_and_scatter would compute from the data passed in — confirm they
# match the current results.
custom_nse_vals_1 = {1: 0.9575, 2: 0.9441, 3: 0.9335}

# Event#8 (usually assigned_test[0])
plot_lt13_ts_and_scatter(
    y_true_by_lt_event1,
    y_pred_by_lt_event1,
    t_by_lt_event1,
    station_name="Godal Bridge",
    event_label=assigned_test[0],
    scenario_label=scenario_label,
    custom_nse=custom_nse_vals_1,
    save_path=f"{scenario_label.replace(' ', '_')}_{assigned_test[0]}_LT1-3.png",
)

custom_nse_vals_2= {1: 0.9431, 2: 0.9056, 3: 0.8899}

# Event #9 (date ticks every 6 days)
plot_lt13_ts_and_scatter(
    y_true_by_lt_event2,
    y_pred_by_lt_event2,
    t_by_lt_event2,
    station_name="Godal Bridge",
    event_label=assigned_test[1],
    scenario_label=scenario_label,
    custom_nse=custom_nse_vals_2,
    save_path=f"{scenario_label.replace(' ', '_')}_{assigned_test[1]}_LT1-3.png",
    day_interval=6   # <-- wider tick spacing
)

In [None]:
# Plot one metric against lead time for a single event.
# NOTE(review): a function with this same name is already defined earlier in this
# file; this definition replaces it when the cell runs.
def plot_metric_vs_lead_time_event(lead_times, metric_list_event, metric_name, event_label, color):
    """Show a line chart of ``metric_list_event`` over ``lead_times``.

    Args:
        lead_times: X values (lead times in hours).
        metric_list_event: Metric values, one per lead time.
        metric_name: Name used for the title, y-label and line label.
        event_label: Event identifier shown in the title.
        color: Matplotlib colour of the line.
    """
    plt.figure(figsize=(10, 6))
    # The label is set on the line, but no legend is drawn in this function
    plt.plot(lead_times, metric_list_event, marker='o', color=color, label=f'{metric_name} vs Lead Time')
    plt.title(f'{metric_name} by Lead time - {event_label}', fontsize=30, fontweight='bold')
    plt.xlabel('Lead Time (hours)', fontsize=25)
    plt.ylabel(metric_name, fontsize=25)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.grid(True)
    plt.show()

lead_time_modified = list(range(1, 13))

# (metric name, event-1 values, event-2 values, line colour), in plotting order
_metric_series = [
    ('MSE', mse_all_lead_times_event_1, mse_all_lead_times_event_2, 'blue'),
    ('MAE', mae_all_lead_times_event_1, mae_all_lead_times_event_2, 'green'),
    ('RMSE', rmse_all_lead_times_event_1, rmse_all_lead_times_event_2, 'red'),
    ('NSE', nse_all_lead_times_event_1, nse_all_lead_times_event_2, 'purple'),
    ('QER', qer_all_lead_times_event_1, qer_all_lead_times_event_2, 'orange'),
    ('MAPE', mape_all_lead_times_event_1, mape_all_lead_times_event_2, 'gray'),
]

# Event 1: first 12 lead times of every metric
for _name, _values_1, _, _colour in _metric_series:
    plot_metric_vs_lead_time_event(lead_time_modified, _values_1[:12], _name, assigned_test[0], _colour)

# Event 2: first 12 lead times of every metric
for _name, _, _values_2, _colour in _metric_series:
    plot_metric_vs_lead_time_event(lead_time_modified, _values_2[:12], _name, assigned_test[1], _colour)

In [None]:
import matplotlib.pyplot as plt
import matplotlib

# Turn off the Unicode minus glyph so that minus signs render correctly with the configured font
matplotlib.rcParams['axes.unicode_minus'] = False

# Draw MAE, RMSE, NSE and QER against lead time in a 2x2 grid
def plot_metrics_2x2_lead_time(lead_times, metrics_dict, event_label, colors):
    """Show four metric-vs-lead-time panels for one event.

    Args:
        lead_times: X values shared by all panels.
        metrics_dict: Mapping with the keys 'MAE', 'RMSE', 'NSE' and 'QER',
            each holding one value per lead time.
        event_label: Event identifier shown in each panel title.
        colors: Mapping from the same four metric names to line colours.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('[Direct Prediction]', fontsize=40, fontweight='bold')

    # (metric, unit shown on the y-axis), listed in row-major panel order
    panels = (('MAE', '(m)'), ('RMSE', '(m)'), ('NSE', ''), ('QER', '(%)'))

    for ax, (metric_name, unit) in zip(axes.flat, panels):
        ax.plot(
            lead_times,
            metrics_dict[metric_name],
            marker='o',
            markersize=10,
            color=colors[metric_name],
            label=f'{metric_name} vs Lead Time',
        )
        ax.set_title(f'{metric_name} by Lead Time - {event_label}', fontsize=30, fontweight='bold')
        ax.set_xlabel('Lead Time (hours)', fontsize=25, fontweight='bold')
        ax.set_ylabel(f'{metric_name} {unit}', fontsize=25, fontweight='bold')
        for axis_name in ('x', 'y'):
            ax.tick_params(axis=axis_name, labelsize=20)
        ax.grid(True)
        ax.legend(fontsize=20)

    # Keep the top 5% of the figure free for the suptitle
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

# Lead times shown on the x-axis (1-12; the metric lists are cut to their first 12 entries)
lead_time_modified = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]

# Line colour for each metric
colors = {
    'MAE': 'green',
    'RMSE': 'red',
    'NSE': 'purple',
    'QER': 'orange'
}

# Gather the metrics of test event 1 and draw its 2x2 figure
metrics_event_1 = {
    'MAE': mae_all_lead_times_event_1[:12],
    'RMSE': rmse_all_lead_times_event_1[:12],
    'NSE': nse_all_lead_times_event_1[:12],
    'QER': qer_all_lead_times_event_1[:12]
}
plot_metrics_2x2_lead_time(lead_time_modified, metrics_event_1, assigned_test[0], colors)

# Gather the metrics of test event 2 and draw its 2x2 figure
metrics_event_2 = {
    'MAE': mae_all_lead_times_event_2[:12],
    'RMSE': rmse_all_lead_times_event_2[:12],
    'NSE': nse_all_lead_times_event_2[:12],
    'QER': qer_all_lead_times_event_2[:12]
}
plot_metrics_2x2_lead_time(lead_time_modified, metrics_event_2, assigned_test[1], colors)

In [None]:
# Write one event's metric table to its own Excel sheet
def create_excel_with_metrics(lead_times, metric_dict_event, event_label, writer):
    """Store the metrics of one event as a sheet named ``event_label``.

    Args:
        lead_times: Row index of the table, written under the header
            'Lead Time (hours)'.
        metric_dict_event: Mapping from metric name to its values, one per
            lead time; each key becomes a column.
        event_label: Name of the sheet to write.
        writer: Target passed straight to ``DataFrame.to_excel``.
    """
    table = pd.DataFrame(metric_dict_event, index=lead_times).rename_axis('Lead Time (hours)')
    table.to_excel(writer, sheet_name=event_label)

# Save the per-event metrics (first 12 lead times) to an Excel workbook, one sheet per event
with pd.ExcelWriter('metrics_by_lead_time(Event8, 9).xlsx') as writer:
    
    # Test event 1
    metric_dict_event_1 = {
        'MSE': mse_all_lead_times_event_1[:12],
        'MAE': mae_all_lead_times_event_1[:12],
        'RMSE': rmse_all_lead_times_event_1[:12],
        'NSE': nse_all_lead_times_event_1[:12],
        'QER': qer_all_lead_times_event_1[:12],
        'MAPE': mape_all_lead_times_event_1[:12]
    }
    create_excel_with_metrics(lead_time_modified, metric_dict_event_1, assigned_test[0], writer)

    # Test event 2
    metric_dict_event_2 = {
        'MSE': mse_all_lead_times_event_2[:12],
        'MAE': mae_all_lead_times_event_2[:12],
        'RMSE': rmse_all_lead_times_event_2[:12],
        'NSE': nse_all_lead_times_event_2[:12],
        'QER': qer_all_lead_times_event_2[:12],
        'MAPE': mape_all_lead_times_event_2[:12]
    }
    create_excel_with_metrics(lead_time_modified, metric_dict_event_2, assigned_test[1], writer)

In [None]:
import matplotlib.dates as mdates

# Nash-Sutcliffe Efficiency (NSE)
def calculate_nse(y_true, y_pred):
    """Return the Nash-Sutcliffe efficiency of ``y_pred`` against ``y_true``.

    Inputs are converted to flat float arrays, so lists are accepted and a
    column vector ``(n, 1)`` is compared element by element with a flat
    ``(n,)`` array rather than being broadcast. Pairs in which either value
    is not finite are ignored, mirroring the ``_nse`` helper used for the
    lead-time 1-3 figure in this file.

    Args:
        y_true: Observed values (array-like).
        y_pred: Predicted values (array-like) with the same number of elements.

    Returns:
        ``1 - SS_res / SS_tot``, or ``nan`` when no finite pair remains or the
        observations have zero variance (the score is undefined there).
    """
    observed = np.asarray(y_true, dtype=float).ravel()
    predicted = np.asarray(y_pred, dtype=float).ravel()

    finite = np.isfinite(observed) & np.isfinite(predicted)
    observed, predicted = observed[finite], predicted[finite]
    if observed.size == 0:
        return np.nan

    total_ss = np.sum((observed - observed.mean()) ** 2)
    if total_ss == 0:
        # Constant observations: avoid dividing by zero
        return np.nan
    return 1.0 - np.sum((observed - predicted) ** 2) / total_ss

# Show the predictions of every lead time in a 3x4 grid (line plots)
def plot_predictions_grid_line(Y_test_actual, Y_pred_actual, event_label):
    """Draw observed vs predicted series for 12 lead times, one panel each.

    Args:
        Y_test_actual: Sequence whose item ``i`` holds the observed values for
            lead time ``i + 1``; items 0-11 are read.
        Y_pred_actual: Sequence of predicted values, indexed the same way.
        event_label: Event identifier; used for the time axis lookup, the
            figure title and the choice of date-tick spacing.
    """
    fig, axes = plt.subplots(3, 4, figsize=(20, 15))  # 3x4 grid of subplots
    fig.suptitle(f'Predictions vs Actual (Line Plot) for Event {event_label}', fontsize=40, fontweight='bold')

    for i, ax in enumerate(axes.flat):  # one panel per lead time (12 in total)
        lead_time = i + 1  # lead time shown in this panel
        Y_pred = Y_pred_actual[i]
        Y_test = Y_test_actual[i]

        # Time axis for this panel
        time_index = get_time_index(event_label, Y_test)

        # Line plot (Actual vs Predicted)
        ax.plot(time_index, Y_test, label='Actual', color='blue', linewidth=3)
        ax.plot(time_index, Y_pred, label='Predicted', color='red', linewidth=3)

        # X-axis date format (year-month-day); tick spacing depends on the event
        if event_label == assigned_test[0]:
            ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))  # one tick per day
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # year-month-day
        if event_label == assigned_test[1]:
            ax.xaxis.set_major_locator(mdates.DayLocator(interval=6))  # one tick every 6 days
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))  # year-month-day
        ax.tick_params(axis='x', labelsize=20, rotation=45)  # x tick label size and rotation
        ax.tick_params(axis='y', labelsize=20)  # y tick label size

        ax.set_title(f'Lead Time {lead_time} Hours', fontsize=30, fontweight='bold')
        ax.set_xlabel('Date', fontsize=25, fontweight='bold')
        ax.set_ylabel('Water Level(EL.m)', fontsize=25, fontweight='bold')
        
        if lead_time == 1:  # legend on the first panel only
            ax.legend(fontsize=17)

    plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle
    plt.show()

# Show the predictions of every lead time in a 3x4 grid (scatter plots with NSE)
def plot_predictions_grid_scatter_nse(Y_test_actual, Y_pred_actual, event_label):
    """Draw observed-vs-predicted scatter plots for 12 lead times with their NSE.

    Args:
        Y_test_actual: Sequence whose item ``i`` holds the observed values for
            lead time ``i + 1``; items 0-11 are read.
        Y_pred_actual: Sequence of predicted values, indexed the same way.
        event_label: Event identifier shown in the figure title.
    """
    fig, axes = plt.subplots(3, 4, figsize=(20, 18))  # 3x4 grid of subplots
    fig.suptitle(f'Predictions vs Actual (Scatter Plot) for Event {event_label}', fontsize=40, fontweight='bold')

    for i, ax in enumerate(axes.flat):  # one panel per lead time (12 in total)
        lead_time = i + 1  # lead time shown in this panel
        Y_pred = Y_pred_actual[i]
        Y_test = Y_test_actual[i]

        # Scatter plot (Actual vs Predicted)
        ax.scatter(Y_test, Y_pred, label='Predicted', color='blue', alpha=0.5)
        ax.plot([min(Y_test), max(Y_test)], [min(Y_test), max(Y_test)], color='red', linewidth=2)  # y = x line

        # Compute the NSE and print it in the lower-right corner
        nse_value = calculate_nse(Y_test, Y_pred)
        ax.text(0.95, 0.05, f'NSE: {nse_value:.4f}', transform=ax.transAxes, fontsize=28, verticalalignment='bottom', horizontalalignment='right',
                fontweight='bold', bbox=dict(facecolor='white', alpha=0.7))  # semi-transparent white box behind the text

        ax.set_title(f'Lead Time {lead_time} Hours', fontsize=30, fontweight='bold')
        ax.set_xlabel('Actual(EL.m)', fontsize=25, fontweight='bold')
        ax.set_ylabel('Predicted(EL.m)', fontsize=25, fontweight='bold')

        ax.tick_params(axis='x', labelsize=20)
        ax.tick_params(axis='y', labelsize=20)

    plt.tight_layout(rect=[0, 0, 1, 0.95])  # leave room for the suptitle
    plt.show()

# Per-event accumulators; the loop below appends one entry per lead time
Y_test_actual_event1 = []
Y_pred_actual_event1 = []

Y_test_actual_event2 = []
Y_pred_actual_event2 = []

# For every lead time: train, predict both test events, store de-normalized results
for lead_time in range(1, 13):  # lead times 1 through 12
    # Split the data for this lead time (split_data is defined elsewhere; 11 values are unpacked here)
    X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, assigned_train, assigned_val, assigned_test = split_data(best_hps.get('sequence_length'), lead_time)
    
    # Train the model; the checkpoint file name embeds the lead time
    # NOTE(review): best_model is never rebuilt inside this loop, so each fit starts from
    # the weights left by the previous lead time — confirm this carry-over is intended
    history = best_model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=100, batch_size=best_hps.get('batch_size'), callbacks=[
        EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True),
        ModelCheckpoint(f"best_model_with_tuning(직접예측, 예측선행시간적용X)_{lead_time}시간.keras", monitor='val_loss', save_best_only=True)
    ])
    
    # Predict test event 1 and undo the target scaling for predictions and observations
    Y_pred_best_1 = best_model.predict(X_test_1)
    Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
    Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
    Y_test_actual_event1.append(Y_test_actual_1)
    Y_pred_actual_event1.append(Y_pred_actual_1)
    
    # Predict test event 2 and undo the target scaling for predictions and observations
    Y_pred_best_2 = best_model.predict(X_test_2)
    Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
    Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
    Y_test_actual_event2.append(Y_test_actual_2)
    Y_pred_actual_event2.append(Y_pred_actual_2)

# Event 1: line-plot grid and scatter-plot grid
# (assigned_test here is the value left by the last loop iteration)
plot_predictions_grid_line(Y_test_actual_event1, Y_pred_actual_event1, assigned_test[0])  # water-level comparison line plots
plot_predictions_grid_scatter_nse(Y_test_actual_event1, Y_pred_actual_event1, assigned_test[0])  # scatter plots with NSE

# Event 2: line-plot grid and scatter-plot grid
plot_predictions_grid_line(Y_test_actual_event2, Y_pred_actual_event2, assigned_test[1])
plot_predictions_grid_scatter_nse(Y_test_actual_event2, Y_pred_actual_event2, assigned_test[1])

In [None]:
# Write one event's metrics to its own Excel sheet
def create_excel_with_metrics(lead_times, metric_dict_event, event_label, writer):
    """Write the metrics of a single event to a sheet of an open Excel writer.

    Args:
        lead_times: Values used as the row index (one per metric entry).
        metric_dict_event: Mapping of metric name to a sequence of values;
            each key becomes a column.
        event_label: Event identifier used as the sheet name. It is converted
            with str() so non-string labels (e.g. integer event numbers) are
            accepted as well.
        writer: An open pandas ExcelWriter to write into.
    """
    df = pd.DataFrame(metric_dict_event, index=lead_times)
    df.index.name = 'Lead Time (hours)'
    # pandas documents sheet_name as a string, so coerce the label explicitly
    df.to_excel(writer, sheet_name=str(event_label))

# Export the per-event metrics to one Excel workbook, one sheet per event.
# The *_all_lead_times_event_* lists and lead_time_modified are defined elsewhere;
# only the first 12 entries of each metric list are written.
with pd.ExcelWriter('metrics_by_lead_time_직접예측_예보강우미사용(Event8,9).xlsx') as writer:
    
    # Event 1
    metric_dict_event_1 = {
        'MSE': mse_all_lead_times_event_1[:12],
        'MAE': mae_all_lead_times_event_1[:12],
        'RMSE': rmse_all_lead_times_event_1[:12],
        'NSE': nse_all_lead_times_event_1[:12],
        'QER': qer_all_lead_times_event_1[:12],
        'MAPE': mape_all_lead_times_event_1[:12]
    }
    create_excel_with_metrics(lead_time_modified, metric_dict_event_1, assigned_test[0], writer)

    # Event 2
    metric_dict_event_2 = {
        'MSE': mse_all_lead_times_event_2[:12],
        'MAE': mae_all_lead_times_event_2[:12],
        'RMSE': rmse_all_lead_times_event_2[:12],
        'NSE': nse_all_lead_times_event_2[:12],
        'QER': qer_all_lead_times_event_2[:12],
        'MAPE': mape_all_lead_times_event_2[:12]
    }
    create_excel_with_metrics(lead_time_modified, metric_dict_event_2, assigned_test[1], writer)

## 4.1 1hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 1-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 1


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_1시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_1시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Fetch the observation timestamps that line up with an event's test targets
def get_time_index(event_label, Y_test_actual):
    """Return the last len(Y_test_actual) observation timestamps of an event.

    Rows of the module-level Rain_WL frame whose '이벤트 번호' equals
    event_label are selected, and the trailing timestamps from '관측 일시'
    are returned with a fresh 0-based index.

    Args:
        event_label: Value to match against the '이벤트 번호' column.
        Y_test_actual: Sized object; only its length is used.

    Returns:
        A pandas Series of timestamps. It is empty when Y_test_actual is
        empty, and holds every timestamp of the event when Y_test_actual is
        longer than the event.
    """
    event_time_data = Rain_WL.loc[Rain_WL['이벤트 번호'] == event_label, '관측 일시']
    wanted = len(Y_test_actual)
    # An explicit non-negative start offset avoids the `iloc[-0:]` pitfall,
    # which would return the whole series for an empty target
    start = max(len(event_time_data) - wanted, 0)
    return event_time_data.iloc[start:].reset_index(drop=True)

In [None]:
# Evaluate one event's predictions and visualize the results
def evaluate_and_plot(Y_test_actual, Y_pred_actual, event_label, lead_time):
    """Show metrics, a time-series comparison and a scatter plot for one event.

    MSE, MAE, NSE (taken from sklearn's r2_score) and RMSE are computed on
    the full, untrimmed arrays and displayed as a 2x2 panel of text boxes.
    For the two plots, the first `lead_time` predictions are dropped and the
    remainder is paired with the leading observations and timestamps.

    NOTE(review): that pairing shifts the predictions by `lead_time` steps
    relative to the observations used for the metrics — confirm intended.

    Args:
        Y_test_actual: Observed values in original units.
        Y_pred_actual: Predicted values in original units.
        event_label: Event identifier used in titles and for get_time_index.
        lead_time: Number of leading predictions to drop before plotting.
    """
    mse = mean_squared_error(Y_test_actual, Y_pred_actual)
    metrics = {
        'MSE': mse,
        'MAE': mean_absolute_error(Y_test_actual, Y_pred_actual),
        'NSE': r2_score(Y_test_actual, Y_pred_actual),
        'RMSE': np.sqrt(mse),
    }

    # Metric panel: one text-only axes per metric, ticks and labels hidden
    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
    for ax, (metric, value) in zip(axes.flatten(), metrics.items()):
        ax.text(0.5, 0.5, f'{metric}\n{value:.6f}', fontsize=40, ha='center', va='center')
        ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, right=False,
                       labelbottom=False, labelleft=False)
    plt.tight_layout()
    plt.show()

    # Timestamps for the event, then trim everything to a common length
    time_index = get_time_index(event_label, Y_test_actual)
    shifted_pred = Y_pred_actual[lead_time:].flatten()
    n_points = len(shifted_pred)
    aligned_obs = Y_test_actual[:n_points].flatten()
    aligned_time = time_index[:n_points]

    # Time-series comparison of observed and predicted values
    plt.figure(figsize=(10, 5))
    plt.plot(aligned_time, aligned_obs, label='실제 값', color='blue', linewidth=3)
    plt.plot(aligned_time, shifted_pred, label='예측 값', color='red', linewidth=3)
    plt.title(f'고달교 수위 예측값과 실제 값 비교: {event_label}', fontsize=30, fontweight='bold')
    plt.xlabel('관측 일시', fontsize=25, fontweight='bold')
    plt.ylabel('고달교 수위', fontsize=25, fontweight='bold')
    plt.xticks(rotation=45, fontsize=20)
    plt.yticks(fontsize=20)
    plt.legend(fontsize=18, loc='upper right')
    plt.show()

    # Scatter plot with a y = x reference line across the observed range
    obs_low = aligned_obs.min()
    obs_high = aligned_obs.max()
    plt.figure(figsize=(8, 8))
    plt.scatter(aligned_obs, shifted_pred, color='blue')
    plt.plot([obs_low, obs_high], [obs_low, obs_high], color='red', lw=2)
    plt.title(f'실제 값 vs. 예측 값: {event_label}', fontsize=30, fontweight='bold')
    plt.xlabel('실제 값', fontsize=25, fontweight='bold')
    plt.ylabel('예측 값', fontsize=25, fontweight='bold')
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.show()

In [None]:
# Lead times covered by the per-lead-time models (presumably hours — TODO confirm)
lead_times = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 24]

# Empty metric accumulators; nothing in this cell fills them
mse_list = []
mae_list = []
nse_list = []
rmse_list = []

In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.2 2hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 2-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 2


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_2시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_2시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.3 3hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 3-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 3


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_3시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_3시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.4 4hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 4-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 4


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_4시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_4시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.5 5hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 5-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 5


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_5시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_5시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.6 6hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 6-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 6


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_6시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_6시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.7 7hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 7-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 7


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_7시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_7시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.8 8hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 8-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 8


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_8시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_8시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.9 9hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 9-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 9


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_9시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_9시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.10 10hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 10-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 10


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_10시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_10시간.h5')

In [None]:
# Loss curves: training and validation loss per epoch from the latest fit
plt.figure(figsize=(10, 5))
plt.plot(history.history['loss'], label='Training Loss', color='blue')
plt.plot(history.history['val_loss'], label='Validation Loss', color='red')
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(fontsize=15, loc = 'upper right')
plt.show()

In [None]:
# Predict each of the three test sets with the trained model
Y_pred_best_1 = best_model.predict(X_test_1)
Y_pred_best_2 = best_model.predict(X_test_2)
Y_pred_best_3 = best_model.predict(X_test_3)

# Undo the target scaling for both predictions and observations
Y_pred_actual_1 = scaler_Y.inverse_transform(Y_pred_best_1)
Y_test_actual_1 = scaler_Y.inverse_transform(Y_test_1)
Y_pred_actual_2 = scaler_Y.inverse_transform(Y_pred_best_2)
Y_test_actual_2 = scaler_Y.inverse_transform(Y_test_2)
Y_pred_actual_3 = scaler_Y.inverse_transform(Y_pred_best_3)
Y_test_actual_3 = scaler_Y.inverse_transform(Y_test_3)


In [None]:
# Evaluate and visualize test set 1 (original comment: Event 10)
# NOTE(review): set 1 is labelled with assigned_test[2] and set 3 with assigned_test[0];
# confirm this reversed pairing matches the order in which split_data returns the test sets
evaluate_and_plot(Y_test_actual_1, Y_pred_actual_1, f'{assigned_test[2]}', lead_time)

# Evaluate and visualize test set 2 (original comment: Event 9)
evaluate_and_plot(Y_test_actual_2, Y_pred_actual_2, f'{assigned_test[1]}', lead_time)

# Evaluate and visualize test set 3 (original comment: Event 8)
evaluate_and_plot(Y_test_actual_3, Y_pred_actual_3, f'{assigned_test[0]}', lead_time)

## 4.11 11hr 모델

In [None]:
# Re-seed the NumPy, TensorFlow and Python RNGs before building the 11-hour model
set_all_seeds(42)

# Read the tuned hyperparameters
# (best_units, best_learning_rate and best_dropout are not referenced again in this cell)
best_sequence_length = best_hps.get('sequence_length')
best_units = best_hps.get('units')
best_learning_rate = best_hps.get('learning_rate')
best_dropout = best_hps.get('dropout')
best_batch_size = best_hps.get('batch_size')
lead_time = 11


# split_data is defined elsewhere; its 13 return values are unpacked here
X_train, Y_train, X_val, Y_val, X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3, assigned_train, assigned_val, assigned_test = split_data(best_sequence_length, lead_time)


# Create a MyHyperModel instance and build a model from best_hps
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model: stop after 5 epochs without val_loss improvement and
# write the checkpoint file only when val_loss improves
history = best_model.fit(X_train, Y_train, 
                         validation_data=(X_val, Y_val), 
                         epochs=100, 
                         batch_size=best_batch_size, 
                         callbacks=[
                             EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
                             ModelCheckpoint("best_model_with_tuning(예측선행시간적용O)_11시간.h5", monitor='val_loss', save_best_only=True)
                         ])

# NOTE(review): same path as the ModelCheckpoint above, so this save overwrites the
# checkpointed file with the model's current weights — confirm intended
best_model.save('best_model_with_tuning(예측선행시간적용O)_11시간.h5')

In [None]:
# Loss curves: training vs. validation loss per epoch.
plt.figure(figsize=(10, 5))
for history_key, curve_label, curve_color in (
    ('loss', 'Training Loss', 'blue'),
    ('val_loss', 'Validation Loss', 'red'),
):
    plt.plot(history.history[history_key], label=curve_label, color=curve_color)

# Title, axis labels and tick sizes.
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(loc='upper right', fontsize=15)
plt.show()

In [None]:
# Predict on each of the three test sets (still in scaled units).
Y_pred_best_1, Y_pred_best_2, Y_pred_best_3 = (
    best_model.predict(x_scaled) for x_scaled in (X_test_1, X_test_2, X_test_3)
)

# Undo the target scaling for predictions and for the test targets.
Y_pred_actual_1, Y_pred_actual_2, Y_pred_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_pred_best_1, Y_pred_best_2, Y_pred_best_3)
)
Y_test_actual_1, Y_test_actual_2, Y_test_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_test_1, Y_test_2, Y_test_3)
)


In [None]:
# Evaluate and plot each test set. The event label shown in the plots is read
# from assigned_test: test set 1 uses index 2, test set 2 uses index 1 and
# test set 3 uses index 0.
for obs_actual, pred_actual, event_pos in (
    (Y_test_actual_1, Y_pred_actual_1, 2),
    (Y_test_actual_2, Y_pred_actual_2, 1),
    (Y_test_actual_3, Y_pred_actual_3, 0),
):
    evaluate_and_plot(obs_actual, pred_actual, f'{assigned_test[event_pos]}', lead_time)

## 4.12 12hr 모델

In [None]:
set_all_seeds(42)

# Train with the tuned configuration: read the tuned values out of best_hps.
(best_sequence_length, best_units, best_learning_rate,
 best_dropout, best_batch_size) = (
    best_hps.get(hp_name)
    for hp_name in ('sequence_length', 'units', 'learning_rate', 'dropout', 'batch_size')
)
lead_time = 12

# Build the train / validation / test splits for this lead time.
(X_train, Y_train, X_val, Y_val,
 X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3,
 assigned_train, assigned_val, assigned_test) = split_data(best_sequence_length, lead_time)

# Create a MyHyperModel instance and build the model from best_hps.
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model; both callbacks watch the validation loss.
checkpoint_path = "best_model_with_tuning(예측선행시간적용O)_12시간.h5"
history = best_model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=100,
    batch_size=best_batch_size,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True),
    ],
)

# NOTE(review): ModelCheckpoint already writes to this same path; this save
# overwrites that file with the in-memory model — confirm this is intended.
best_model.save(checkpoint_path)

In [None]:
# Loss curves: training vs. validation loss per epoch.
plt.figure(figsize=(10, 5))
for history_key, curve_label, curve_color in (
    ('loss', 'Training Loss', 'blue'),
    ('val_loss', 'Validation Loss', 'red'),
):
    plt.plot(history.history[history_key], label=curve_label, color=curve_color)

# Title, axis labels and tick sizes.
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(loc='upper right', fontsize=15)
plt.show()

In [None]:
# Predict on each of the three test sets (still in scaled units).
Y_pred_best_1, Y_pred_best_2, Y_pred_best_3 = (
    best_model.predict(x_scaled) for x_scaled in (X_test_1, X_test_2, X_test_3)
)

# Undo the target scaling for predictions and for the test targets.
Y_pred_actual_1, Y_pred_actual_2, Y_pred_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_pred_best_1, Y_pred_best_2, Y_pred_best_3)
)
Y_test_actual_1, Y_test_actual_2, Y_test_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_test_1, Y_test_2, Y_test_3)
)


In [None]:
# Evaluate and plot each test set. The event label shown in the plots is read
# from assigned_test: test set 1 uses index 2, test set 2 uses index 1 and
# test set 3 uses index 0.
for obs_actual, pred_actual, event_pos in (
    (Y_test_actual_1, Y_pred_actual_1, 2),
    (Y_test_actual_2, Y_pred_actual_2, 1),
    (Y_test_actual_3, Y_pred_actual_3, 0),
):
    evaluate_and_plot(obs_actual, pred_actual, f'{assigned_test[event_pos]}', lead_time)

## 4.24 24hr 모델

In [None]:
set_all_seeds(42)

# Train with the tuned configuration: read the tuned values out of best_hps.
(best_sequence_length, best_units, best_learning_rate,
 best_dropout, best_batch_size) = (
    best_hps.get(hp_name)
    for hp_name in ('sequence_length', 'units', 'learning_rate', 'dropout', 'batch_size')
)
lead_time = 24

# Build the train / validation / test splits for this lead time.
(X_train, Y_train, X_val, Y_val,
 X_test_1, Y_test_1, X_test_2, Y_test_2, X_test_3, Y_test_3,
 assigned_train, assigned_val, assigned_test) = split_data(best_sequence_length, lead_time)

# Create a MyHyperModel instance and build the model from best_hps.
hypermodel = MyHyperModel()
best_model = hypermodel.build(best_hps)

# Train the model; both callbacks watch the validation loss.
checkpoint_path = "best_model_with_tuning(예측선행시간적용O)_24시간.h5"
history = best_model.fit(
    X_train,
    Y_train,
    validation_data=(X_val, Y_val),
    epochs=100,
    batch_size=best_batch_size,
    callbacks=[
        EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
        ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True),
    ],
)

# NOTE(review): ModelCheckpoint already writes to this same path; this save
# overwrites that file with the in-memory model — confirm this is intended.
best_model.save(checkpoint_path)

In [None]:
# Loss curves: training vs. validation loss per epoch.
plt.figure(figsize=(10, 5))
for history_key, curve_label, curve_color in (
    ('loss', 'Training Loss', 'blue'),
    ('val_loss', 'Validation Loss', 'red'),
):
    plt.plot(history.history[history_key], label=curve_label, color=curve_color)

# Title, axis labels and tick sizes.
plt.title('Training and Validation Loss', fontsize=22)
plt.xlabel('Epoch', fontsize=20)
plt.ylabel('Loss', fontsize=20)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)
plt.legend(loc='upper right', fontsize=15)
plt.show()

In [None]:
# Predict on each of the three test sets (still in scaled units).
Y_pred_best_1, Y_pred_best_2, Y_pred_best_3 = (
    best_model.predict(x_scaled) for x_scaled in (X_test_1, X_test_2, X_test_3)
)

# Undo the target scaling for predictions and for the test targets.
Y_pred_actual_1, Y_pred_actual_2, Y_pred_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_pred_best_1, Y_pred_best_2, Y_pred_best_3)
)
Y_test_actual_1, Y_test_actual_2, Y_test_actual_3 = (
    scaler_Y.inverse_transform(y_scaled)
    for y_scaled in (Y_test_1, Y_test_2, Y_test_3)
)


In [None]:
# Model performance evaluation and visualization function
def evaluate_and_plot(Y_test_actual, Y_pred_actual, event_label, lead_time):
    """Show error metrics and comparison plots for one test event.

    Three figures are produced: a 2x2 panel with the MSE, MAE, NSE and RMSE
    values, a line plot of observed vs. predicted values, and a scatter plot
    with a diagonal reference line.

    Args:
        Y_test_actual: Observed target values.
        Y_pred_actual: Predicted target values.
        event_label: Label used in the plot titles and passed to
            get_time_index (which is not defined in this cell).
        lead_time: Number of leading predictions dropped before plotting.

    Returns:
        None. The function only displays figures.

    NOTE(review): the metrics compare Y_test_actual[i] with Y_pred_actual[i],
    but the two comparison plots pair Y_test_actual[i] with
    Y_pred_actual[i + lead_time]. Confirm that this shift is intended.
    """
    # The value labelled 'NSE' is computed with r2_score.
    mse = mean_squared_error(Y_test_actual, Y_pred_actual)
    metrics = {
        'MSE': mse,
        'MAE': mean_absolute_error(Y_test_actual, Y_pred_actual),
        'NSE': r2_score(Y_test_actual, Y_pred_actual),
        'RMSE': np.sqrt(mse),
    }

    # Metric panel: one text-only axis per metric, ticks and tick labels hidden.
    _, axes = plt.subplots(nrows=2, ncols=2, figsize=(12, 8))
    for panel, (metric_name, metric_value) in zip(axes.flatten(), metrics.items()):
        panel.text(0.5, 0.5, f'{metric_name}\n{metric_value:.6f}',
                   fontsize=40, ha='center', va='center')
        panel.tick_params(axis='both', which='both',
                          bottom=False, top=False, left=False, right=False,
                          labelbottom=False, labelleft=False)
    plt.tight_layout()
    plt.show()

    # Time axis for the observed-vs-predicted plot.
    time_index = get_time_index(event_label, Y_test_actual)

    # Drop the first `lead_time` predictions, then cut the observations and
    # the time axis to the same number of points.
    pred_series = Y_pred_actual[lead_time:].flatten()
    n_points = len(pred_series)
    obs_series = Y_test_actual[:n_points].flatten()
    time_axis = time_index[:n_points]

    # Line plot: observed (blue) against predicted (red).
    plt.figure(figsize=(10, 5))
    plt.plot(time_axis, obs_series, label='실제 값', color='blue', linewidth=3)
    plt.plot(time_axis, pred_series, label='예측 값', color='red', linewidth=3)
    plt.title(f'고달교 수위 예측값과 실제 값 비교: {event_label}', fontsize=30, fontweight='bold')
    plt.xlabel('관측 일시', fontsize=25, fontweight='bold')
    plt.ylabel('고달교 수위', fontsize=25, fontweight='bold')
    plt.xticks(rotation=45, fontsize=20)
    plt.yticks(fontsize=20)
    plt.legend(fontsize=18, loc='upper right')
    plt.show()

    # Scatter plot with a diagonal spanning the observed value range.
    plt.figure(figsize=(8, 8))
    plt.scatter(obs_series, pred_series, color='blue')
    diagonal = [obs_series.min(), obs_series.max()]
    plt.plot(diagonal, diagonal, color='red', lw=2)
    plt.title(f'실제 값 vs. 예측 값: {event_label}', fontsize=30, fontweight='bold')
    plt.xlabel('실제 값', fontsize=25, fontweight='bold')
    plt.ylabel('예측 값', fontsize=25, fontweight='bold')
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.show()

In [None]:
# Evaluate and plot each test set. The event label shown in the plots is read
# from assigned_test: test set 1 uses index 2, test set 2 uses index 1 and
# test set 3 uses index 0.
for obs_actual, pred_actual, event_pos in (
    (Y_test_actual_1, Y_pred_actual_1, 2),
    (Y_test_actual_2, Y_pred_actual_2, 1),
    (Y_test_actual_3, Y_pred_actual_3, 0),
):
    evaluate_and_plot(obs_actual, pred_actual, f'{assigned_test[event_pos]}', lead_time)

In [None]:
# Lead times to summarise: 1 to 12 hours, plus 24 hours.
lead_times = [*range(1, 13), 24]

# Empty accumulators for the per-lead-time evaluation metrics.
mse_list, mae_list, rmse_list, nse_list = [], [], [], []

def collect_metrics(Y_test_actual, Y_pred_actual):
    """Return (MSE, MAE, RMSE, NSE) for observed vs. predicted values.

    RMSE is the square root of the MSE; the NSE value is computed with
    r2_score.
    """
    squared_error = mean_squared_error(Y_test_actual, Y_pred_actual)
    return (
        squared_error,
        mean_absolute_error(Y_test_actual, Y_pred_actual),
        np.sqrt(squared_error),
        r2_score(Y_test_actual, Y_pred_actual),
    )

# Compute the evaluation metrics for every lead time.
#
# Each lead time is scored with its own saved model and its own test split.
# Scoring the Y_test_actual_* / Y_pred_actual_* arrays already in memory would
# give the same numbers for every lead time, because those arrays belong to
# whichever section was executed last.
for lead_time in lead_times:
    # NOTE(review): assumes every lead time in `lead_times` was saved under
    # the same file-name pattern as the 11/12/24-hour sections — confirm.
    model_path = f'best_model_with_tuning(예측선행시간적용O)_{lead_time}시간.h5'
    # compile=False: the model is only used for prediction here.
    lead_model = tf.keras.models.load_model(model_path, compile=False)

    # Rebuild the split exactly as the training sections do (seed first).
    set_all_seeds(42)
    split = split_data(best_sequence_length, lead_time)
    # Positions 4..9 of the split are X_test_1, Y_test_1, ..., X_test_3, Y_test_3.
    test_sets = ((split[4], split[5]), (split[6], split[7]), (split[8], split[9]))

    # Score each test event in original (inverse-scaled) units.
    event_scores = []
    for X_event, Y_event in test_sets:
        pred_actual = scaler_Y.inverse_transform(lead_model.predict(X_event))
        test_actual = scaler_Y.inverse_transform(Y_event)
        event_scores.append(collect_metrics(test_actual, pred_actual))

    # Average over the three events; collect_metrics returns (mse, mae, rmse, nse).
    mse_mean, mae_mean, rmse_mean, nse_mean = np.mean(event_scores, axis=0)
    mse_list.append(mse_mean)
    mae_list.append(mae_mean)
    rmse_list.append(rmse_mean)
    nse_list.append(nse_mean)


In [None]:
# Plot each metric against lead time in a 2x2 grid.
plt.figure(figsize=(12, 8))

# (metric name, values per lead time, line colour), in panel order.
metric_panels = (
    ('MSE', mse_list, 'blue'),
    ('MAE', mae_list, 'orange'),
    ('RMSE', rmse_list, 'green'),
    ('NSE', nse_list, 'red'),
)

for panel_no, (metric_name, metric_values, line_color) in enumerate(metric_panels, start=1):
    plt.subplot(2, 2, panel_no)
    plt.plot(lead_times, metric_values, marker='o', color=line_color, label=metric_name)
    plt.title(f'{metric_name} vs Lead Time', fontsize=15)
    plt.xlabel('Lead Time (hours)', fontsize=12)
    plt.ylabel(metric_name, fontsize=12)
    plt.xticks(fontsize=10)
    plt.yticks(fontsize=10)
    plt.grid(True)

plt.tight_layout()
plt.show()