# In [None]:
# Process time-series data with an LSTM model and run predictions on the training, validation, and test datasets.


import pandas as pd
import numpy as np
from keras.models import Sequential, model_from_json
from keras.layers import Dense, LSTM
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import os
import matplotlib.pyplot as plt

# Input and output water-quality indicators; they are used as CSV column
# names and as part of the output file/folder names.
wtp_in, wtp_out = 'bukkou_in', 'sakuradai'

# Dataset loading and preprocessing.
def load_and_process_data(file_name, wtp_in, wtp_out):
    """Read a CSV file and return the difference of two of its columns.

    Args:
        file_name: Path of the CSV file, handed to ``pd.read_csv``.
        wtp_in: Name of the column to subtract from.
        wtp_out: Name of the column that is subtracted.

    Returns:
        A single-column DataFrame named ``target`` that holds
        ``wtp_in - wtp_out`` for every row of the file.
    """
    frame = pd.read_csv(file_name)
    difference = frame[wtp_in] - frame[wtp_out]
    return difference.to_frame(name='target')

# Load the training, validation and test sets (in that order).
df_train, df_val, df_test = (
    load_and_process_data(csv_name, wtp_in, wtp_out)
    for csv_name in ('train.csv', 'val.csv', 'test.csv')
)

# Hyperparameters.
time_steps, future = 24, 6    # input window length / extra offset of the target past the window
n_hidden = 1024               # number of units in the LSTM layer
epochs, batch_size = 50, 64   # training settings passed to model.fit

# Min-max normalisation of a dataset.
def normalize_data(df, df_base):
    """Min-max scale ``df`` using the minimum and maximum of ``df_base``.

    Args:
        df: Data to scale (DataFrame or Series).
        df_base: Reference data whose ``min()`` / ``max()`` define the scale.

    Returns:
        ``(df - min) / (max - min)`` computed with the statistics of
        ``df_base``. Where the range of ``df_base`` is zero (constant
        data) the divisor is taken as 1, so the result is ``df - min``
        instead of NaN/inf.
    """
    base_min = df_base.min()
    value_range = df_base.max() - base_min
    # A constant reference column has zero range; dividing by it would turn
    # the whole column into NaN/inf, so fall back to a divisor of 1.
    if hasattr(value_range, 'where'):
        # Per-column ranges (pandas Series): patch only the zero entries.
        value_range = value_range.where(value_range != 0, 1)
    elif value_range == 0:
        # Scalar range (e.g. df_base is a Series).
        value_range = 1
    return (df - base_min) / value_range

# Sliding-window dataset generation.
def create_dataset(df, time_steps, future):
    """Cut a series into (input window, target) pairs.

    For every start index ``i`` the input is rows ``i .. i + time_steps - 1``
    and the target is the row at index ``i + time_steps + future``.

    Args:
        df: pandas object whose ``.values`` holds the series, one row per step.
        time_steps: Number of consecutive rows in each input window.
        future: Additional offset of the target row past the end of the window.

    Returns:
        Tuple ``(data, target)`` of NumPy arrays with
        ``len(df) - time_steps - future`` samples each. When the input is
        too short to form a single sample, empty arrays with the same rank
        as the non-empty case are returned.
    """
    # Fetch the underlying array once instead of on every loop iteration.
    values = df.values
    n_samples = len(values) - time_steps - future
    if n_samples <= 0:
        # Keep the usual rank so downstream shape handling does not break.
        feature_shape = values.shape[1:]
        window = max(time_steps, 0)
        return (
            np.empty((0, window) + feature_shape, dtype=values.dtype),
            np.empty((0,) + feature_shape, dtype=values.dtype),
        )
    data = [values[i: i + time_steps] for i in range(n_samples)]
    target = [values[i + time_steps + future] for i in range(n_samples)]
    return np.array(data), np.array(target)

# Normalise every split with the statistics of the training set, then cut
# each normalised split into (window, target) samples.
df_base = df_train
df_train_normalized, df_val_normalized, df_test_normalized = (
    normalize_data(split, df_base) for split in (df_train, df_val, df_test)
)

(x_train, y_train), (x_val, y_val), (x_test, y_test) = (
    create_dataset(scaled, time_steps, future)
    for scaled in (df_train_normalized, df_val_normalized, df_test_normalized)
)

# Build the model: one LSTM layer feeding a single-unit dense output.
model = Sequential()
model.add(LSTM(n_hidden, input_shape=(time_steps, 1), activation='elu'))
model.add(Dense(1))
# `learning_rate` is the documented argument name; `lr` is a deprecated alias.
model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))

# Every artifact of this run lives in one folder. Create it before training
# so that the checkpoint callback and the saves below have somewhere to write.
new_folder = f"{wtp_in}_{wtp_out}_TimeStep{time_steps}_Future{future}"
os.makedirs(new_folder, exist_ok=True)
model_json_path = os.path.join(new_folder, 'model.json')
last_weights_path = os.path.join(new_folder, 'last_model.h5')
weightPath = os.path.join(new_folder, 'model.h5')  # best checkpoint by val_loss

# Callbacks.
callbacks = [
    EarlyStopping(monitor='val_loss', patience=5),
    ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, min_lr=1e-6),
    # Write the best checkpoint to the exact path it is reloaded from below.
    ModelCheckpoint(filepath=weightPath, monitor='val_loss', save_best_only=True),
]

# Train the model.
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_val, y_val), callbacks=callbacks, verbose=2)

# Persist the architecture and the weights of the final epoch.
with open(model_json_path, "w") as json_file:
    json_file.write(model.to_json())
model.save_weights(last_weights_path)

# Rebuild the model from the saved architecture.
with open(model_json_path, 'r') as json_file:
    loaded_model_json = json_file.read()
model = model_from_json(loaded_model_json)

# Prefer the best checkpoint; fall back to the final-epoch weights when no
# checkpoint file was written.
if os.path.exists(weightPath):
    model.load_weights(weightPath)
else:
    model.load_weights(last_weights_path)


optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
model.compile(loss="mse", optimizer=optimizer)


# Helper that stores predictions alongside the actual values.
def save_predictions(model, x, y, file_name):
    """Predict on ``x`` and write predictions and actual values to CSV.

    Args:
        model: Object exposing ``predict(x)``.
        x: Inputs passed to ``model.predict``.
        y: Actual values written next to the predictions.
        file_name: Prefix of the output files; ``_predictions.csv`` and
            ``_actual.csv`` are appended to it.
    """
    outputs = (
        ('_predictions.csv', model.predict(x)),
        ('_actual.csv', y),
    )
    for suffix, values in outputs:
        pd.DataFrame(values).to_csv(file_name + suffix)

# Save predictions and actual values for every split.
for split_name, inputs, targets in (
    ('train', x_train, y_train),
    ('val', x_val, y_val),
    ('test', x_test, y_test),
):
    save_predictions(model, inputs, targets, split_name)

# Plot the training and validation loss recorded during training.
for history_key, curve_label in (
    ('loss', 'Training loss'),
    ('val_loss', 'Validation loss'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Write the figure to disk before showing it.
plt.savefig('training_validation_loss.pdf')
plt.show()


