<a href="https://colab.research.google.com/github/OWL-408/CIT2024/blob/main/%E3%83%8F%E3%82%A4%E3%83%91%E3%83%BC%E3%83%91%E3%83%A9%E3%83%A1%E3%83%BC%E3%82%BF%E7%B5%84%E3%81%BF%E5%90%88%E3%82%8F%E3%81%9B%E6%8E%A2%E7%B4%A2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

次のハイパーパラメータ（エポック数・バッチサイズ・ニューロン数）のすべての組み合わせについて、供試データ別に誤差（検証損失）と計算時間を出力する。
誤差が最小かつ計算コストが最小となる条件が最適条件である。



In [None]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive

# Mount Google Drive at /content/drive; the folders used below live under it.
drive.mount('/content/drive')

def create_dataset(data, time_step=10):
    """Slice a 2-D array into sliding windows and next-step targets.

    Each sample is ``time_step`` consecutive rows of ``data``; its target is
    the value in column index 1 of the row immediately after the window
    (for the callers in this notebook that is the scaled 'Nominal stress').

    Args:
        data: 2-D array indexable as ``data[row, col]``.
        time_step: Number of rows per input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: the stacked windows and their targets.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step] for start in range(n_windows)]
    targets = [data[start + time_step, 1] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Local import: only needed to reset Keras global state between training runs.
from tensorflow.keras import backend as K

# Input CSV folder, and output folder for per-run history CSVs and loss plots.
input_folder = '/content/drive/MyDrive/program_LSTM/input-data'
output_folder = '/content/drive/MyDrive/program_LSTM/tuning-result/plot'
os.makedirs(output_folder, exist_ok=True)

# Defined before the sweep so finished runs can be checkpointed while it is
# still running (a Colab session may stop before the full grid completes).
results_summary_path = '/content/drive/MyDrive/program_LSTM/tuning-result/LSTM_tuning_summary.csv'
os.makedirs(os.path.dirname(results_summary_path), exist_ok=True)

# Candidate values for epochs, batch size and LSTM units; every combination is trained.
epoch_options = [10, 30, 50, 100]
batch_size_options = [32, 64, 128, 256]
neuron_options = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# One dict per trained configuration (file, hyperparameters, loss, wall time).
result_summary = []

# Font sizes used in the learning-curve plots.
title_font_size = 24
label_font_size = 20
legend_font_size = 20
ticks_font_size = 18

# Run the full grid for every CSV file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Keep only the columns the model needs; skip files that lack any of them.
    required_columns = ['time', 'Nominal strain', 'Nominal stress']
    if not all(column in df.columns for column in required_columns):
        print(f"Required columns not found in {filename}. Skipping this file.")
        continue

    # .copy() so the assignment below writes to an independent frame, not a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df[required_columns].copy()
    df['time'] = pd.to_datetime(df['time'])

    # Use the timestamps as the index so only the two feature columns are scaled.
    df.set_index('time', inplace=True)

    # Scale both feature columns to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df)

    # Build sliding windows and next-step targets.
    time_step = 10
    X, Y = create_dataset(df_scaled, time_step)

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(X) * 0.7)
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # A file too short to yield windows would otherwise fail on X_train.shape below.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    for epochs in epoch_options:
        for batch_size in batch_size_options:
            for neurons in neuron_options:
                print(f"Training with {epochs} epochs, batch size of {batch_size}, and {neurons} neurons...")

                # Release state left by the previous model; done before the
                # timer starts so it is not counted as processing time.
                K.clear_session()

                # Start timing (covers model construction and training).
                start_time = time.time()

                # Two stacked LSTM layers followed by a single-output dense layer.
                model = Sequential()
                model.add(LSTM(neurons, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
                model.add(LSTM(neurons))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mean_squared_error')

                # Train, evaluating on the held-out tail after every epoch.
                history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                                    epochs=epochs, batch_size=batch_size, verbose=0)

                # Stop timing.
                end_time = time.time()
                processing_time = end_time - start_time

                # Validation loss of the final epoch is the score for this configuration.
                val_loss = history.history['val_loss'][-1]
                print(f"Validation Loss with {neurons} Neurons: {val_loss:.4f} (Time: {processing_time:.2f} seconds)")

                # Save the per-epoch loss history.
                history_df = pd.DataFrame(history.history)
                history_filename = f"{filename}_epochs-{epochs}_batch-{batch_size}_neurons-{neurons}_history.csv"
                history_df.to_csv(os.path.join(output_folder, history_filename), index=False)

                # Plot the learning curves.
                plt.figure(figsize=(10, 6))
                plt.plot(history.history['loss'], label='Train Loss')
                plt.plot(history.history['val_loss'], label='Validation Loss')

                # Title, axis labels and legend.
                plt.title(f'Model Loss for {filename}\nEpochs: {epochs}, Batch Size: {batch_size}, Neurons: {neurons}',
                          fontsize=title_font_size, fontweight='bold')
                plt.ylabel('Loss', fontsize=label_font_size)
                plt.xlabel('Epoch', fontsize=label_font_size)
                plt.legend(loc='upper right', fontsize=legend_font_size)
                plt.xticks(fontsize=ticks_font_size)
                plt.yticks(fontsize=ticks_font_size)

                plt.tight_layout()
                plt.savefig(f"{output_folder}/{filename}_epochs-{epochs}_batch-{batch_size}_neurons-{neurons}_plot.png", dpi=300)
                plt.close()

                # Record the result for this configuration.
                result_summary.append({
                    'filename': filename,
                    'epochs': epochs,
                    'batch_size': batch_size,
                    'neurons': neurons,
                    'val_loss': val_loss,
                    'processing_time': processing_time
                })

                # Checkpoint the summary so an interrupted sweep keeps its finished runs.
                pd.DataFrame(result_summary).to_csv(results_summary_path, index=False)

# Write the final summary (also covers the case where no run was recorded).
results_df = pd.DataFrame(result_summary)
results_df.to_csv(results_summary_path, index=False)

print(f'Tuning results saved to {results_summary_path}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Training with 10 epochs, batch size of 32, and 1 neurons...


  super().__init__(**kwargs)


Validation Loss with 1 Neurons: 0.0169 (Time: 5.60 seconds)
Training with 10 epochs, batch size of 32, and 5 neurons...
Validation Loss with 5 Neurons: 0.0370 (Time: 5.58 seconds)
Training with 10 epochs, batch size of 32, and 10 neurons...
Validation Loss with 10 Neurons: 0.0079 (Time: 5.69 seconds)
Training with 10 epochs, batch size of 32, and 20 neurons...
Validation Loss with 20 Neurons: 0.0107 (Time: 5.63 seconds)
Training with 10 epochs, batch size of 32, and 30 neurons...
Validation Loss with 30 Neurons: 0.0101 (Time: 6.34 seconds)
Training with 10 epochs, batch size of 32, and 40 neurons...
Validation Loss with 40 Neurons: 0.0136 (Time: 7.14 seconds)
Training with 10 epochs, batch size of 32, and 50 neurons...
Validation Loss with 50 Neurons: 0.0096 (Time: 5.47 seconds)
Training with 10 epochs, batch size of 32, and 60 neurons...
Validation Loss with 60 Neurons: 0.0097 (Time: 7.31 seconds)
Training with 10 epochs, batch size of 32, and 70 neurons...
Validation Loss with 70 Neu

サポートコードA：全データでは処理が重くなるため、データ点数を指定してリサンプリングする場合

In [None]:
import os
import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive

# Mount Google Drive at /content/drive; the folders used below live under it.
drive.mount('/content/drive')

def create_dataset(data, time_step=10):
    """Build (window, next-value) training pairs from a 2-D array.

    For every start row ``s`` with a full window available, the input is
    ``data[s:s + time_step]`` and the target is ``data[s + time_step, 1]``
    (column index 1 of the row right after the window).

    Args:
        data: 2-D array indexable as ``data[row, col]``.
        time_step: Window length in rows.

    Returns:
        ``(X, Y)`` as NumPy arrays of windows and targets.
    """
    starts = range(len(data) - time_step)
    pairs = [(data[s:s + time_step], data[s + time_step, 1]) for s in starts]
    inputs = [window for window, _ in pairs]
    outputs = [target for _, target in pairs]
    return np.array(inputs), np.array(outputs)

# Local import: only needed to reset Keras global state between training runs.
from tensorflow.keras import backend as K

# Input CSV folder, and output folder for per-run history CSVs and loss plots.
input_folder = '/content/drive/MyDrive/program_LSTM/input-data(2)/F'
output_folder = '/content/drive/MyDrive/program_LSTM/tuning-result_resampling1000/plot'
os.makedirs(output_folder, exist_ok=True)

# The summary lives in a different directory from output_folder, so create it
# up front instead of failing on the final write after the whole sweep.
results_summary_path = '/content/drive/MyDrive/program_LSTM/tuning-result/LSTM_tuning_F1000_summary.csv'
os.makedirs(os.path.dirname(results_summary_path), exist_ok=True)

# Candidate values for epochs, batch size and LSTM units; every combination is trained.
epoch_options = [10, 30, 50, 100]
batch_size_options = [32, 64, 128, 256]
neuron_options = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# One dict per trained configuration (file, hyperparameters, loss, wall time).
result_summary = []

# Font sizes used in the learning-curve plots.
title_font_size = 24
label_font_size = 20
legend_font_size = 20
ticks_font_size = 18

# Run the full grid for every CSV file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Keep only the columns the model needs; skip files that lack any of them.
    required_columns = ['time', 'Nominal strain', 'Nominal stress']
    if not all(column in df.columns for column in required_columns):
        print(f"Required columns not found in {filename}. Skipping this file.")
        continue

    # .copy() so the assignment below writes to an independent frame, not a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df[required_columns].copy()
    df['time'] = pd.to_datetime(df['time'])

    # Use the timestamps as the index.
    df.set_index('time', inplace=True)

    # Cleaning: drop rows with missing values, then drop repeated timestamps.
    df.dropna(inplace=True)
    df = df[~df.index.duplicated(keep='first')]

    # At least two distinct timestamps are needed to define a resampling range.
    if len(df) < 2:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    # Resample to exactly 1000 evenly spaced timestamps.
    # Reindexing straight onto the new grid would keep only samples whose
    # timestamps coincide with it, so almost all measurements would be dropped
    # before interpolating. Interpolate on the union of old and new timestamps
    # (weighted by time), then keep just the new grid.
    index_new = pd.date_range(start=df.index.min(), end=df.index.max(), periods=1000)
    df_resampled = (
        df.reindex(df.index.union(index_new))
        .interpolate(method='time')
        .reindex(index_new)
    )

    # Scale both feature columns to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df_resampled)

    # Build sliding windows and next-step targets.
    time_step = 10
    X, Y = create_dataset(df_scaled, time_step)

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(X) * 0.7)
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # Skip files that cannot provide both a training and a validation part.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    for epochs in epoch_options:
        for batch_size in batch_size_options:
            for neurons in neuron_options:
                print(f"Training with {epochs} epochs, batch size of {batch_size}, and {neurons} neurons...")

                # Release state left by the previous model; done before the
                # timer starts so it is not counted as processing time.
                K.clear_session()

                # Start timing (covers model construction and training).
                start_time = time.time()

                # Two stacked LSTM layers followed by a single-output dense layer.
                model = Sequential()
                model.add(LSTM(neurons, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
                model.add(LSTM(neurons))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mean_squared_error')

                # Train, evaluating on the held-out tail after every epoch.
                history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                                    epochs=epochs, batch_size=batch_size, verbose=0)

                # Stop timing.
                end_time = time.time()
                processing_time = end_time - start_time

                # Validation loss of the final epoch is the score for this configuration.
                val_loss = history.history['val_loss'][-1]
                print(f"Validation Loss with {neurons} Neurons: {val_loss:.4f} (Time: {processing_time:.2f} seconds)")

                # Save the per-epoch loss history.
                history_df = pd.DataFrame(history.history)
                history_filename = f"{filename}_epochs-{epochs}_batch-{batch_size}_neurons-{neurons}_history.csv"
                history_df.to_csv(os.path.join(output_folder, history_filename), index=False)

                # Plot the learning curves.
                plt.figure(figsize=(10, 6))
                plt.plot(history.history['loss'], label='Train Loss')
                plt.plot(history.history['val_loss'], label='Validation Loss')

                # Title, axis labels and legend.
                plt.title(f'Model Loss for {filename}\nEpochs: {epochs}, Batch Size: {batch_size}, Neurons: {neurons}',
                          fontsize=title_font_size, fontweight='bold')
                plt.ylabel('Loss', fontsize=label_font_size)
                plt.xlabel('Epoch', fontsize=label_font_size)
                plt.legend(loc='upper right', fontsize=legend_font_size)
                plt.xticks(fontsize=ticks_font_size)
                plt.yticks(fontsize=ticks_font_size)

                plt.tight_layout()
                plt.savefig(f"{output_folder}/{filename}_epochs-{epochs}_batch-{batch_size}_neurons-{neurons}_plot.png", dpi=300)
                plt.close()

                # Record the result for this configuration.
                result_summary.append({
                    'filename': filename,
                    'epochs': epochs,
                    'batch_size': batch_size,
                    'neurons': neurons,
                    'val_loss': val_loss,
                    'processing_time': processing_time
                })

                # Checkpoint the summary so an interrupted sweep keeps its finished runs.
                pd.DataFrame(result_summary).to_csv(results_summary_path, index=False)

# Write the final summary (also covers the case where no run was recorded).
results_df = pd.DataFrame(result_summary)
results_df.to_csv(results_summary_path, index=False)

print(f'Tuning results saved to {results_summary_path}')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Training with 10 epochs, batch size of 32, and 1 neurons...


  super().__init__(**kwargs)


Validation Loss with 1 Neurons: 0.0162 (Time: 7.72 seconds)
Training with 10 epochs, batch size of 32, and 5 neurons...
Validation Loss with 5 Neurons: 0.0099 (Time: 6.56 seconds)
Training with 10 epochs, batch size of 32, and 10 neurons...
Validation Loss with 10 Neurons: 0.0023 (Time: 6.39 seconds)
Training with 10 epochs, batch size of 32, and 20 neurons...
Validation Loss with 20 Neurons: 0.0003 (Time: 8.72 seconds)
Training with 10 epochs, batch size of 32, and 30 neurons...
Validation Loss with 30 Neurons: 0.0011 (Time: 6.11 seconds)
Training with 10 epochs, batch size of 32, and 40 neurons...
Validation Loss with 40 Neurons: 0.0004 (Time: 7.57 seconds)
Training with 10 epochs, batch size of 32, and 50 neurons...
Validation Loss with 50 Neurons: 0.0006 (Time: 11.13 seconds)
Training with 10 epochs, batch size of 32, and 60 neurons...
Validation Loss with 60 Neurons: 0.0003 (Time: 8.68 seconds)
Training with 10 epochs, batch size of 32, and 70 neurons...
Validation Loss with 70 Ne

サポートコードB：ハイパーパラメータ検証プログラムが途中で停止した場合に、テキスト出力をCSVに変換するためのプログラム

In [None]:
# -*- coding: utf-8 -*-

import re
import pandas as pd

# Paths of the captured console log (input) and of the CSV to produce (output).
input_file_path = '/content/drive/MyDrive/program_LSTM/tuning-result/hyperparameter_result_B.txt'
output_file_path = '/content/drive/MyDrive/program_LSTM/tuning-result/hyperparameter_results_B01.csv'

# Read the whole log into memory.
with open(input_file_path, 'r', encoding='utf-8') as f:
    text_data = f.read()

# Debug aid: show the beginning of what was read.
print("ファイル内容（一部）:\n", text_data[:500])

# One record is a "Training with ..." line followed by its
# "Validation Loss with ..." line.
#  - Any text between the two lines (e.g. a library warning printed during
#    training) is skipped, but never across the next "Training with", so an
#    unfinished run is not paired with a later run's result.
#  - The neuron count on the result line must equal the one on the training
#    line (backreference \3).
#  - The loss may also be "nan" or "inf", which is how ":.4f" renders
#    non-finite floats; such runs are kept instead of silently dropped.
pattern = re.compile(
    r"Training with (\d+)\s+epochs,\s+batch size of (\d+),\s+and (\d+)\s+neurons\.\.\."
    r"(?:(?!Training with)[\s\S])*?"
    r"Validation Loss with \3\s+Neurons:\s+([\d.]+|nan|inf)\s+\(Time:\s+([\d.]+)\s+seconds\)"
)

# Extract every (epochs, batch_size, neurons, val_loss, processing_time) tuple.
matches = pattern.findall(text_data)
print("抽出されたエントリ数:", len(matches))
print("抽出内容（デバッグ用）:", matches)

# Dataset name written into the 'filename' column (set manually).
filename = 'B.csv'

# Build the table; findall returns strings, so convert to numeric types.
df = pd.DataFrame(matches, columns=['epochs', 'batch_size', 'neurons', 'val_loss', 'processing_time'])
df = df.astype({
    'epochs': int,
    'batch_size': int,
    'neurons': int,
    'val_loss': float,
    'processing_time': float,
})

# Put the dataset name in the first column.
df.insert(0, 'filename', filename)

# Write the result.
df.to_csv(output_file_path, index=False)
print(f"結果がCSVファイルに保存されました: {output_file_path}")


ファイル内容（一部）:
 Training with 10 epochs, batch size of 32, and 1 neurons...
Validation Loss with 1 Neurons: 0.0706 (Time: 10.67 seconds)
Training with 10 epochs, batch size of 32, and 5 neurons...
Validation Loss with 5 Neurons: 0.0056 (Time: 19.47 seconds)
Training with 10 epochs, batch size of 32, and 10 neurons...
Validation Loss with 10 Neurons: 0.0152 (Time: 13.85 seconds)
Training with 10 epochs, batch size of 32, and 20 neurons...
Validation Loss with 20 Neurons: 0.0207 (Time: 13.27 seconds)
Training wit
抽出されたエントリ数: 191
抽出内容（デバッグ用）: [('10', '32', '1', '0.0706', '10.67'), ('10', '32', '5', '0.0056', '19.47'), ('10', '32', '10', '0.0152', '13.85'), ('10', '32', '20', '0.0207', '13.27'), ('10', '32', '30', '0.0056', '14.36'), ('10', '32', '40', '0.0072', '15.65'), ('10', '32', '50', '0.0034', '14.41'), ('10', '32', '60', '0.0016', '15.41'), ('10', '32', '70', '0.0018', '17.94'), ('10', '32', '80', '0.0018', '16.77'), ('10', '32', '90', '0.0023', '19.58'), ('10', '32', '100', '0.0022',

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import time
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive
import json

# Mount Google Drive at /content/drive; the folders used below live under it.
drive.mount('/content/drive')

def create_dataset(data, time_step=10):
    """Slice a 2-D array into sliding windows and next-step targets.

    Each sample is ``time_step`` consecutive rows of ``data``; its target is
    the value in column index 1 of the row immediately after the window.

    Args:
        data: 2-D array indexable as ``data[row, col]``.
        time_step: Number of rows per input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: the stacked windows and their targets.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step] for start in range(n_windows)]
    targets = [data[start + time_step, 1] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

def save_intermediate_results(filepath, results):
    """Write ``results`` to ``filepath`` as JSON, replacing the file atomically.

    The data is first written to ``<filepath>.tmp`` and then moved over the
    target with ``os.replace``. If the process is interrupted while writing,
    the previous checkpoint file is left intact instead of being truncated.

    Args:
        filepath: Destination path of the JSON file.
        results: JSON-serializable object to store.
    """
    tmp_path = f"{filepath}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump(results, f)
    os.replace(tmp_path, filepath)

def load_intermediate_results(filepath):
    """Return the JSON content stored at ``filepath``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The decoded JSON object, or an empty dict when the path does not exist.
    """
    if not os.path.exists(filepath):
        return {}
    with open(filepath, 'r') as handle:
        return json.load(handle)

# Local import: only needed to reset Keras global state between training runs.
from tensorflow.keras import backend as K

# Input CSV folder and the JSON file used to checkpoint per-configuration losses.
input_folder = '/content/drive/MyDrive/program_LSTM/input-data'
intermediate_results_file = '/content/drive/MyDrive/program_LSTM/tuning_result.json'

# Candidate values for epochs, batch size and LSTM units.
epoch_options = [10, 30, 50, 100]
batch_size_options = [32, 64, 128]
neuron_options = [40, 50, 60, 70, 80]

# Load losses stored by an earlier (possibly interrupted) run.
intermediate_results = load_intermediate_results(intermediate_results_file)

# Best corrected loss seen across all datasets, and where it came from.
global_best_loss = float('inf')
global_best_params = {}

# Process every CSV file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Keep only the columns the model needs; skip files that lack any of them.
    required_columns = ['time', 'Nominal strain', 'Nominal stress']
    if not all(column in df.columns for column in required_columns):
        print(f"Required columns not found in {filename}. Skipping this file.")
        continue

    # .copy() so the assignment below writes to an independent frame, not a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df[required_columns].copy()
    df['time'] = pd.to_datetime(df['time'])

    # Resample to 1-second bins (mean), fill gaps by interpolation and keep the
    # first 100 rows. '1s' is the current spelling of the deprecated '1S' alias.
    df = df.resample('1s', on='time').mean().interpolate().head(100)

    # Scale both feature columns to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df)

    # Build sliding windows and next-step targets.
    time_step = 10
    X, Y = create_dataset(df_scaled, time_step)

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(X) * 0.7)
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # A file too short to yield windows would otherwise fail on X_train.shape below.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    best_loss_overall = float('inf')
    best_params_overall = {}

    # (result_key, val_loss) for every combination of the CURRENT grid.
    grid_results = []

    for epochs in epoch_options:
        for batch_size in batch_size_options:
            for neurons in neuron_options:
                # Reuse a stored loss when this combination was already trained.
                result_key = f"{filename}_{epochs}_{batch_size}_{neurons}"
                if result_key in intermediate_results:
                    val_loss = intermediate_results[result_key]
                else:
                    print(f"Training with {epochs} epochs, batch size of {batch_size}, and {neurons} neurons...")

                    # Release state left by the previous model.
                    K.clear_session()

                    # Two stacked LSTM layers followed by a single-output dense layer.
                    model = Sequential()
                    model.add(LSTM(neurons, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
                    model.add(LSTM(neurons))
                    model.add(Dense(1))
                    model.compile(optimizer='adam', loss='mean_squared_error')

                    # Train, evaluating on the held-out tail after every epoch.
                    history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                                        epochs=epochs, batch_size=batch_size, verbose=0)

                    # Validation loss of the final epoch.
                    val_loss = history.history['val_loss'][-1]

                    # Checkpoint immediately so the run can be resumed.
                    intermediate_results[result_key] = val_loss
                    save_intermediate_results(intermediate_results_file, intermediate_results)

                grid_results.append((result_key, val_loss))

    # Smallest validation loss for this dataset within the current grid.
    min_mse = min(loss for _, loss in grid_results)

    # Rank only the combinations of the current grid. Scanning every stored key
    # by filename prefix would also pick up stale entries from earlier grids
    # and keys of other files that merely share the prefix.
    for key, val_loss in grid_results:
        corrected_val_loss = val_loss - min_mse

        if corrected_val_loss < best_loss_overall:
            best_loss_overall = corrected_val_loss
            best_params_overall = {
                'params_key': key,
                'corrected_val_loss': corrected_val_loss
            }

    # No comparable loss (e.g. every value is NaN): nothing to report for this file.
    if not best_params_overall:
        print(f"No usable validation loss for {filename}. Skipping this file.")
        continue

    # Report the best combination for this dataset.
    print(f"Best Parameters for {filename}: {best_params_overall['params_key']}, "
          f"Corrected Validation Loss = {best_loss_overall:.4f}")

    # NOTE(review): the corrected loss of each dataset's best entry is 0 by
    # construction (its own minimum is subtracted), so this comparison keeps
    # the first dataset processed. Confirm what the global ranking should use.
    if best_loss_overall < global_best_loss:
        global_best_loss = best_loss_overall
        global_best_params = {
            'filename': filename,
            'params_key': best_params_overall['params_key'],
            'corrected_val_loss': best_loss_overall
        }

# Report the best combination across all datasets (if any was evaluated).
if global_best_params:
    print(f"\nGlobal Best Parameters Across All Datasets: Filename = {global_best_params['filename']}, "
          f"Params Key = {global_best_params['params_key']}, "
          f"Corrected Validation Loss = {global_best_params['corrected_val_loss']:.4f}")
else:
    print("\nNo datasets were evaluated; no global best parameters to report.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Best Parameters for B.csv: B.csv_100_128_60, Corrected Validation Loss = 0.0000


  df = df.resample('1S', on='time').mean().interpolate().head(100)
  df = df.resample('1S', on='time').mean().interpolate().head(100)


Best Parameters for F-P.csv: F-P.csv_50_64_60, Corrected Validation Loss = 0.0000


  df = df.resample('1S', on='time').mean().interpolate().head(100)


Best Parameters for F.csv: F.csv_50_32_40, Corrected Validation Loss = 0.0000
Training with 50 epochs, batch size of 128, and 40 neurons...


  df = df.resample('1S', on='time').mean().interpolate().head(100)
  super().__init__(**kwargs)


Training with 50 epochs, batch size of 128, and 50 neurons...
Training with 50 epochs, batch size of 128, and 60 neurons...
Training with 50 epochs, batch size of 128, and 70 neurons...
Training with 50 epochs, batch size of 128, and 80 neurons...
Training with 100 epochs, batch size of 32, and 40 neurons...
Training with 100 epochs, batch size of 32, and 50 neurons...
Training with 100 epochs, batch size of 32, and 60 neurons...
Training with 100 epochs, batch size of 32, and 70 neurons...
Training with 100 epochs, batch size of 32, and 80 neurons...
Training with 100 epochs, batch size of 64, and 40 neurons...
Training with 100 epochs, batch size of 64, and 50 neurons...
Training with 100 epochs, batch size of 64, and 60 neurons...
Training with 100 epochs, batch size of 64, and 70 neurons...
Training with 100 epochs, batch size of 64, and 80 neurons...
Training with 100 epochs, batch size of 128, and 40 neurons...
Training with 100 epochs, batch size of 128, and 50 neurons...
Traini

In [None]:
import os
import time
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive

# Mount Google Drive at /content/drive; the input folder used below lives under it.
drive.mount('/content/drive')

def create_dataset(data, time_step=10):
    """Build (window, next-value) training pairs from a 2-D array.

    For every start row ``s`` with a full window available, the input is
    ``data[s:s + time_step]`` and the target is ``data[s + time_step, 1]``
    (column index 1 of the row right after the window).

    Args:
        data: 2-D array indexable as ``data[row, col]``.
        time_step: Window length in rows.

    Returns:
        ``(X, Y)`` as NumPy arrays of windows and targets.
    """
    starts = range(len(data) - time_step)
    pairs = [(data[s:s + time_step], data[s + time_step, 1]) for s in starts]
    inputs = [window for window, _ in pairs]
    outputs = [target for _, target in pairs]
    return np.array(inputs), np.array(outputs)

# Local import: only needed to reset Keras global state between training runs.
from tensorflow.keras import backend as K

# Folder containing the input CSV files.
input_folder = '/content/drive/MyDrive/program_LSTM/input-data'

# Candidate values for epochs, batch size and LSTM units.
epoch_options = [10, 30, 50, 100]
batch_size_options = [32, 64, 128, 256]
neuron_options = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Best corrected loss seen across all datasets, and where it came from.
global_best_loss = float('inf')
global_best_params = {}

# Process every CSV file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Keep only the columns the model needs; skip files that lack any of them.
    required_columns = ['time', 'Nominal strain', 'Nominal stress']
    if not all(column in df.columns for column in required_columns):
        print(f"Required columns not found in {filename}. Skipping this file.")
        continue

    # .copy() so the assignment below writes to an independent frame, not a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df[required_columns].copy()
    df['time'] = pd.to_datetime(df['time'])

    # Resample to 1-second bins (mean), fill gaps by interpolation and keep the
    # first 100 rows. '1s' is the current spelling of the deprecated '1S' alias.
    df = df.resample('1s', on='time').mean().interpolate().head(100)

    # Scale both feature columns to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df)

    # Build sliding windows and next-step targets.
    time_step = 10
    X, Y = create_dataset(df_scaled, time_step)

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(X) * 0.7)
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # A file too short to yield windows would otherwise fail on X_train.shape below.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    best_loss_overall = float('inf')
    best_params_overall = {}

    # (epochs, batch_size, neurons, val_loss) for every trained combination.
    evaluated = []

    for epochs in epoch_options:
        for batch_size in batch_size_options:
            for neurons in neuron_options:
                print(f"Training with {epochs} epochs, batch size of {batch_size}, and {neurons} neurons...")

                # Release state left by the previous model.
                K.clear_session()

                # Two stacked LSTM layers followed by a single-output dense layer.
                model = Sequential()
                model.add(LSTM(neurons, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
                model.add(LSTM(neurons))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mean_squared_error')

                # Train, evaluating on the held-out tail after every epoch.
                history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                                    epochs=epochs, batch_size=batch_size, verbose=0)

                # Validation loss of the final epoch.
                val_loss = history.history['val_loss'][-1]
                evaluated.append((epochs, batch_size, neurons, val_loss))

    # Smallest validation loss for this dataset.
    min_mse = min(entry[3] for entry in evaluated)

    # Second pass over the stored results to compute the corrected loss.
    # The losses recorded above are reused: calling model.fit() again here
    # would keep training the last model built (largest neuron count) and
    # attribute its loss to hyperparameters it was never built with.
    for epochs, batch_size, neurons, val_loss in evaluated:
        corrected_val_loss = val_loss - min_mse
        print(f"Corrected Val Loss with {neurons} Neurons: {corrected_val_loss:.4f}")

        # Keep the combination with the smallest corrected loss.
        if corrected_val_loss < best_loss_overall:
            best_loss_overall = corrected_val_loss
            best_params_overall = {
                'epochs': epochs,
                'batch_size': batch_size,
                'neurons': neurons
            }

    # No comparable loss (e.g. every value is NaN): nothing to report for this file.
    if not best_params_overall:
        print(f"No usable validation loss for {filename}. Skipping this file.")
        continue

    # Report the best combination for this dataset.
    print(f"Best Parameters for {filename}: Epochs = {best_params_overall['epochs']}, "
          f"Batch Size = {best_params_overall['batch_size']}, Neurons = {best_params_overall['neurons']}, "
          f"Corrected Validation Loss = {best_loss_overall:.4f}")

    # NOTE(review): the corrected loss of each dataset's best entry is 0 by
    # construction (its own minimum is subtracted), so this comparison keeps
    # the first dataset processed. Confirm what the global ranking should use.
    if best_loss_overall < global_best_loss:
        global_best_loss = best_loss_overall
        global_best_params = {
            'filename': filename,
            'epochs': best_params_overall['epochs'],
            'batch_size': best_params_overall['batch_size'],
            'neurons': best_params_overall['neurons'],
            'corrected_val_loss': best_loss_overall
        }

# Report the best combination across all datasets (if any was evaluated).
if global_best_params:
    print(f"\nGlobal Best Parameters Across All Datasets: Filename = {global_best_params['filename']}, "
          f"Epochs = {global_best_params['epochs']}, Batch Size = {global_best_params['batch_size']}, "
          f"Neurons = {global_best_params['neurons']}, Corrected Validation Loss = {global_best_params['corrected_val_loss']:.4f}")
else:
    print("\nNo datasets were evaluated; no global best parameters to report.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Training with 10 epochs, batch size of 32, and 1 neurons...


  df = df.resample('1S', on='time').mean().interpolate().head(100)
  super().__init__(**kwargs)


Training with 10 epochs, batch size of 32, and 5 neurons...
Training with 10 epochs, batch size of 32, and 10 neurons...
Training with 10 epochs, batch size of 32, and 20 neurons...
Training with 10 epochs, batch size of 32, and 30 neurons...
Training with 10 epochs, batch size of 32, and 40 neurons...
Training with 10 epochs, batch size of 32, and 50 neurons...
Training with 10 epochs, batch size of 32, and 60 neurons...
Training with 10 epochs, batch size of 32, and 70 neurons...
Training with 10 epochs, batch size of 32, and 80 neurons...
Training with 10 epochs, batch size of 32, and 90 neurons...
Training with 10 epochs, batch size of 32, and 100 neurons...
Training with 10 epochs, batch size of 64, and 1 neurons...
Training with 10 epochs, batch size of 64, and 5 neurons...
Training with 10 epochs, batch size of 64, and 10 neurons...


KeyboardInterrupt: 

In [None]:
import os
import time
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from google.colab import drive

# Mount Google Drive at /content/drive; the input folder used below lives under it.
drive.mount('/content/drive')

def create_dataset(data, time_step=10):
    """Slice a 2-D array into sliding windows and next-step targets.

    Each sample is ``time_step`` consecutive rows of ``data``; its target is
    the value in column index 1 of the row immediately after the window.

    Args:
        data: 2-D array indexable as ``data[row, col]``.
        time_step: Number of rows per input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays: the stacked windows and their targets.
    """
    n_windows = len(data) - time_step
    windows = [data[start:start + time_step] for start in range(n_windows)]
    targets = [data[start + time_step, 1] for start in range(n_windows)]
    return np.array(windows), np.array(targets)

# Local import: only needed to reset Keras global state between training runs.
from tensorflow.keras import backend as K

# Folder containing the input CSV files.
input_folder = '/content/drive/MyDrive/program_LSTM/input-data'

# Candidate values for epochs, batch size and LSTM units.
epoch_options = [10, 30, 50, 100]
batch_size_options = [32, 64, 128, 256]
neuron_options = [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Process every CSV file in the input folder.
for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    df = pd.read_csv(file_path)

    # Keep only the columns the model needs; skip files that lack any of them.
    required_columns = ['time', 'Nominal strain', 'Nominal stress']
    if not all(column in df.columns for column in required_columns):
        print(f"Required columns not found in {filename}. Skipping this file.")
        continue

    # .copy() so the assignment below writes to an independent frame, not a
    # slice of the original (avoids pandas' SettingWithCopyWarning).
    df = df[required_columns].copy()
    df['time'] = pd.to_datetime(df['time'])

    # Resample to 1-second bins (mean), fill gaps by interpolation and keep the
    # first 100 rows. '1s' is the current spelling of the deprecated '1S' alias.
    df = df.resample('1s', on='time').mean().interpolate().head(100)

    # Scale both feature columns to [0, 1].
    scaler = MinMaxScaler(feature_range=(0, 1))
    df_scaled = scaler.fit_transform(df)

    # Build sliding windows and next-step targets.
    time_step = 10
    X, Y = create_dataset(df_scaled, time_step)

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(X) * 0.7)
    X_train, X_test = X[:train_size], X[train_size:]
    Y_train, Y_test = Y[:train_size], Y[train_size:]

    # A file too short to yield windows would otherwise fail on X_train.shape below.
    if len(X_train) == 0 or len(X_test) == 0:
        print(f"Not enough data in {filename} for training and testing. Skipping this file.")
        continue

    best_loss_overall = float('inf')
    best_params_overall = {}

    for epochs in epoch_options:
        for batch_size in batch_size_options:
            # Best neuron count for this (epochs, batch_size) pair.
            best_loss = float('inf')
            best_neurons = 0

            for neurons in neuron_options:
                print(f"Training with {epochs} epochs, batch size of {batch_size}, and {neurons} neurons...")

                # Release state left by the previous model.
                K.clear_session()

                # Two stacked LSTM layers followed by a single-output dense layer.
                model = Sequential()
                model.add(LSTM(neurons, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
                model.add(LSTM(neurons))
                model.add(Dense(1))
                model.compile(optimizer='adam', loss='mean_squared_error')

                # Train, evaluating on the held-out tail after every epoch.
                history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                                    epochs=epochs, batch_size=batch_size, verbose=0)

                # Validation loss of the final epoch.
                val_loss = history.history['val_loss'][-1]
                print(f"Validation Loss with {neurons} Neurons: {val_loss:.4f}")

                # Track the best neuron count for this pair.
                if val_loss < best_loss:
                    best_loss = val_loss
                    best_neurons = neurons

            # Promote this pair's best result if it beats the best so far.
            if best_loss < best_loss_overall:
                best_loss_overall = best_loss
                best_params_overall = {
                    'epochs': epochs,
                    'batch_size': batch_size,
                    'neurons': best_neurons
                }

    # No comparable loss (e.g. every value is NaN): nothing to report for this file.
    if not best_params_overall:
        print(f"No usable validation loss for {filename}. Skipping this file.")
        continue

    # Report the best combination for this dataset.
    print(f"Best Parameters for {filename}: Epochs = {best_params_overall['epochs']}, "
          f"Batch Size = {best_params_overall['batch_size']}, Neurons = {best_params_overall['neurons']}, "
          f"Validation Loss = {best_loss_overall:.4f}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


  df = df.resample('1S', on='time').mean().interpolate().head(100)
  super().__init__(**kwargs)


Training with 10 epochs, batch size of 32, and 1 neurons...
Validation Loss with 1 Neurons: 0.6073
Training with 10 epochs, batch size of 32, and 5 neurons...
Validation Loss with 5 Neurons: 0.2018
Training with 10 epochs, batch size of 32, and 10 neurons...
Validation Loss with 10 Neurons: 0.2821
Training with 10 epochs, batch size of 32, and 20 neurons...
Validation Loss with 20 Neurons: 0.0657
Training with 10 epochs, batch size of 32, and 30 neurons...
Validation Loss with 30 Neurons: 0.0315
Training with 10 epochs, batch size of 32, and 40 neurons...
Validation Loss with 40 Neurons: 0.0117
Training with 10 epochs, batch size of 32, and 50 neurons...
Validation Loss with 50 Neurons: 0.0150
Training with 10 epochs, batch size of 32, and 60 neurons...
Validation Loss with 60 Neurons: 0.0098
Training with 10 epochs, batch size of 32, and 70 neurons...
Validation Loss with 70 Neurons: 0.0086
Training with 10 epochs, batch size of 32, and 80 neurons...
Validation Loss with 80 Neurons: 0

KeyboardInterrupt: 