In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# 1. 讀取與基本處理
file_path = 'IMPACT.sensors.csv'
data = pd.read_csv(file_path)

timestamp_column = 'createdAt'
data[timestamp_column] = pd.to_datetime(data[timestamp_column])
data = data.sort_values(by=timestamp_column)

# 2. 將特徵值先四捨五入到兩位小數（若原CSV只有兩位，可省略這一步）
features = ['conductivity', 'ppm']
data_features = data[features].round(2)

# 3. 進行 MinMaxScaler
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data_features)

# 定義sequence參數（同你的原始碼）
sequence_length = 12
prediction_steps = [6, 12, 18, 24, 30, 36, 42, 48]

def create_future_sequences(data, sequence_length, prediction_steps):
    X, y = [], []
    for i in range(len(data) - sequence_length - max(prediction_steps)):
        X.append(data[i:i + sequence_length])
        # 以每一段未來時間段的 6 筆資料做平均
        future_values = [
            np.mean(data[i + sequence_length + p : i + sequence_length + p + 6], axis=0)
            for p in prediction_steps
        ]
        y.append(np.array(future_values).flatten())
    return np.array(X), np.array(y)

X, y = create_future_sequences(data_normalized, sequence_length, prediction_steps)

# 分訓練、測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. 建立 GRU 模型
input_layer = Input(shape=(sequence_length, len(features)))
x = GRU(64, activation='relu', return_sequences=True)(input_layer)
x = GRU(32, activation='relu', return_sequences=False)(x)
dense1 = Dense(64, activation='relu')(x)
output_layer = Dense(len(features) * len(prediction_steps), activation='linear')(dense1)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# 5. 訓練
history = model.fit(X_train, y_train, epochs=42, batch_size=32, validation_data=(X_test, y_test))

# 6. 評估 (在「縮放後」的測試資料上先得到 MSE)
test_loss = model.evaluate(X_test, y_test)
print("Test Loss (MSE, scaled):", test_loss)

# 7. 預測
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(y_test.shape)

# 8. 反轉換 (inverse_transform) 到原始尺度並取小數點後兩位
#
#   y_pred, y_test 的 shape: (樣本數, len(features)*len(prediction_steps))
#   需要先 reshape 成 (樣本數 * 預測步數, len(features)) 再 inverse_transform
#
#   這樣做完後才能得到真正「原始尺度」下的預測值
def inverse_transform_2dec(y, scaler, features, prediction_steps):
    samples = y.shape[0]
    n_features = len(features)
    n_steps = len(prediction_steps)

    # (samples, n_steps*n_features) -> (samples*n_steps, n_features)
    y_reshaped = y.reshape(samples * n_steps, n_features)
    y_inverted = scaler.inverse_transform(y_reshaped)

    # 再 reshape 回 (samples, n_steps*n_features)
    y_inverted = y_inverted.reshape(samples, n_steps * n_features)

    # 取小數點2位
    y_inverted_2dec = np.round(y_inverted, 2)
    return y_inverted_2dec

y_pred_orig = inverse_transform_2dec(y_pred, scaler, features, prediction_steps)
y_test_orig = inverse_transform_2dec(y_test, scaler, features, prediction_steps)

# 9. 用「原始尺度」做評估
def evaluate_predictions_in_original_scale(y_true_orig, y_pred_orig, features, prediction_steps):
    print("\nStep-wise Performance Evaluation (Original Scale):")
    for idx, step in enumerate(prediction_steps):
        hours = (step * 10) // 60
        minutes = (step * 10) % 60
        for feature_idx, feature in enumerate(features):
            y_true_step = y_true_orig[:, idx * len(features) + feature_idx]
            y_pred_step = y_pred_orig[:, idx * len(features) + feature_idx]

            r2 = r2_score(y_true_step, y_pred_step)
            mae = mean_absolute_error(y_true_step, y_pred_step)
            print(f"{hours}h{minutes:02d}min - {feature}: R²={r2:.2f}, MAE={mae:.2f}")

    overall_r2 = r2_score(y_true_orig.flatten(), y_pred_orig.flatten())
    overall_mae = mean_absolute_error(y_true_orig.flatten(), y_pred_orig.flatten())
    print("\nOverall Performance (Original Scale):")
    print(f"Overall R²: {overall_r2:.2f}")
    print(f"Overall MAE: {overall_mae:.2f}")

evaluate_predictions_in_original_scale(y_test_orig, y_pred_orig, features, prediction_steps)

# 10. 保存模型
model.save('tds_dissolved_solid_conductivity_gru_model.keras')


Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42
Epoch 9/42
Epoch 10/42
Epoch 11/42
Epoch 12/42
Epoch 13/42
Epoch 14/42
Epoch 15/42
Epoch 16/42
Epoch 17/42
Epoch 18/42
Epoch 19/42
Epoch 20/42
Epoch 21/42
Epoch 22/42
Epoch 23/42
Epoch 24/42
Epoch 25/42
Epoch 26/42
Epoch 27/42
Epoch 28/42
Epoch 29/42
Epoch 30/42
Epoch 31/42
Epoch 32/42
Epoch 33/42
Epoch 34/42
Epoch 35/42
Epoch 36/42
Epoch 37/42
Epoch 38/42
Epoch 39/42
Epoch 40/42
Epoch 41/42
Epoch 42/42
Test Loss (MSE, scaled): 0.00010067705443361774

Step-wise Performance Evaluation (Original Scale):
1h00min - conductivity: R²=0.73, MAE=0.00
1h00min - ppm: R²=0.94, MAE=1.53
2h00min - conductivity: R²=0.51, MAE=0.00
2h00min - ppm: R²=0.94, MAE=1.59
3h00min - conductivity: R²=0.50, MAE=0.00
3h00min - ppm: R²=0.93, MAE=1.82
4h00min - conductivity: R²=0.67, MAE=0.00
4h00min - ppm: R²=0.92, MAE=2.03
5h00min - conductivity: R²=0.65, MAE=0.00
5h00min - ppm: R²=0.92, MAE=1.86
6h00min - conductivity: R²=0.6

In [9]:
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, GRU, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# 1. 讀取與基本處理
file_path = 'IMPACT.sensors.csv'
data = pd.read_csv(file_path)

timestamp_column = 'createdAt'
data[timestamp_column] = pd.to_datetime(data[timestamp_column])
data = data.sort_values(by=timestamp_column)

# 2. 選擇特徵並四捨五入到兩位小數
features = ['conductivity', 'ppm']
data_features = data[features].round(2)

# 3. 進行 MinMaxScaler
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data_features)

# **儲存 Scaler**
scaler_filename = "tds_dissolved_solid_conductivity_scaler.pkl"
joblib.dump(scaler, scaler_filename)
print(f"Scaler saved as {scaler_filename}")

# 定義 sequence 參數
sequence_length = 12
prediction_steps = [6, 12, 18, 24, 30, 36, 42, 48]

def create_future_sequences(data, sequence_length, prediction_steps):
    X, y = [], []
    for i in range(len(data) - sequence_length - max(prediction_steps)):
        X.append(data[i:i + sequence_length])
        # 以每一段未來時間段的 6 筆資料做平均
        future_values = [
            np.mean(data[i + sequence_length + p : i + sequence_length + p + 6], axis=0)
            for p in prediction_steps
        ]
        y.append(np.array(future_values).flatten())
    return np.array(X), np.array(y)

X, y = create_future_sequences(data_normalized, sequence_length, prediction_steps)

# 分訓練、測試集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 4. 建立 GRU 模型
input_layer = Input(shape=(sequence_length, len(features)))
x = GRU(64, activation='relu', return_sequences=True)(input_layer)
x = GRU(32, activation='relu', return_sequences=False)(x)
dense1 = Dense(64, activation='relu')(x)
output_layer = Dense(len(features) * len(prediction_steps), activation='linear')(dense1)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# 5. 訓練
history = model.fit(X_train, y_train, epochs=42, batch_size=32, validation_data=(X_test, y_test))

# 6. 評估
test_loss = model.evaluate(X_test, y_test)
print("Test Loss (MSE, scaled):", test_loss)

# 7. 預測
y_pred = model.predict(X_test)
y_pred = y_pred.reshape(y_test.shape)

# 8. 反轉換 (inverse_transform) 到原始尺度並取小數點後兩位
def inverse_transform_2dec(y, scaler, features, prediction_steps):
    samples = y.shape[0]
    n_features = len(features)
    n_steps = len(prediction_steps)

    # (samples, n_steps*n_features) -> (samples*n_steps, n_features)
    y_reshaped = y.reshape(samples * n_steps, n_features)
    y_inverted = scaler.inverse_transform(y_reshaped)

    # 再 reshape 回 (samples, n_steps*n_features)
    y_inverted = y_inverted.reshape(samples, n_steps * n_features)

    # 取小數點2位
    y_inverted_2dec = np.round(y_inverted, 2)
    return y_inverted_2dec

y_pred_orig = inverse_transform_2dec(y_pred, scaler, features, prediction_steps)
y_test_orig = inverse_transform_2dec(y_test, scaler, features, prediction_steps)

# 9. 用「原始尺度」做評估
def evaluate_predictions_in_original_scale(y_true_orig, y_pred_orig, features, prediction_steps):
    print("\nStep-wise Performance Evaluation (Original Scale):")
    for idx, step in enumerate(prediction_steps):
        hours = (step * 10) // 60
        minutes = (step * 10) % 60
        for feature_idx, feature in enumerate(features):
            y_true_step = y_true_orig[:, idx * len(features) + feature_idx]
            y_pred_step = y_pred_orig[:, idx * len(features) + feature_idx]

            r2 = r2_score(y_true_step, y_pred_step)
            mae = mean_absolute_error(y_true_step, y_pred_step)
            print(f"{hours}h{minutes:02d}min - {feature}: R²={r2:.2f}, MAE={mae:.2f}")

    overall_r2 = r2_score(y_true_orig.flatten(), y_pred_orig.flatten())
    overall_mae = mean_absolute_error(y_true_orig.flatten(), y_pred_orig.flatten())
    print("\nOverall Performance (Original Scale):")
    print(f"Overall R²: {overall_r2:.2f}")
    print(f"Overall MAE: {overall_mae:.2f}")

evaluate_predictions_in_original_scale(y_test_orig, y_pred_orig, features, prediction_steps)

# 10. 保存模型
model.save('tds_dissolved_solid_conductivity_gru_model.keras')

Scaler saved as scaler_conductivity_ppm.pkl
Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42
Epoch 9/42
Epoch 10/42
Epoch 11/42
Epoch 12/42
Epoch 13/42
Epoch 14/42
Epoch 15/42
Epoch 16/42
Epoch 17/42
Epoch 18/42
Epoch 19/42
Epoch 20/42
Epoch 21/42
Epoch 22/42
Epoch 23/42
Epoch 24/42
Epoch 25/42
Epoch 26/42
Epoch 27/42
Epoch 28/42
Epoch 29/42
Epoch 30/42
Epoch 31/42
Epoch 32/42
Epoch 33/42
Epoch 34/42
Epoch 35/42
Epoch 36/42
Epoch 37/42
Epoch 38/42
Epoch 39/42
Epoch 40/42
Epoch 41/42
Epoch 42/42
Test Loss (MSE, scaled): 0.00010238006507279351

Step-wise Performance Evaluation (Original Scale):
1h00min - conductivity: R²=0.76, MAE=0.00
1h00min - ppm: R²=0.94, MAE=1.52
2h00min - conductivity: R²=0.50, MAE=0.00
2h00min - ppm: R²=0.94, MAE=1.68
3h00min - conductivity: R²=0.52, MAE=0.00
3h00min - ppm: R²=0.93, MAE=1.84
4h00min - conductivity: R²=0.65, MAE=0.00
4h00min - ppm: R²=0.93, MAE=2.03
5h00min - conductivity: R²=0.65, MAE=0.00
5h00min - ppm: R²=0