In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score

# Load dataset
file_path = 'IMPACT.sensors.csv'
data = pd.read_csv(file_path)

# Ensure timestamp exists
timestamp_column = 'createdAt'
data[timestamp_column] = pd.to_datetime(data[timestamp_column])

# Sort data
data = data.sort_values(by=timestamp_column)

# Select features and target
features = ['temperature']
data_features = data[features]

# Normalize data
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data_features)

# Save the scaler
import joblib
joblib.dump(scaler, 'temperature_scaler.pkl')

# Define sequence length and prediction steps
sequence_length = 12  # Using past 10 steps (100 min)
prediction_steps = [6, 12, 18, 24, 30, 36, 42, 48]  # Predict 1-8 hours ahead (10 min intervals)

# Prepare sequences and targets
def create_future_sequences(data, sequence_length, prediction_steps):
    X, y = [], []
    for i in range(len(data) - sequence_length - max(prediction_steps)):
        X.append(data[i:i + sequence_length])
        future_values = [np.mean(data[i + sequence_length + p: i + sequence_length + p + 6]) for p in prediction_steps]
        y.append(np.array(future_values).flatten())
    return np.array(X), np.array(y)

X, y = create_future_sequences(data_normalized, sequence_length, prediction_steps)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build GRU Model
input_layer = Input(shape=(sequence_length, len(features)))
x = GRU(64, activation='relu', return_sequences=True)(input_layer)
x = GRU(32, activation='relu', return_sequences=False)(x)
dense1 = Dense(64, activation='relu')(x)
output_layer = Dense(len(prediction_steps), activation='linear')(dense1)

# Build and compile model
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# Train the model
history = model.fit(X_train, y_train, epochs=42, batch_size=32, validation_data=(X_test, y_test))

# Evaluate the model
test_loss = model.evaluate(X_test, y_test)
print("Test Loss (MSE):", test_loss)

# Calculate additional evaluation metrics
y_pred = model.predict(X_test)

def evaluate_predictions(y_true, y_pred, prediction_steps):
    print("\nStep-wise Performance Evaluation:")
    for idx, step in enumerate(prediction_steps):
        hours = (step * 10) // 60
        minutes = (step * 10) % 60
        y_true_step = y_true[:, idx]
        y_pred_step = y_pred[:, idx]

        r2 = r2_score(y_true_step, y_pred_step)
        mae = mean_absolute_error(y_true_step, y_pred_step)
        print(f"{hours}h{minutes:02d}min: R²={r2:.2f}, MAE={mae:.2f}")

    overall_r2 = r2_score(y_test, y_pred)
    overall_mae = mean_absolute_error(y_test, y_pred)
    print("\nOverall Performance:")
    print(f"Overall R² Score: {overall_r2:.2f}")
    print(f"Overall Mean Absolute Error (MAE): {overall_mae:.2f}")

evaluate_predictions(y_test, y_pred, prediction_steps)

# Save the model
model.save('temperature_prediction_gru_model.keras')


Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42
Epoch 9/42
Epoch 10/42
Epoch 11/42
Epoch 12/42
Epoch 13/42
Epoch 14/42
Epoch 15/42
Epoch 16/42
Epoch 17/42
Epoch 18/42
Epoch 19/42
Epoch 20/42
Epoch 21/42
Epoch 22/42
Epoch 23/42
Epoch 24/42
Epoch 25/42
Epoch 26/42
Epoch 27/42
Epoch 28/42
Epoch 29/42
Epoch 30/42
Epoch 31/42
Epoch 32/42
Epoch 33/42
Epoch 34/42
Epoch 35/42
Epoch 36/42
Epoch 37/42
Epoch 38/42
Epoch 39/42
Epoch 40/42
Epoch 41/42
Epoch 42/42
Test Loss (MSE): 0.0004563122056424618

Step-wise Performance Evaluation:
1h00min: R²=0.99, MAE=0.01
2h00min: R²=0.99, MAE=0.01
3h00min: R²=0.99, MAE=0.01
4h00min: R²=0.98, MAE=0.01
5h00min: R²=0.98, MAE=0.01
6h00min: R²=0.98, MAE=0.01
7h00min: R²=0.98, MAE=0.02
8h00min: R²=0.97, MAE=0.02

Overall Performance:
Overall R² Score: 0.98
Overall Mean Absolute Error (MAE): 0.01


In [9]:
# Import necessary libraries
import pandas as pd
import numpy as np
import joblib
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import joblib

# ------------------------------
# 1. Data Reading and Preprocessing
# ------------------------------
file_path = 'IMPACT.sensors.csv'
data = pd.read_csv(file_path)

# Timestamp processing
timestamp_col = 'createdAt'
data[timestamp_col] = pd.to_datetime(data[timestamp_col])
data = data.sort_values(by=timestamp_col).reset_index(drop=True)

# ------------------------------
# 2. Feature Selection
# ------------------------------
features = ['temperature']
time_features = ['hour', 'day_of_week', 'is_weekend']

# Create time-related features
data['hour'] = data[timestamp_col].dt.hour
data['day_of_week'] = data[timestamp_col].dt.dayofweek
data['is_weekend'] = data['day_of_week'].isin([5, 6]).astype(int)

# Combine all features
data_features = data[features + time_features]

# Normalize the data
scaler = MinMaxScaler()
data_normalized = scaler.fit_transform(data_features)

# ------------------------------
# 3. Define Sequence Parameters
# ------------------------------
sequence_length = 12
prediction_steps = [6, 12, 18, 24, 30, 36, 42, 48]

# ------------------------------
# 4. Create Sequences and Targets
# ------------------------------
def create_future_sequences(data, sequence_length, prediction_steps):
    X, y = [], []
    for i in range(len(data) - sequence_length - max(prediction_steps)):
        X.append(data[i: i + sequence_length])
        future_vals = [
            np.mean(data[i + sequence_length + p: i + sequence_length + p + 6, 0])  # 0: temperature
            for p in prediction_steps
        ]
        y.append(future_vals)
    return np.array(X), np.array(y)

X, y = create_future_sequences(data_normalized, sequence_length, prediction_steps)

# ------------------------------
# 5. Train-Test Split
# ------------------------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# ------------------------------
# 6. Build GRU Model
# ------------------------------
input_layer = Input(shape=(sequence_length, len(features + time_features)))

x = GRU(128, activation='tanh', return_sequences=True)(input_layer)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = GRU(64, activation='tanh')(x)
x = BatchNormalization()(x)
x = Dropout(0.2)(x)

x = Dense(64, activation='relu')(x)
output_layer = Dense(len(prediction_steps), activation='linear')(x)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mse')

# ------------------------------
# 7. Training
# ------------------------------
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.5, min_lr=1e-6)

history = model.fit(
    X_train, y_train,
    epochs=42,
    batch_size=32,
    validation_data=(X_test, y_test),
    callbacks=[early_stop, lr_scheduler],
    verbose=1
)

# ------------------------------
# 8. Evaluation
# ------------------------------
def evaluate_predictions(y_true, y_pred, steps):
    print("\nStep-wise Performance Evaluation:")
    for idx, step in enumerate(steps):
        # 假设每个数据点间隔为10分钟
        hours = (step * 10) // 60
        minutes = (step * 10) % 60

        true_step = y_true[:, idx]
        pred_step = y_pred[:, idx]

        mae = mean_absolute_error(true_step, pred_step)
        r2 = r2_score(true_step, pred_step)
        print(f"预测 {hours}h{minutes:02d}min 后 -> MAE={mae:.4f}, R²={r2:.4f}")

    # 整体评估
    overall_mae = mean_absolute_error(y_true, y_pred)
    overall_r2 = r2_score(y_true.flatten(), y_pred.flatten())
    print("\nOverall Performance:")
    print(f"Overall MAE: {overall_mae:.4f}")
    print(f"Overall R²: {overall_r2:.4f}")

evaluate_predictions(y_test, y_pred, prediction_steps)

# ------------------------------
# 9. Save Model and Scaler
# ------------------------------
model.save('temperature_prediction_gru_model.keras')
joblib.dump(scaler, 'temperature_scaler.pkl')


Epoch 1/42
Epoch 2/42
Epoch 3/42
Epoch 4/42
Epoch 5/42
Epoch 6/42
Epoch 7/42
Epoch 8/42
Epoch 9/42
Epoch 10/42
Epoch 11/42
Epoch 12/42
Epoch 13/42
Epoch 14/42
Epoch 15/42
Epoch 16/42
Epoch 17/42
Epoch 18/42
Epoch 19/42
Epoch 20/42
Epoch 21/42
Epoch 22/42
Epoch 23/42
Epoch 24/42
Epoch 25/42
Epoch 26/42
Epoch 27/42
Epoch 28/42
Epoch 29/42
Epoch 30/42
Epoch 31/42

Step-wise Performance Evaluation:
预测 1h00min 后 -> MAE=0.0097, R²=0.9783
预测 2h00min 后 -> MAE=0.0104, R²=0.9740
预测 3h00min 后 -> MAE=0.0100, R²=0.9767
预测 4h00min 后 -> MAE=0.0118, R²=0.9672
预测 5h00min 后 -> MAE=0.0115, R²=0.9691
预测 6h00min 后 -> MAE=0.0142, R²=0.9533
预测 7h00min 后 -> MAE=0.0144, R²=0.9551
预测 8h00min 后 -> MAE=0.0151, R²=0.9520

Overall Performance:
Overall MAE: 0.0121
Overall R²: 0.9656


['temperature_scaler.pkl']