In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LSTM, Concatenate, Multiply
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Read the raw records from the CSV next to the notebook.
data = pd.read_csv('./changhua_data.csv')

# Positional column layout used below:
#   column 0  -> timestamp
#   column 1  -> flow
#   column 2+ -> rainfall (the original note says "third to ninth"; the slice
#                takes every remaining column, however many there are)
time_column = data.iloc[:, 0]
flow_column = data.iloc[:, 1]
rainfall_columns = data.iloc[:, 2:]

times = pd.to_datetime(time_column.values)   # parsed timestamps
flows = flow_column.values.reshape(-1, 1)    # column vector, shape (n_rows, 1)
rainfalls = rainfall_columns.values          # shape (n_rows, n_rainfall_columns)
print("rainfall shape", rainfalls.shape)

# Create sequences for time-series prediction
def create_sequences(data, target, input_steps, output_steps, timestamps=None):
    """Slice a series into sliding (input window, forecast window) pairs.

    Window ``i`` uses rows ``i .. i + input_steps - 1`` of ``data`` as input
    and the following ``output_steps`` rows of ``target[:, 0]`` as the values
    to predict.

    Parameters
    ----------
    data : 2-D array-like, indexed as ``data[a:b, :]``
        Feature rows in chronological order.
    target : 2-D array-like, indexed as ``target[a:b, 0]``
        Target rows aligned with ``data``; only column 0 is used.
    input_steps : int
        Number of consecutive rows in each input window.
    output_steps : int
        Number of consecutive rows in each forecast window.
    timestamps : sequence, optional
        Time labels aligned with ``data``. When omitted, the module-level
        ``times`` variable is used, which preserves the behaviour of existing
        four-argument calls.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y, times_seq)`` where ``X[i]`` is an input window, ``y[i]`` the
        matching forecast targets and ``times_seq[i]`` the timestamps of those
        forecast targets. All three are empty arrays when the series is too
        short to hold a single window.
    """
    if timestamps is None:
        # Backward-compatible fallback to the notebook-level global.
        timestamps = times

    n_windows = len(data) - input_steps - output_steps + 1

    X, y, times_seq = [], [], []
    for start in range(n_windows):
        horizon_start = start + input_steps
        horizon_end = horizon_start + output_steps
        X.append(data[start:horizon_start, :])
        y.append(target[horizon_start:horizon_end, 0])
        times_seq.append(timestamps[horizon_start:horizon_end])
    return np.array(X), np.array(y), np.array(times_seq)

input_steps = 12  # Use past 12 hours
output_steps = 6  # Predict next 6 hours

# Decide the chronological 80/20 split *before* scaling so the scalers can be
# fit on the training period only. Fitting them on the whole series would let
# the test period's min/max leak into the training features and targets.
n_sequences = len(flows) - input_steps - output_steps + 1
split_index = int(n_sequences * 0.8)
if split_index <= 0:
    raise ValueError(
        "Not enough rows to build a training window: "
        f"got {len(flows)} rows for input_steps={input_steps}, "
        f"output_steps={output_steps}."
    )

# Exclusive end of the rows touched by any training window: the last training
# window starts at split_index - 1 and spans input_steps + output_steps rows.
train_row_end = split_index + input_steps + output_steps - 1

# Data preprocessing: fit on training rows, then transform the full series.
scaler_flow = MinMaxScaler()
scaler_flow.fit(flows[:train_row_end])
flows_scaled = scaler_flow.transform(flows)

scaler_rainfall = MinMaxScaler()
scaler_rainfall.fit(rainfalls[:train_row_end])
rainfalls_scaled = scaler_rainfall.transform(rainfalls)

# Combine rainfall and flow data for sequence creation (flow is the last column)
combined_data = np.concatenate((rainfalls_scaled, flows_scaled), axis=1)

# Create sequences
X, y, times_seq = create_sequences(combined_data, flows_scaled, input_steps, output_steps)

# A random (non-chronological) train_test_split is deliberately not used here:
# neighbouring windows overlap, so shuffling would scatter near-duplicate
# windows across both sides of the split.

# Time-based split: the first 80% of windows train, the last 20% test.
X_train, X_test = X[:split_index], X[split_index:]
y_train, y_test = y[:split_index], y[split_index:]
times_train, times_test = times_seq[:split_index], times_seq[split_index:]

In [None]:
# Plot actual vs predicted flows over all time steps.
# NOTE(review): `y_test_inverse` and `predicted_flows` are not defined in the
# cells visible here; an earlier cell must create them before this one runs.
fig, ax = plt.subplots(figsize=(20, 5))

# Flatten both arrays so every value is drawn on one continuous line.
actual_series = y_test_inverse.flatten()
predicted_series = predicted_flows.flatten()

ax.plot(actual_series, label='Actual Flow')
ax.plot(predicted_series, label='Predicted Flow')
ax.set_xlabel('Time Steps')
ax.set_ylabel('Flow')
ax.legend()
ax.set_title('Actual vs Predicted Flow')
plt.show()