In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from tensorflow.keras.callbacks import EarlyStopping
import warnings
import os
import joblib
warnings.filterwarnings('ignore')

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
df = pd.read_csv('/content/drive/MyDrive/ScienceProjects/WaterLevelPredict/data/mucnuoc_gio_preprocess.csv')
df.head(3)


Unnamed: 0,date,q120,q55,q64,q66,q69
0,2014-01-01 01:00:00,-0.94,-8.0,-4.58,-1.45,-9.01
1,2014-01-01 03:00:00,-0.94,-7.98,-4.57,-1.45,-9.0
2,2014-01-01 05:00:00,-0.94,-7.95,-4.58,-1.45,-9.0


In [None]:
# Thi·∫øt l·∫≠p features v√† target
features = ['q64']
target = 'q64'

# Chu·∫©n h√≥a d·ªØ li·ªáu
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df[features])
scaled_df = pd.DataFrame(scaled_data, columns=features)

In [None]:
def create_dataset_and_train_model_fixed(past_window, future_window, scaled_data, features, target, scaler):
    """
    T·∫°o dataset v√† train model v·ªõi window size c·ª• th·ªÉ - PHI√äN B·∫¢N S·ª¨A L·ªñI
    """
    print(f"\nüîÑ Training v·ªõi Past Window: {past_window}, Future Window: {future_window}")

    # T·∫°o d·ªØ li·ªáu window - FIXED VERSION
    X, y = [], []
    target_idx = features.index(target)

    for i in range(len(scaled_data) - past_window - future_window):
        # Input: past_window timesteps v·ªõi t·∫•t c·∫£ features
        X_window = scaled_data[i:i+past_window]

        # Output: future_window timesteps ch·ªâ v·ªõi target feature
        # THAY ƒê·ªîI QUAN TR·ªåNG: Kh√¥ng d√πng .mean() n·ªØa!
        y_sequence = scaled_data[i+past_window:i+past_window+future_window, target_idx]

        X.append(X_window)
        y.append(y_sequence)

    X = np.array(X, dtype=np.float32)  # Shape: (samples, past_window, n_features)
    y = np.array(y, dtype=np.float32)  # Shape: (samples, future_window)

    print(f"üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X={X.shape}, y={y.shape}")

    # Chia train/test
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    # T·∫°o model - C·∫¶N S·ª¨A ARCHITECTURE
    model = Sequential([
        SimpleRNN(64, return_sequences=True, input_shape=(past_window, len(features))),
        SimpleRNN(32, return_sequences=False),
        Dense(64, activation='relu'),
        Dense(future_window)  # Output = future_window timesteps
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Early Stopping callback
    early_stopping = EarlyStopping(
        monitor='val_loss',
        patience=15,
        restore_best_weights=True,
        verbose=1
    )

    # Train model
    print("üöÄ B·∫Øt ƒë·∫ßu training...")
    history = model.fit(
        X_train, y_train,
        epochs=20,
        batch_size=32,
        validation_data=(X_test, y_test),
        callbacks=[early_stopping],
        verbose=0
    )

    # D·ª± ƒëo√°n
    y_pred = model.predict(X_test, verbose=0)  # Shape: (test_samples, future_window)

    # Chuy·ªÉn ƒë·ªïi ng∆∞·ª£c v·ªÅ ƒë∆°n v·ªã g·ªëc
    q64_index = features.index('q64')
    q64_min = scaler.data_min_[q64_index]
    q64_max = scaler.data_max_[q64_index]

    y_pred_inv = y_pred * (q64_max - q64_min) + q64_min
    y_test_inv = y_test * (q64_max - q64_min) + q64_min

    # ƒê√°nh gi√° - c√≥ th·ªÉ t√≠nh cho t·ª´ng timestep ho·∫∑c overall
    # C√°ch 1: Overall metrics (flatten all predictions)
    mae_overall = mean_absolute_error(y_test_inv.flatten(), y_pred_inv.flatten())
    mse_overall = mean_squared_error(y_test_inv.flatten(), y_pred_inv.flatten())
    rmse_overall = np.sqrt(mse_overall)
    r2_overall = r2_score(y_test_inv.flatten(), y_pred_inv.flatten())

    # C√°ch 2: Per-timestep metrics
    timestep_metrics = []
    for t in range(future_window):
        mae_t = mean_absolute_error(y_test_inv[:, t], y_pred_inv[:, t])
        r2_t = r2_score(y_test_inv[:, t], y_pred_inv[:, t])
        timestep_metrics.append({'timestep': t+1, 'mae': mae_t, 'r2': r2_t})

    print(f"‚úÖ Overall - MAE: {mae_overall:.4f}, RMSE: {rmse_overall:.4f}, R¬≤: {r2_overall:.4f}")
    print(f"üìà Stopped at epoch: {len(history.history['loss'])}")

    # In metrics cho v√†i timestep ƒë·∫ßu
    print("üîç Per-timestep performance (first 5):")
    for i in range(min(5, len(timestep_metrics))):
        t_metric = timestep_metrics[i]
        print(f"   Timestep {t_metric['timestep']}: MAE={t_metric['mae']:.4f}, R¬≤={t_metric['r2']:.4f}")

    return {
        'past_window': past_window,
        'future_window': future_window,
        'y_test': y_test_inv,          # Shape: (samples, future_window)
        'y_pred': y_pred_inv,          # Shape: (samples, future_window)
        'mae': mae_overall,
        'mse': mse_overall,
        'rmse': rmse_overall,
        'r2': r2_overall,
        'timestep_metrics': timestep_metrics,
        'history': history,
        'epochs_trained': len(history.history['loss']),
        'model': model,  # Th√™m model v√†o k·∫øt qu·∫£ tr·∫£ v·ªÅ
        'scaler': scaler
    }


In [None]:
# T·∫°o th∆∞ m·ª•c ƒë·ªÉ l∆∞u models
model_save_dir = "saved_models"
if not os.path.exists(model_save_dir):
    os.makedirs(model_save_dir)
# ƒê·ªãnh nghƒ©a c√°c window sizes c·∫ßn test
window_sizes = [2,12,24,36,72]
results = []

print("üéØ B·∫Øt ƒë·∫ßu training v·ªõi c√°c window sizes kh√°c nhau...")
print("=" * 60)

# Ch·∫°y training cho t·ª´ng window size
for window_size in window_sizes:
    past_window = 24
    future_window = window_size

    result = create_dataset_and_train_model_fixed(
        past_window, future_window,
        scaled_df[features].values.astype(np.float32),
        features, target, scaler
    )
    # L∆∞u model v√† scaler
    model_filename = f"{model_save_dir}/rnn_model_window_{window_size}.h5"
    scaler_filename = f"{model_save_dir}/scaler_window_{window_size}.pkl"

    # L∆∞u model
    result['model'].save(model_filename)
    print(f"üíæ ƒê√£ l∆∞u model: {model_filename}")

    # L∆∞u scaler
    joblib.dump(result['scaler'], scaler_filename)
    print(f"üíæ ƒê√£ l∆∞u scaler: {scaler_filename}")

    # L∆∞u th√¥ng tin c·∫•u h√¨nh model
    config_filename = f"{model_save_dir}/config_window_{window_size}.pkl"
    model_config = {
        'past_window': past_window,
        'future_window': future_window,
        'features': features,
        'target': target,
        'mae': result['mae'],
        'rmse': result['rmse'],
        'r2': result['r2'],
        'epochs_trained': result['epochs_trained']
    }
    joblib.dump(model_config, config_filename)
    print(f"üíæ ƒê√£ l∆∞u config: {config_filename}")

    # X√≥a model v√† scaler kh·ªèi result ƒë·ªÉ ti·∫øt ki·ªám b·ªô nh·ªõ
    result_copy = result.copy()
    del result_copy['model']
    del result_copy['scaler']
    results.append(result_copy)

    print(f"‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size {window_size}")
    print("-" * 40)

print("\n" + "=" * 60)
print("üéâ Ho√†n th√†nh training t·∫•t c·∫£ c√°c window sizes!")

# T·∫°o b·∫£ng so s√°nh k·∫øt qu·∫£
print("\nüìä B·∫¢NG SO S√ÅNH K·∫æT QU·∫¢:")
print("-" * 80)
print(f"{'Window Size':<12} {'MAE':<8} {'RMSE':<8} {'R¬≤':<8} {'Epochs':<8}")
print("-" * 80)
for result in results:
    print(f"{result['past_window']:<12} {result['mae']:<8.4f} {result['rmse']:<8.4f} {result['r2']:<8.4f} {result['epochs_trained']:<8}")

# T√¨m model t·ªët nh·∫•t
best_result = max(results, key=lambda x: x['r2'])
print(f"\nüèÜ Model t·ªët nh·∫•t: Window Size = {best_result['past_window']} (R¬≤ = {best_result['r2']:.4f})")

# Hi·ªÉn th·ªã danh s√°ch files ƒë√£ l∆∞u
print(f"\nüìÅ C√°c files ƒë√£ ƒë∆∞·ª£c l∆∞u trong th∆∞ m·ª•c '{model_save_dir}':")
for window_size in window_sizes:
    print(f"   - rnn_model_window_{window_size}.h5")
    print(f"   - scaler_window_{window_size}.pkl")
    print(f"   - config_window_{window_size}.pkl")


üéØ B·∫Øt ƒë·∫ßu training v·ªõi c√°c window sizes kh√°c nhau...

üîÑ Training v·ªõi Past Window: 24, Future Window: 2
üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X=(47477, 24, 1), y=(47477, 2)
üöÄ B·∫Øt ƒë·∫ßu training...
Restoring model weights from the end of the best epoch: 20.




‚úÖ Overall - MAE: 0.0110, RMSE: 0.0160, R¬≤: 0.9991
üìà Stopped at epoch: 20
üîç Per-timestep performance (first 5):
   Timestep 1: MAE=0.0081, R¬≤=0.9995
   Timestep 2: MAE=0.0140, R¬≤=0.9986
üíæ ƒê√£ l∆∞u model: saved_models/rnn_model_window_2.h5
üíæ ƒê√£ l∆∞u scaler: saved_models/scaler_window_2.pkl
üíæ ƒê√£ l∆∞u config: saved_models/config_window_2.pkl
‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size 2
----------------------------------------

üîÑ Training v·ªõi Past Window: 24, Future Window: 12
üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X=(47467, 24, 1), y=(47467, 12)
üöÄ B·∫Øt ƒë·∫ßu training...
Restoring model weights from the end of the best epoch: 15.




‚úÖ Overall - MAE: 0.0212, RMSE: 0.0322, R¬≤: 0.9962
üìà Stopped at epoch: 20
üîç Per-timestep performance (first 5):
   Timestep 1: MAE=0.0137, R¬≤=0.9989
   Timestep 2: MAE=0.0210, R¬≤=0.9974
   Timestep 3: MAE=0.0185, R¬≤=0.9976
   Timestep 4: MAE=0.0180, R¬≤=0.9975
   Timestep 5: MAE=0.0185, R¬≤=0.9972
üíæ ƒê√£ l∆∞u model: saved_models/rnn_model_window_12.h5
üíæ ƒê√£ l∆∞u scaler: saved_models/scaler_window_12.pkl
üíæ ƒê√£ l∆∞u config: saved_models/config_window_12.pkl
‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size 12
----------------------------------------

üîÑ Training v·ªõi Past Window: 24, Future Window: 24
üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X=(47455, 24, 1), y=(47455, 24)
üöÄ B·∫Øt ƒë·∫ßu training...
Restoring model weights from the end of the best epoch: 16.




‚úÖ Overall - MAE: 0.0299, RMSE: 0.0451, R¬≤: 0.9925
üìà Stopped at epoch: 20
üîç Per-timestep performance (first 5):
   Timestep 1: MAE=0.0094, R¬≤=0.9994
   Timestep 2: MAE=0.0150, R¬≤=0.9985
   Timestep 3: MAE=0.0213, R¬≤=0.9972
   Timestep 4: MAE=0.0202, R¬≤=0.9971
   Timestep 5: MAE=0.0194, R¬≤=0.9970
üíæ ƒê√£ l∆∞u model: saved_models/rnn_model_window_24.h5
üíæ ƒê√£ l∆∞u scaler: saved_models/scaler_window_24.pkl
üíæ ƒê√£ l∆∞u config: saved_models/config_window_24.pkl
‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size 24
----------------------------------------

üîÑ Training v·ªõi Past Window: 24, Future Window: 36
üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X=(47443, 24, 1), y=(47443, 36)
üöÄ B·∫Øt ƒë·∫ßu training...
Restoring model weights from the end of the best epoch: 17.




‚úÖ Overall - MAE: 0.0373, RMSE: 0.0565, R¬≤: 0.9883
üìà Stopped at epoch: 20
üîç Per-timestep performance (first 5):
   Timestep 1: MAE=0.0088, R¬≤=0.9994
   Timestep 2: MAE=0.0153, R¬≤=0.9983
   Timestep 3: MAE=0.0163, R¬≤=0.9980
   Timestep 4: MAE=0.0174, R¬≤=0.9977
   Timestep 5: MAE=0.0183, R¬≤=0.9973
üíæ ƒê√£ l∆∞u model: saved_models/rnn_model_window_36.h5
üíæ ƒê√£ l∆∞u scaler: saved_models/scaler_window_36.pkl
üíæ ƒê√£ l∆∞u config: saved_models/config_window_36.pkl
‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size 36
----------------------------------------

üîÑ Training v·ªõi Past Window: 24, Future Window: 72
üìä K√≠ch th∆∞·ªõc d·ªØ li·ªáu: X=(47407, 24, 1), y=(47407, 72)
üöÄ B·∫Øt ƒë·∫ßu training...
Restoring model weights from the end of the best epoch: 17.




‚úÖ Overall - MAE: 0.0559, RMSE: 0.0824, R¬≤: 0.9752
üìà Stopped at epoch: 20
üîç Per-timestep performance (first 5):
   Timestep 1: MAE=0.0188, R¬≤=0.9981
   Timestep 2: MAE=0.0190, R¬≤=0.9978
   Timestep 3: MAE=0.0225, R¬≤=0.9970
   Timestep 4: MAE=0.0239, R¬≤=0.9965
   Timestep 5: MAE=0.0252, R¬≤=0.9960
üíæ ƒê√£ l∆∞u model: saved_models/rnn_model_window_72.h5
üíæ ƒê√£ l∆∞u scaler: saved_models/scaler_window_72.pkl
üíæ ƒê√£ l∆∞u config: saved_models/config_window_72.pkl
‚úÖ Ho√†n th√†nh v√† l∆∞u model cho window size 72
----------------------------------------

üéâ Ho√†n th√†nh training t·∫•t c·∫£ c√°c window sizes!

üìä B·∫¢NG SO S√ÅNH K·∫æT QU·∫¢:
--------------------------------------------------------------------------------
Window Size  MAE      RMSE     R¬≤       Epochs  
--------------------------------------------------------------------------------
24           0.0110   0.0160   0.9991   20      
24           0.0212   0.0322   0.9962   20      
24           0.0299   0