# Deep Learning Model Development for Runoff Forecasting

This notebook explores deep learning architectures for improving National Water Model (NWM) runoff forecasts. We compare LSTM, GRU, Transformer, and hybrid CNN-LSTM models to find the best-performing approach.

In [None]:
# Import necessary libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.layers import Dense, LSTM, GRU, Dropout, Input, MultiHeadAttention, LayerNormalization
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import Adam

# Reproducibility: one call seeds Python's `random`, NumPy and TensorFlow
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Set plotting style
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so use whichever name this install provides.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither bundled style sheet exists: fall back to seaborn's own whitegrid style
    sns.set_style('whitegrid')
sns.set_palette('deep')
plt.rcParams['figure.figsize'] = [12, 6]

## 1. Load and Prepare Data

In [None]:
# Set paths (relative to the notebook's working directory)
data_dir = os.path.join('..', 'data', 'processed')
models_dir = os.path.join('..', 'models')
os.makedirs(models_dir, exist_ok=True)

# Load processed data
try:
    train_val_df = pd.read_csv(os.path.join(data_dir, 'train_validation_data.csv'))
    test_df = pd.read_csv(os.path.join(data_dir, 'test_data.csv'))
    print(f"Training/validation data shape: {train_val_df.shape}")
    print(f"Test data shape: {test_df.shape}")
except FileNotFoundError:
    print("Processed data files not found. Please run the preprocessing script first.")
    # Create dummy data for demonstration
    print("Creating dummy data for demonstration.")
    # A dedicated, seeded generator makes the demo frames identical on every run
    # and independent of whatever else has consumed the global NumPy RNG.
    dummy_rng = np.random.default_rng(42)
    dummy_columns = [f'feature_{i}' for i in range(9)] + ['target']
    train_val_df = pd.DataFrame(dummy_rng.random((1000, 10)), columns=dummy_columns)
    test_df = pd.DataFrame(dummy_rng.random((200, 10)), columns=dummy_columns)

In [None]:
# Define feature preparation function
def prepare_data(df, feature_cols, target_col, sequence_length=24, train=False):
    """
    Prepare sequential data for model training/evaluation.

    Every sample is a window of `sequence_length` consecutive scaled feature
    rows; its label is the scaled target of the row right after that window.

    The scalers are cached as attributes on this function
    (`prepare_data.scaler_X` / `prepare_data.scaler_y`). They are (re)fitted
    on `df` when `train=True`, and also when no scalers have been cached yet,
    so call this with the training frame first to avoid fitting on
    validation/test data.

    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame with features and target
    feature_cols : list
        List of feature column names
    target_col : str
        Target column name
    sequence_length : int
        Length of input sequences
    train : bool
        Whether this is training data (to fit or use existing scalers)

    Returns:
    --------
    X_seq : numpy.ndarray
        Sequence feature array of shape
        (max(len(df) - sequence_length, 0), sequence_length, n_features).
        The array keeps all three dimensions even when it holds no windows.
    y : numpy.ndarray
        Target array with one scaled value per sequence
    scaler_X, scaler_y : sklearn.preprocessing.StandardScaler
        Fitted scalers for features and target
    """
    # Initialize or use existing scalers
    if train or not hasattr(prepare_data, 'scaler_X'):
        prepare_data.scaler_X = StandardScaler().fit(df[feature_cols])
        prepare_data.scaler_y = StandardScaler().fit(df[[target_col]])

    # Scale data
    X = np.asarray(prepare_data.scaler_X.transform(df[feature_cols]))
    y = np.asarray(prepare_data.scaler_y.transform(df[[target_col]])).flatten()

    # Create sequences: row i of `window_index` holds the positions
    # i, i+1, ..., i+sequence_length-1, so indexing X with it yields every
    # window at once and preserves the 3-D shape when there are zero windows.
    n_sequences = max(len(X) - sequence_length, 0)
    window_index = np.arange(n_sequences)[:, None] + np.arange(sequence_length)
    X_seq = X[window_index]

    # The label of window i is y[i + sequence_length], i.e. y from that offset on
    y_seq = y[sequence_length:].copy()

    return X_seq, y_seq, prepare_data.scaler_X, prepare_data.scaler_y

In [None]:
# Define feature and target columns
# In a real scenario, you would select appropriate columns from your data
feature_columns = [col for col in train_val_df.columns if col.startswith('feature_')]
target_column = 'target' if 'target' in train_val_df.columns else 'runoff_usgs'
if not feature_columns:
    raise ValueError(
        "No columns starting with 'feature_' were found; set feature_columns explicitly."
    )

# Split training and validation
# shuffle=False keeps the rows in file order: the first 80% train, the last 20%
# validate. prepare_data builds each window from consecutive rows, so a shuffled
# split would stitch unrelated rows into one "sequence" and interleave
# validation rows with training rows.
# NOTE(review): assumes the CSV rows are already in chronological order - confirm.
train_df, val_df = train_test_split(train_val_df, test_size=0.2, shuffle=False)

# Prepare sequences (the training frame goes first so the scalers are fitted on it)
sequence_length = 24  # Use 24-hour sequences
X_train, y_train, scaler_X, scaler_y = prepare_data(train_df, feature_columns, target_column, sequence_length, train=True)
X_val, y_val, _, _ = prepare_data(val_df, feature_columns, target_column, sequence_length)
X_test, y_test, _, _ = prepare_data(test_df, feature_columns, target_column, sequence_length)

print(f"Training sequences: {X_train.shape}")
print(f"Validation sequences: {X_val.shape}")
print(f"Test sequences: {X_test.shape}")

## 2. Model Architectures

Let's implement different model architectures for comparison.

In [None]:
def create_lstm_model(input_shape, output_shape=1):
    """Build and compile a stacked two-layer LSTM regressor.

    Layer order: LSTM(64, returning the full sequence) -> Dropout(0.2) ->
    LSTM(32) -> Dropout(0.2) -> Dense(16, relu) -> Dense(output_shape).
    The model is compiled with Adam (learning rate 0.001), MSE loss and an
    MAE metric, and returned ready for `fit`.
    """
    net = Sequential()

    # Recurrent stack: the first layer keeps every timestep for the second one
    net.add(LSTM(64, activation='relu', return_sequences=True, input_shape=input_shape))
    net.add(Dropout(0.2))
    net.add(LSTM(32, activation='relu'))
    net.add(Dropout(0.2))

    # Regression head
    net.add(Dense(16, activation='relu'))
    net.add(Dense(output_shape))

    net.compile(loss='mse', metrics=['mae'], optimizer=Adam(learning_rate=0.001))
    return net

def create_gru_model(input_shape, output_shape=1):
    """Build and compile a stacked two-layer GRU regressor.

    Layer order: GRU(64, returning the full sequence) -> Dropout(0.2) ->
    GRU(32) -> Dropout(0.2) -> Dense(16, relu) -> Dense(output_shape).
    The model is compiled with Adam (learning rate 0.001), MSE loss and an
    MAE metric, and returned ready for `fit`.
    """
    net = Sequential()

    # Recurrent stack: the first layer keeps every timestep for the second one
    net.add(GRU(64, activation='relu', return_sequences=True, input_shape=input_shape))
    net.add(Dropout(0.2))
    net.add(GRU(32, activation='relu'))
    net.add(Dropout(0.2))

    # Regression head
    net.add(Dense(16, activation='relu'))
    net.add(Dense(output_shape))

    net.compile(loss='mse', metrics=['mae'], optimizer=Adam(learning_rate=0.001))
    return net

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one Transformer encoder block to `inputs` and return the result.

    The block is self-attention followed by a two-layer feed-forward network.
    Each sub-layer is wrapped in a residual connection and then layer
    normalization. The feed-forward network projects back to the last
    dimension of `inputs` so the residual addition lines up.
    """
    # Self-attention: the sequence is passed as both query and value
    self_attention = MultiHeadAttention(
        num_heads=num_heads, key_dim=head_size, dropout=dropout
    )
    attended = self_attention(inputs, inputs)

    # Residual connection, then normalization
    normed_attention = LayerNormalization(epsilon=1e-6)(inputs + attended)

    # Position-wise feed-forward network
    model_width = inputs.shape[-1]
    feed_forward = Sequential([
        Dense(ff_dim, activation='relu'),
        Dense(model_width),
        Dropout(dropout)
    ])
    projected = feed_forward(normed_attention)

    # Second residual connection and normalization
    return LayerNormalization(epsilon=1e-6)(normed_attention + projected)

def create_transformer_model(input_shape, output_shape=1, head_size=256, num_heads=4,
                            ff_dim=512, num_transformer_blocks=4, mlp_units=(128,), dropout=0.2):
    """Create Transformer model

    Stacks `num_transformer_blocks` encoder blocks (see `transformer_encoder`),
    averages over the sequence axis, applies one Dense(relu) + Dropout pair per
    entry of `mlp_units`, and ends with a linear Dense(output_shape) layer.
    The model is compiled with Adam (learning rate 0.001), MSE loss and an
    MAE metric.

    `mlp_units` may be any iterable of layer widths. The default is a tuple
    rather than a list so the default value can never be mutated between calls.
    """
    inputs = Input(shape=input_shape)
    x = inputs

    # Transformer blocks
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    # Global average pooling collapses the sequence axis into one vector per sample
    x = tf.keras.layers.GlobalAveragePooling1D()(x)

    # Final MLP layers
    for dim in mlp_units:
        x = Dense(dim, activation='relu')(x)
        x = Dropout(dropout)(x)

    outputs = Dense(output_shape)(x)

    model = Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )

    return model

def create_hybrid_model(input_shape, output_shape=1):
    """Build and compile the hybrid CNN-LSTM regressor.

    Convolutional front end: Conv1D(64, kernel 3, same padding) ->
    MaxPooling1D(pool_size=2) -> Conv1D(32, kernel 3, same padding).
    Recurrent stage: LSTM(32, returning the full sequence) -> Dropout(0.2) ->
    LSTM(16) -> Dropout(0.2). Head: Dense(16, relu) -> Dense(output_shape).
    Compiled with Adam (learning rate 0.001), MSE loss and an MAE metric.
    """
    layers = tf.keras.layers
    inputs = Input(shape=input_shape)

    # Convolutional feature extraction
    features = layers.Conv1D(filters=64, kernel_size=3, padding='same', activation='relu')(inputs)
    features = layers.MaxPooling1D(pool_size=2)(features)
    features = layers.Conv1D(filters=32, kernel_size=3, padding='same', activation='relu')(features)

    # Recurrent processing of the convolved sequence
    hidden = LSTM(32, activation='relu', return_sequences=True)(features)
    hidden = Dropout(0.2)(hidden)
    hidden = LSTM(16, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)

    # Regression head
    hidden = Dense(16, activation='relu')(hidden)
    outputs = Dense(output_shape)(hidden)

    hybrid = Model(inputs=inputs, outputs=outputs)
    hybrid.compile(loss='mse', metrics=['mae'], optimizer=Adam(learning_rate=0.001))
    return hybrid

## 3. Train and Compare Models

Let's train each model architecture and compare their performance.

In [None]:
# Training parameters shared by every model in this section
epochs = 50
batch_size = 32
# (sequence_length, features), read off the prepared training windows
input_shape = X_train.shape[1], X_train.shape[2]

# Callbacks for training; both monitor the validation loss
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
callbacks = [early_stopping, lr_reducer]

# Train LSTM model
print("Training LSTM model...")
lstm_model = create_lstm_model(input_shape)
lstm_history = lstm_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Fit the GRU model with the shared training configuration
print("Training GRU model...")
gru_model = create_gru_model(input_shape)
gru_history = gru_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Fit the Transformer model with the shared training configuration
print("Training Transformer model...")
transformer_model = create_transformer_model(input_shape)
transformer_history = transformer_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

In [None]:
# Fit the hybrid CNN-LSTM model with the shared training configuration
print("Training Hybrid CNN-LSTM model...")
hybrid_model = create_hybrid_model(input_shape)
hybrid_history = hybrid_model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=callbacks,
    verbose=1,
)

## 4. Compare Model Performance

In [None]:
# Function to plot training history
def plot_training_history(histories, model_names):
    """Draw training and validation curves for several fitted models.

    `histories` holds the objects returned by `model.fit` and `model_names`
    the matching labels. The left panel shows loss and the right panel MAE;
    every model contributes one training and one validation line per panel.
    """
    fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # (axis, training key, validation key, y-axis label, panel title)
    panels = (
        (loss_ax, 'loss', 'val_loss', 'Loss (MSE)', 'Training and Validation Loss'),
        (mae_ax, 'mae', 'val_mae', 'MAE', 'Training and Validation MAE'),
    )

    for axis, train_key, val_key, y_label, title in panels:
        for run, label in zip(histories, model_names):
            curves = run.history
            axis.plot(curves[train_key], label=f'{label} Training')
            axis.plot(curves[val_key], label=f'{label} Validation')

        axis.set_xlabel('Epoch')
        axis.set_ylabel(y_label)
        axis.set_title(title)
        axis.legend()

    plt.tight_layout()
    
# Plot the learning curves of all four models side by side
model_names = ['LSTM', 'GRU', 'Transformer', 'Hybrid']
histories = [lstm_history, gru_history, transformer_history, hybrid_history]
plot_training_history(histories, model_names)

In [None]:
# Evaluate on test set
def evaluate_model(model, X_test, y_test, scaler_y, model_name):
    """Evaluate model and compute metrics

    Predictions and targets are mapped back to the original scale with
    `scaler_y.inverse_transform` before any metric is computed.

    Parameters:
    -----------
    model : object with a `predict(X)` method
        Fitted model to evaluate
    X_test : numpy.ndarray
        Input sequences passed to `model.predict`
    y_test : numpy.ndarray
        Scaled target values, one per input sequence
    scaler_y : object with an `inverse_transform` method
        Scaler used to undo the target scaling
    model_name : str
        Label stored under the 'Model' key of the result

    Returns:
    --------
    dict
        Keys 'Model', 'MSE', 'RMSE', 'MAE', 'CC' (Pearson correlation),
        'PBIAS' (percent bias, 100 * sum(pred - obs) / sum(obs)) and 'NSE'
        (Nash-Sutcliffe efficiency). A metric whose denominator is zero is
        reported as NaN: PBIAS when the observations sum to zero, NSE when the
        observations are constant, CC when either series is constant.
    """
    # Make predictions
    y_pred = model.predict(X_test).flatten()

    # Convert to original scale
    y_test_orig = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()
    y_pred_orig = scaler_y.inverse_transform(y_pred.reshape(-1, 1)).flatten()

    # Calculate metrics
    mse = mean_squared_error(y_test_orig, y_pred_orig)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test_orig, y_pred_orig)

    # Calculate hydrological metrics
    # Correlation is undefined when either series has zero spread
    if np.ptp(y_test_orig) == 0 or np.ptp(y_pred_orig) == 0:
        cc = np.nan
    else:
        cc = np.corrcoef(y_test_orig, y_pred_orig)[0, 1]

    observed_total = np.sum(y_test_orig)
    pbias = 100 * np.sum(y_pred_orig - y_test_orig) / observed_total if observed_total != 0 else np.nan

    # NSE compares the model's squared error with that of predicting the
    # observed mean; constant observations make the denominator zero.
    residual_ss = np.sum((y_test_orig - y_pred_orig) ** 2)
    total_ss = np.sum((y_test_orig - np.mean(y_test_orig)) ** 2)
    nse = 1 - residual_ss / total_ss if total_ss != 0 else np.nan

    # Return metrics
    return {
        'Model': model_name,
        'MSE': mse,
        'RMSE': rmse,
        'MAE': mae,
        'CC': cc,
        'PBIAS': pbias,
        'NSE': nse
    }

# Score every trained model on the held-out test sequences
model_names = ['LSTM', 'GRU', 'Transformer', 'Hybrid']
models = [lstm_model, gru_model, transformer_model, hybrid_model]

results = [
    evaluate_model(candidate, X_test, y_test, scaler_y, label)
    for candidate, label in zip(models, model_names)
]

# One row per model, one column per metric
results_df = pd.DataFrame(results)
results_df

In [None]:
# Bar charts comparing the models, one panel per metric
metrics_to_plot = ['RMSE', 'MAE', 'CC', 'NSE']
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

for ax, metric in zip(axes, metrics_to_plot):
    scores = results_df[metric]
    ax.bar(results_df['Model'], scores)
    ax.set_title(f'Comparison of {metric}')
    ax.set_ylabel(metric)

    # Print each bar's value just above its top edge
    for bar_position, score in enumerate(scores):
        ax.text(bar_position, score, f"{score:.4f}", ha='center', va='bottom')

plt.tight_layout()

## 5. Select Best Model and Save

In [None]:
# Pick the model with the highest NSE in the results table
# NOTE(review): results_df was computed on the test set, so this selection is
# made on test metrics - confirm that is intended.
best_model_idx = results_df['NSE'].idxmax()
best_nse = results_df.loc[best_model_idx, 'NSE']
best_model_name = results_df.loc[best_model_idx, 'Model']
best_model = models[best_model_idx]

print(f"Best model is {best_model_name} with NSE = {best_nse:.4f}")

# Persist the winning model
model_path = os.path.join(models_dir, 'nwm_dl_model.keras')
best_model.save(model_path)
print(f"Model saved to {model_path}")

## 6. Visualize Predictions from Best Model

In [None]:
# Make predictions with best model
y_pred = best_model.predict(X_test).flatten()

# Convert to original scale
y_test_orig = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_pred_orig = scaler_y.inverse_transform(y_pred.reshape(-1, 1)).flatten()

# Plot predictions vs actual
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_orig, label='Actual')
ax.plot(y_pred_orig, label='Predicted')
ax.set_title(f'{best_model_name} Model: Predictions vs Actual')
ax.set_xlabel('Sample Index')
ax.set_ylabel('Runoff')
ax.legend()
plt.show()

# Scatter plot
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(y_test_orig, y_pred_orig, alpha=0.5)
max_val = max(np.max(y_test_orig), np.max(y_pred_orig)) * 1.1
# Start at the origin, but extend below zero if any value is negative so that
# no point is clipped out of view.
min_val = min(0, np.min(y_test_orig), np.min(y_pred_orig))
ax.plot([min_val, max_val], [min_val, max_val], 'k--')  # 1:1 reference line
# Equal scaling is obtained by resizing the axes box. plt.axis('equal') would
# rescale the data limits instead and override the limits set just below.
ax.set_aspect('equal', adjustable='box')
ax.set_xlim([min_val, max_val])
ax.set_ylim([min_val, max_val])
ax.set_title(f'{best_model_name} Model: Predicted vs Actual')
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
plt.show()

## 7. Conclusions

Based on our experiments, we can draw the following conclusions:

1. The [best_model_name] model performed best, achieving the highest NSE value of [NSE_value].
2. [Add other observations about performance differences].
3. The model successfully [mention any specific improvements over baseline NWM forecasts].
4. Areas for future improvement include [your suggestions].

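If the test-set metrics above bear it out, these results indicate that deep learning approaches can improve NWM runoff forecasts; a direct comparison against the raw NWM forecast is still needed to quantify the gain.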