# Results Visualization for Runoff Forecasting

This notebook creates detailed visualizations of model results, comparing both the raw National Water Model (NWM) forecasts and our deep-learning-corrected forecasts against observed runoff values.

In [None]:
# Import necessary libraries
import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.dates import DateFormatter
import tensorflow as tf
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*"
# and later releases no longer accept the old names, so use whichever name
# this installation actually provides.
for _style_name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither style sheet is bundled: fall back to seaborn's own whitegrid theme
    sns.set_style('whitegrid')
sns.set_palette('deep')
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['font.size'] = 12

## 1. Load Results Data

In [None]:
# Set paths
data_dir = os.path.join('..', 'data', 'processed')
results_dir = os.path.join('..', 'reports', 'figures')
os.makedirs(results_dir, exist_ok=True)

# Columns that the cells below read from results_df
REQUIRED_COLUMNS = ['datetime', 'station_id', 'runoff_observed', 'runoff_nwm', 'runoff_predicted']


def make_dummy_results(seed=42, n_hours=1000, n_stations=5):
    """Build a synthetic results table for demonstration.

    Args:
        seed: Seed for the private random generator, so repeated runs
            produce the same table.
        n_hours: Number of hourly timestamps per station, starting at
            2022-10-01 00:00.
        n_stations: Number of stations, labelled 'USGS-00000001' onwards.

    Returns:
        DataFrame with one row per (station, hour) and the columns listed
        in REQUIRED_COLUMNS.
    """
    # A dedicated generator keeps the data reproducible without touching the
    # global `random` state
    rng = random.Random(seed)

    start_date = datetime(2022, 10, 1)
    dates = [start_date + timedelta(hours=i) for i in range(n_hours)]
    station_ids = ['USGS-' + str(i).zfill(8) for i in range(1, n_stations + 1)]

    records = []
    for station in station_ids:
        base_flow = rng.uniform(10, 100)  # Different base flow for each station
        for date in dates:
            # Seasonal component: sinusoid over the month number, peaking in March
            seasonal = 20 * np.sin(np.pi * date.month / 6)

            # Diurnal component: sinusoid over the hour, peaking at 06:00 and
            # lowest at 18:00
            daily = 5 * np.sin(np.pi * date.hour / 12)

            # Random component
            noise = rng.normalvariate(0, 5)

            # True runoff (simulating observed), clamped at zero
            true_runoff = max(0, base_flow + seasonal + daily + noise)

            # NWM runoff: multiplicative bias in [0.8, 1.2] plus larger noise
            nwm_bias = 0.8 + 0.4 * rng.random()
            nwm_noise = rng.normalvariate(0, 10)
            nwm_runoff = max(0, nwm_bias * true_runoff + nwm_noise)

            # ML corrected runoff: multiplicative bias in [0.9, 1.1] plus smaller noise
            ml_bias = 0.9 + 0.2 * rng.random()
            ml_noise = rng.normalvariate(0, 3)
            ml_runoff = max(0, ml_bias * true_runoff + ml_noise)

            records.append({
                'datetime': date,
                'station_id': station,
                'runoff_observed': true_runoff,
                'runoff_nwm': nwm_runoff,
                'runoff_predicted': ml_runoff
            })

    # Passing the column list keeps the schema intact even when no rows are generated
    return pd.DataFrame(records, columns=REQUIRED_COLUMNS)


# Load test results
# In a real scenario, this would be the output from running predict.py
try:
    results_df = pd.read_csv(os.path.join(data_dir, 'predictions.csv'))
    print(f"Loaded results with shape: {results_df.shape}")
except FileNotFoundError:
    print("Results file not found. Creating dummy data for demonstration.")
    results_df = make_dummy_results()
    print(f"Created dummy results with shape: {results_df.shape}")

# Fail here, with a clear message, rather than deep inside a plotting cell
missing_columns = [col for col in REQUIRED_COLUMNS if col not in results_df.columns]
if missing_columns:
    raise KeyError(f"results_df is missing required columns: {missing_columns}")

# Ensure datetime is in datetime format
if not pd.api.types.is_datetime64_any_dtype(results_df['datetime']):
    results_df['datetime'] = pd.to_datetime(results_df['datetime'])

## 2. Calculate Performance Metrics

In [None]:
# Define evaluation metrics functions
def correlation_coefficient(obs, pred):
    """Return the Pearson correlation between observed and predicted values."""
    corr_matrix = np.corrcoef(obs, pred)
    # The off-diagonal entry of the 2x2 matrix is the obs-vs-pred correlation
    return corr_matrix[0, 1]

def rmse(obs, pred):
    """Return the root of the mean squared error between obs and pred."""
    mse = mean_squared_error(obs, pred)
    return np.sqrt(mse)

def pbias(obs, pred):
    """Return percent bias: 100 * sum(pred - obs) / sum(obs).

    Returns NaN when the observations sum to zero.
    """
    total_observed = np.sum(obs)
    if total_observed != 0:
        return 100 * np.sum(pred - obs) / total_observed
    return np.nan

def nse(obs, pred):
    """Calculate Nash-Sutcliffe Efficiency.

    NSE = 1 - sum((obs - pred)^2) / sum((obs - mean(obs))^2)

    Args:
        obs: Observed values (array-like).
        pred: Predicted values (array-like, same length as obs).

    Returns:
        The NSE as a float, or NaN when the observations have zero variance
        (constant or empty input), because the ratio is then undefined.
    """
    # Accept plain lists as well as arrays
    obs = np.asarray(obs, dtype=float)
    pred = np.asarray(pred, dtype=float)

    observed_spread = np.sum((obs - np.mean(obs)) ** 2)
    if observed_spread == 0:
        # Same convention as pbias: undefined ratio -> NaN rather than a
        # divide-by-zero warning and an infinite score
        return np.nan
    return 1 - np.sum((obs - pred) ** 2) / observed_spread

# Calculate metrics for each station: one row per (station, model) pair
stations = results_df['station_id'].unique()

# Display name of each model -> column holding its runoff values
model_columns = {'NWM': 'runoff_nwm', 'ML Corrected': 'runoff_predicted'}

station_metrics = []
for station in stations:
    station_data = results_df[results_df['station_id'] == station]
    obs = station_data['runoff_observed'].values

    # NWM row first, then ML Corrected, for every station
    for model_name, column in model_columns.items():
        simulated = station_data[column].values
        station_metrics.append({
            'Station': station,
            'Model': model_name,
            'RMSE': rmse(obs, simulated),
            'MAE': mean_absolute_error(obs, simulated),
            'CC': correlation_coefficient(obs, simulated),
            'PBIAS': pbias(obs, simulated),
            'NSE': nse(obs, simulated)
        })

# Create metrics DataFrame
metrics_df = pd.DataFrame(station_metrics)
metrics_df

In [None]:
# Calculate improvements
def percent_improvement(nwm_value, ml_value, higher_is_better):
    """Return the ML model's improvement over NWM as a percentage of |NWM|.

    Args:
        nwm_value: Baseline (NWM) metric value.
        ml_value: ML-corrected metric value.
        higher_is_better: True for metrics where a larger value is better
            (CC, NSE); False where a smaller value is better.

    Returns:
        Positive when the ML value is better. With a zero baseline the
        percentage is undefined, so the result is 0.0 when both values are
        equal and +/-inf otherwise, signed by the direction of the change.
        A NaN input gives NaN.
    """
    gain = (ml_value - nwm_value) if higher_is_better else (nwm_value - ml_value)
    if nwm_value != 0:
        return gain / abs(nwm_value) * 100
    if gain == 0:
        return 0.0
    return float('inf') if gain > 0 else float('-inf')


improvement_rows = []

for station in stations:
    per_station = metrics_df[metrics_df['Station'] == station]
    nwm_row = per_station[per_station['Model'] == 'NWM'].iloc[0]
    ml_row = per_station[per_station['Model'] == 'ML Corrected'].iloc[0]

    improvements = {'Station': station}

    # Calculate improvement percentage
    for metric in ['RMSE', 'MAE', 'CC', 'PBIAS', 'NSE']:
        nwm_value = nwm_row[metric]
        ml_value = ml_row[metric]

        if metric == 'PBIAS':
            # PBIAS is signed and the best value is zero, so compare magnitudes;
            # otherwise a large negative bias would count as an improvement
            nwm_value, ml_value = abs(nwm_value), abs(ml_value)

        # Higher is better for CC and NSE, lower is better for others
        improvements[f'{metric}_Improvement'] = percent_improvement(
            nwm_value, ml_value, higher_is_better=metric in ('CC', 'NSE')
        )

    improvement_rows.append(improvements)

# Build the frame once from the collected rows; DataFrame.append no longer
# exists in pandas 2.x
improvement_df = pd.DataFrame(improvement_rows)

improvement_df

## 3. Time Series Visualization

In [None]:
# Plot time series for each station
# (column, line format, legend label) for each curve, drawn in this order
series_specs = [
    ('runoff_observed', 'k-', 'Observed'),
    ('runoff_nwm', 'b-', 'NWM'),
    ('runoff_predicted', 'r-', 'ML Corrected'),
]
sample_hours = 14 * 24  # two weeks of hourly rows

for station in stations:
    station_data = results_df[results_df['station_id'] == station].sort_values('datetime')

    # Only the first two weeks are drawn to keep the figure readable
    sample_period = station_data.head(sample_hours)

    fig, ax = plt.subplots(figsize=(15, 6))
    for column, line_format, label in series_specs:
        ax.plot(sample_period['datetime'], sample_period[column], line_format, label=label)

    ax.set_xlabel('Date')
    ax.set_ylabel('Runoff (cms)')
    ax.set_title(f'Runoff Time Series for Station {station}')
    ax.legend()
    ax.grid(True)

    # Format x-axis dates as month-day hour and tilt the labels
    ax.xaxis.set_major_formatter(DateFormatter("%m-%d %H:00"))
    plt.setp(ax.get_xticklabels(), rotation=45)

    fig.tight_layout()
    fig.savefig(os.path.join(results_dir, f'timeseries_{station}.png'))
    plt.show()

## 4. Scatter Plot Comparison

In [None]:
# Create scatter plots for each station: one panel per model, left to right
# (model name, runoff column, marker colour)
panel_specs = [
    ('NWM', 'runoff_nwm', 'blue'),
    ('ML Corrected', 'runoff_predicted', 'red'),
]

for station in stations:
    station_data = results_df[results_df['station_id'] == station]
    obs = station_data['runoff_observed'].values

    # Look up this station's metrics row for each model before drawing
    model_metrics = {
        model_name: metrics_df[
            (metrics_df['Station'] == station) & (metrics_df['Model'] == model_name)
        ].iloc[0]
        for model_name, _, _ in panel_specs
    }

    fig, panel_axes = plt.subplots(1, 2, figsize=(15, 6))

    for ax, (model_name, column, color) in zip(panel_axes, panel_specs):
        simulated = station_data[column].values
        scores = model_metrics[model_name]

        ax.scatter(obs, simulated, alpha=0.5, color=color)

        # Square axes with 10% headroom, plus the 1:1 reference line
        axis_limit = max(np.max(obs), np.max(simulated)) * 1.1
        ax.plot([0, axis_limit], [0, axis_limit], 'k--')
        ax.set_xlim([0, axis_limit])
        ax.set_ylim([0, axis_limit])

        ax.set_xlabel('Observed Runoff (cms)')
        ax.set_ylabel(f'{model_name} Runoff (cms)')
        ax.set_title(f'{model_name} vs Observed\nNSE={scores["NSE"]:.3f}, RMSE={scores["RMSE"]:.3f}')
        ax.grid(True)

    plt.suptitle(f'Runoff Comparison for Station {station}', fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(results_dir, f'scatter_{station}.png'))
    plt.show()

## 5. Box Plot Comparisons

In [None]:
# Prepare data for boxplots
# Runoff column -> label shown in the legend, in the order the boxes are drawn
runoff_labels = {
    'runoff_observed': 'Observed',
    'runoff_nwm': 'NWM',
    'runoff_predicted': 'ML Corrected',
}

# Reshape wide -> long in one vectorised step instead of looping over rows:
# one output row per (station, series) value
boxplot_df = (
    results_df
    .melt(
        id_vars='station_id',
        value_vars=list(runoff_labels),
        var_name='Type',
        value_name='Runoff',
    )
    .rename(columns={'station_id': 'Station'})
)
boxplot_df['Type'] = boxplot_df['Type'].map(runoff_labels)

# Create boxplot
plt.figure(figsize=(15, 8))
ax = sns.boxplot(
    x='Station', y='Runoff', hue='Type', data=boxplot_df,
    hue_order=list(runoff_labels.values()),  # fix the box order explicitly
)
ax.set_xlabel('Station ID')
ax.set_ylabel('Runoff (cms)')
ax.set_title('Comparison of Runoff Distributions by Station')
plt.legend(title='Model')
plt.grid(True, axis='y')
plt.tight_layout()
plt.savefig(os.path.join(results_dir, 'runoff_boxplots.png'))
plt.show()

In [None]:
# Create boxplots for performance metrics: one panel per metric in a 2x2 grid
metrics_to_plot = ['RMSE', 'MAE', 'CC', 'NSE']
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

for ax, metric in zip(axes, metrics_to_plot):
    sns.boxplot(x='Model', y=metric, data=metrics_df, ax=ax)
    ax.set_title(f'Distribution of {metric} by Model')
    ax.set_ylabel(metric)
    ax.grid(True, axis='y')

    # Write each model's mean just above its largest value
    for x_position, model in enumerate(['NWM', 'ML Corrected']):
        model_values = metrics_df[metrics_df['Model'] == model][metric]
        label_height = model_values.max() * 1.05
        ax.text(x_position, label_height, f"Mean: {model_values.mean():.3f}", ha='center')

plt.suptitle('Performance Metrics Comparison', fontsize=16)
plt.tight_layout()
plt.savefig(os.path.join(results_dir, 'metrics_boxplots.png'))
plt.show()

## 6. Error Distribution Analysis

In [None]:
# Calculate errors (model minus observed) and store them on results_df
observed = results_df['runoff_observed']
results_df['nwm_error'] = results_df['runoff_nwm'] - observed
results_df['ml_error'] = results_df['runoff_predicted'] - observed

# Calculate relative errors in percent; zero observations become NaN so the
# division cannot produce infinities
nonzero_observed = observed.replace(0, np.nan)
results_df['nwm_rel_error'] = results_df['nwm_error'] / nonzero_observed * 100
results_df['ml_rel_error'] = results_df['ml_error'] / nonzero_observed * 100

# (column prefix, colour, legend label) for each model
histogram_specs = [('nwm', 'blue', 'NWM'), ('ml', 'red', 'ML Corrected')]

# Plot error histograms
fig, (abs_ax, rel_ax) = plt.subplots(1, 2, figsize=(15, 6))

# Absolute error
for prefix, color, label in histogram_specs:
    sns.histplot(results_df[f'{prefix}_error'], kde=True, ax=abs_ax, color=color, label=label)
abs_ax.axvline(x=0, color='k', linestyle='--')
abs_ax.set_xlabel('Error (cms)')
abs_ax.set_ylabel('Frequency')
abs_ax.set_title('Absolute Error Distribution')
abs_ax.legend()

# Relative error: keep only rows where both models are within +/-100%
within_range = (
    results_df['nwm_rel_error'].between(-100, 100)
    & results_df['ml_rel_error'].between(-100, 100)
)
rel_error_df = results_df[within_range]
for prefix, color, label in histogram_specs:
    sns.histplot(rel_error_df[f'{prefix}_rel_error'], kde=True, ax=rel_ax, color=color, label=label)
rel_ax.axvline(x=0, color='k', linestyle='--')
rel_ax.set_xlabel('Relative Error (%)')
rel_ax.set_ylabel('Frequency')
rel_ax.set_title('Relative Error Distribution')
rel_ax.legend()

plt.tight_layout()
plt.savefig(os.path.join(results_dir, 'error_distributions.png'))
plt.show()

## 7. Flow Duration Curves

In [None]:
# Create flow duration curves for each station
# (column, line format, legend label) for each curve, drawn in this order
curve_specs = [
    ('runoff_observed', 'k-', 'Observed'),
    ('runoff_nwm', 'b-', 'NWM'),
    ('runoff_predicted', 'r-', 'ML Corrected'),
]

for station in stations:
    station_data = results_df[results_df['station_id'] == station]

    # Exceedance probability in percent for ranks 1..n, using rank / (n + 1)
    n = len(station_data)
    exceedance = np.arange(1, n + 1) / (n + 1) * 100

    fig, ax = plt.subplots(figsize=(12, 6))
    for column, line_format, label in curve_specs:
        # Largest flow first, so it pairs with the smallest exceedance value
        flows_descending = np.sort(station_data[column].values)[::-1]
        ax.plot(exceedance, flows_descending, line_format, label=label)

    ax.set_xlabel('Exceedance Probability (%)')
    ax.set_ylabel('Runoff (cms)')
    ax.set_title(f'Flow Duration Curve for Station {station}')
    ax.grid(True)
    ax.legend()
    ax.set_yscale('log')

    fig.tight_layout()
    fig.savefig(os.path.join(results_dir, f'fdc_{station}.png'))
    plt.show()

## 8. Summary of Improvements

In [None]:
# Summarise the per-station improvements: mean, min and max for each metric
def _summarise(improvement_values):
    """Return the mean/min/max of one improvement column as a labelled dict."""
    return {
        'Average Improvement (%)': improvement_values.mean(),
        'Min Improvement (%)': improvement_values.min(),
        'Max Improvement (%)': improvement_values.max()
    }

avg_improvements = {
    metric: _summarise(improvement_df[f'{metric}_Improvement'])
    for metric in ['RMSE', 'MAE', 'CC', 'PBIAS', 'NSE']
}

# Transpose so that metrics are the rows and the statistics are the columns
avg_improvement_df = pd.DataFrame(avg_improvements).T
avg_improvement_df

In [None]:
# Plot average improvements with min/max whiskers
fig, ax = plt.subplots(figsize=(12, 6))
bar_heights = avg_improvement_df['Average Improvement (%)'].values

# Whisker lengths: distance from the mean down to the min and up to the max
lower_whisker = bar_heights - avg_improvement_df['Min Improvement (%)'].values
upper_whisker = avg_improvement_df['Max Improvement (%)'].values - bar_heights
bars = ax.bar(avg_improvement_df.index, bar_heights, yerr=[lower_whisker, upper_whisker])

# Color bars based on improvement (positive=green, negative=red)
for bar, height in zip(bars, bar_heights):
    bar.set_color('green' if height >= 0 else 'red')

ax.axhline(y=0, color='k', linestyle='-')
ax.set_ylabel('Average Improvement (%)')
ax.set_title('Average Performance Improvement of ML Model over NWM')
ax.grid(True, axis='y')

# Add value labels: above the bar for gains, below it for losses
for x_position, height in enumerate(bar_heights):
    is_gain = height >= 0
    ax.text(
        x_position,
        height + (5 if is_gain else -5),
        f"{height:.1f}%",
        ha='center',
        va='bottom' if is_gain else 'top',
    )

fig.tight_layout()
fig.savefig(os.path.join(results_dir, 'average_improvements.png'))
plt.show()

## 9. Conclusions

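Based on our analysis, we can draw the following conclusions (the bracketed placeholders below are to be filled in from the Section 8 summary table once the notebook has been run on real predictions):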

1. The ML-corrected runoff forecasts show significant improvement over the original NWM forecasts, with an average NSE improvement of [X]%.

2. The greatest improvements are seen in [metric], where we achieved an average improvement of [Y]%.

3. For all stations, the ML model reduced both the absolute and relative error in runoff predictions.

4. The flow duration curves show that the ML model better captures the [flow characteristics] compared to the original NWM forecasts.

5. These improvements demonstrate that deep learning post-processing can effectively correct systematic biases in NWM forecasts, leading to more accurate runoff predictions.