# SYMFLUENCE Tutorial 02b — Basin-Scale Workflow (Bow River at Banff, Semi-Distributed)

## Introduction

This tutorial advances from lumped to semi-distributed watershed modeling. Instead of representing the basin as a single unit, we now subdivide it into multiple connected sub-basins (GRUs) that capture spatial variability while maintaining computational efficiency.

Building on Tutorial 02a's lumped approach, semi-distributed modeling adds spatial detail through automated watershed delineation that creates multiple sub-basins, stream network topology that connects GRUs through routing, and spatially-distributed characteristics that better represent elevation gradients and heterogeneous processes.

The key configuration change is `DOMAIN_DEFINITION_METHOD: 'delineate'` with a `STREAM_THRESHOLD` parameter controlling the number of sub-basins. Smaller thresholds create more GRUs (finer spatial detail) but increase computational cost.

We continue with the **Bow River at Banff** watershed, now discretized into multiple GRUs connected by mizuRoute for explicit stream network routing. This approach improves representation of snowmelt timing, spatial climate variability, and runoff generation patterns.


## Step 1 — Configuration and data reuse

We generate a semi-distributed configuration and intelligently reuse data from Tutorial 02a where possible.

In [None]:
# Step 1 — Semi-distributed configuration with data reuse


from pathlib import Path
import yaml
import shutil


from symfluence import SYMFLUENCE
from symfluence.resources import get_config_template

SYMFLUENCE_CODE_DIR = Path.cwd().resolve()

# Parse the packaged configuration template once. It supplies both the default
# data directory and the starting point for this tutorial's configuration.
config_template = get_config_template()
with open(config_template, 'r', encoding='utf-8') as f:
    template_config = yaml.safe_load(f)

# Use SYMFLUENCE_DATA_DIR from the template; if the key is absent, fall back to
# a 'data/SYMFLUENCE_data' folder next to the code directory.
default_data_dir = SYMFLUENCE_CODE_DIR.parent / 'data' / 'SYMFLUENCE_data'
SYMFLUENCE_DATA_DIR = Path(
    template_config.get('SYMFLUENCE_DATA_DIR', str(default_data_dir))
).resolve()

print(f"Using SYMFLUENCE_DATA_DIR: {SYMFLUENCE_DATA_DIR}")

# Build the tutorial configuration on a copy of the parsed template, so the
# template file does not have to be read and parsed a second time.
config = dict(template_config)

# === Modify for semi-distributed basin ===
config['SYMFLUENCE_CODE_DIR'] = str(SYMFLUENCE_CODE_DIR)
config['SYMFLUENCE_DATA_DIR'] = str(SYMFLUENCE_DATA_DIR)
config['DOMAIN_NAME'] = 'Bow_at_Banff_semi_distributed'
config['EXPERIMENT_ID'] = 'run_1'
config['POUR_POINT_COORDS'] = '51.1722/-115.5717'

# Key changes for semi-distributed
config['DELINEATION_METHOD'] = 'stream_threshold'  # Subdivide by stream threshold
config['DOMAIN_DEFINITION_METHOD'] = 'delineate'  # Automated watershed delineation
config['STREAM_THRESHOLD'] = 5000  # Controls number of sub-basins (smaller => more GRUs)
config['SUB_GRID_DISCRETIZATION'] = 'GRUs'

config['HYDROLOGICAL_MODEL'] = 'SUMMA'
config['ROUTING_MODEL'] = 'mizuRoute'
config['MIZU_FROM_MODEL'] = 'SUMMA'

# mizuRoute routing configuration
config['SETTINGS_MIZU_ROUTING_VAR'] = 'averageRoutedRunoff'  # Input variable from SUMMA
config['SETTINGS_MIZU_ROUTING_UNITS'] = 'm/s'
config['SETTINGS_MIZU_ROUTING_DT'] = '3600'
# Routing scheme: 0=accumRunoff, 1=IRF-UH (recommended), 2=IRF-KW, 3=KW-IRF, 4=MC-IRF
# IRF (Impulse Response Function) applies proper river routing delays
# NOTE(review): the key below is named SETTINGS_MIZU_OUTPUT_VARS although the
# comment above describes routing schemes — confirm this is the intended key.
config['SETTINGS_MIZU_OUTPUT_VARS'] = 1  # Use IRF routing for proper streamflow evaluation

# Temporal extent
config['EXPERIMENT_TIME_START'] = '2004-01-01 01:00'
config['EXPERIMENT_TIME_END'] = '2007-12-31 23:00'
config['CALIBRATION_PERIOD'] = '2005-10-01, 2006-09-30'
config['EVALUATION_PERIOD'] = '2006-10-01, 2007-12-30'
config['SPINUP_PERIOD'] = '2004-01-01, 2005-09-30'

# Streamflow observations
config['STATION_ID'] = '05BB001'
config['DOWNLOAD_WSC_DATA'] = True

# Calibration setup
config['PARAMS_TO_CALIBRATE'] = 'minStomatalResistance,cond2photo_slope,vcmax25_canopyTop,jmax25_scale,summerLAI,rootingDepth,soilStressParam,z0Canopy,windReductionParam'
config['OPTIMIZATION_TARGET'] = 'streamflow'
config['ITERATIVE_OPTIMIZATION_ALGORITHM'] = 'DDS'
config['OPTIMIZATION_METRIC'] = 'KGE'
config['CALIBRATION_TIMESTEP'] = 'hourly'

# Save configuration to current directory; sort_keys=False keeps the key order
# built above instead of alphabetising it.
config_path = Path('./config_semi_distributed.yaml')
with open(config_path, 'w', encoding='utf-8') as f:
    yaml.dump(config, f, default_flow_style=False, sort_keys=False)

print(f"Configuration saved: {config_path}")

# === Data reuse from Tutorial 02a ===
# Location of the lumped-domain data directory that the reuse step looks for.
lumped_domain = 'Bow_at_Banff_lumped'
lumped_data_dir = SYMFLUENCE_DATA_DIR / f'domain_{lumped_domain}'

def copy_with_name_adaptation(src: Path, dst: Path, old_name: str, new_name: str) -> bool:
    """Copy ``src`` to ``dst`` and rename copied files that carry the old domain name.

    A file source is copied verbatim to ``dst`` (no renaming is applied, the
    caller chooses the destination name). A directory source is copied
    recursively, merging into ``dst`` if it already exists, and every copied
    file whose name contains ``old_name`` has that substring replaced by
    ``new_name``. Directory names are left untouched.

    Args:
        src: Source file or directory.
        dst: Destination path; missing parent directories are created.
        old_name: Substring to look for in copied file names.
        new_name: Replacement for ``old_name`` in those file names.

    Returns:
        ``False`` if ``src`` does not exist, ``True`` once the copy is done.
    """
    if not src.exists():
        return False
    dst.parent.mkdir(parents=True, exist_ok=True)
    if src.is_file():
        shutil.copy2(src, dst)
        return True
    shutil.copytree(src, dst, dirs_exist_ok=True)
    # Nothing to adapt when the names match; an empty old_name would match
    # every file and corrupt its name, so it is skipped as well.
    if not old_name or old_name == new_name:
        return True
    # Snapshot the matches first so the tree is not modified while rglob is
    # still walking it.
    to_rename = [
        path for path in dst.rglob('*')
        if path.is_file() and old_name in path.name
    ]
    for path in to_rename:
        target = path.with_name(path.name.replace(old_name, new_name))
        # replace() overwrites a target left by a previous run on every
        # platform, whereas rename() fails on Windows if the target exists.
        path.replace(target)
    return True

# Create the SYMFLUENCE instance (visualization on) and set up the project
# directory for the semi-distributed domain.
symfluence = SYMFLUENCE(config_path, visualize=True)
project_manager = symfluence.managers['project']
project_dir = project_manager.setup_project()

if not lumped_data_dir.exists():
    print(f"\nNo data from Tutorial 02a found at: {lumped_data_dir}")
    print("   Will acquire fresh data.")
else:
    print(f"\nReusing data from Tutorial 02a: {lumped_data_dir}")

    # Label -> location inside the lumped domain of each reusable dataset.
    reusable_data = {
        'Elevation': lumped_data_dir / 'attributes' / 'elevation',
        'Land Cover': lumped_data_dir / 'attributes' / 'landclass',
        'Soils': lumped_data_dir / 'attributes' / 'soilclass',
        'Forcing': lumped_data_dir / 'forcing' / 'raw_data',
        'Streamflow': lumped_data_dir / 'observations' / 'streamflow',
    }

    for label, source in reusable_data.items():
        if not source.exists():
            print(f"   {label}: Not found")
            continue
        # Mirror the same relative location inside the new project directory.
        target = project_dir / source.relative_to(lumped_data_dir)
        if copy_with_name_adaptation(source, target, lumped_domain, config['DOMAIN_NAME']):
            print(f"   {label}: Copied")

# Create pour point
pour_point_path = project_manager.create_pour_point()
print(f"\nProject structure created at: {project_dir}")

## Step 2 — Domain definition (multi-GRU)

Delineate the watershed into multiple sub-basins using stream network analysis and create connected GRUs.

### Step 2a — Attribute check

Verify that a DEM is available from the data reused in Step 1; if it is not, acquire the geospatial attributes.

In [None]:
# Step 2a — DEM availability check
# The DEM counts as available when the project's elevation/dem folder exists
# and holds at least one .tif file.
dem_path = project_dir / 'attributes' / 'elevation' / 'dem'
dem_available = dem_path.exists() and any(dem_path.glob('*.tif'))
if dem_available:
    print("✅ DEM available from previous workflow")
else:
    print("   DEM not found, acquiring geospatial attributes...")
    # The acquisition call is commented out by default, so the message printed
    # below appears even when nothing was downloaded.
    # If using MAF supported HPC, uncomment the line below
    # symfluence.managers['data'].acquire_attributes()
    print("✅ Geospatial attributes acquired")

### Step 2b — Stream network delineation

Automated watershed subdivision based on stream threshold parameter.

In [None]:
# Step 2b — Stream network delineation
# Ask the domain manager to define the domain and keep the value it returns.
domain_manager = symfluence.managers['domain']
watershed_path = domain_manager.define_domain()
print("✅ Stream network delineation complete")

### Step 2c — GRU discretization

Convert sub-basins to GRUs with routing connectivity.

In [None]:
# Step 2c — GRU discretization
# Ask the domain manager to discretize the domain and keep the value it returns.
domain_manager = symfluence.managers['domain']
hru_path = domain_manager.discretize_domain()
print("✅ GRU discretization complete")

### Step 2d — Network visualization

Visualize the semi-distributed structure: sub-basins and stream network.

In [None]:
# Step 2d — Network structure visualization (using native SYMFLUENCE plotting)

from IPython.display import Image, display

# Generate native domain visualization (includes GRUs and stream network)
domain_manager = symfluence.managers['domain']
plot_path = domain_manager.visualize_discretized_domain()

# A falsy result means no plot was produced; otherwise show it inline.
if not plot_path:
    print("Domain visualization not generated. Check logs for errors.")
else:
    print(f"Domain plot saved to: {plot_path}")
    display(Image(filename=str(plot_path)))

## Step 3 — Data preprocessing

Process forcing and observation data for multiple GRUs.

In [None]:
# Step 3a — Streamflow observations
# The processing call below is commented out by default, so as written this
# cell only prints the completion message.
# If using MAF supported HPC, uncomment the line below
# symfluence.managers['data'].process_observed_data()
print("✅ Streamflow data processing complete")

In [None]:
# Step 3b — Forcing data
# The acquisition call below is commented out by default, so as written this
# cell only prints the completion message.
# If using MAF supported HPC, uncomment the line below
# symfluence.managers['data'].acquire_forcings()
print("✅ Forcing acquisition complete")

In [None]:
# Step 3c — Model-agnostic preprocessing
# Run the data manager's model-agnostic preprocessing step.
data_manager = symfluence.managers['data']
data_manager.run_model_agnostic_preprocessing()
print("✅ Model-agnostic preprocessing complete")

## Step 4 — Model execution

Configure and run SUMMA-mizuRoute with multiple connected GRUs.

In [None]:
# Step 4a — Model configuration
# Run the model manager's preprocessing step.
model_manager = symfluence.managers['model']
model_manager.preprocess_models()
print("✅ Semi-distributed model configuration complete")

In [None]:
# Step 4b — Model execution
# Run the configured models through the model manager.
model_manager = symfluence.managers['model']
model_manager.run_models()
print("✅ Semi-distributed simulation complete")

## Step 5 — Evaluation

Compare semi-distributed results against observations.

In [None]:
# Step 5 — Semi-distributed evaluation (using Camille's model comparison plots)

from IPython.display import Image, display

# Generate model comparison overview (auto-detects mizuRoute outputs)
reporting_manager = symfluence.managers['reporting']
plot_path = reporting_manager.generate_model_comparison_overview(
    experiment_id=config['EXPERIMENT_ID'],
    context='run_model',
)

# A falsy result means no overview was produced; otherwise show it inline.
if not plot_path:
    print("No model outputs found for comparison. Check simulation outputs.")
else:
    print(f"Model comparison overview: {plot_path}")
    display(Image(filename=str(plot_path)))

print("\nSemi-distributed evaluation complete")

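### Step 5b — Run calibration

Calibrate the parameters listed in `PARAMS_TO_CALIBRATE` against observed streamflow, using the algorithm (DDS) and metric (KGE) configured in Step 1.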

In [None]:
# Run calibration through the optimization manager and report what it returns.
optimization_manager = symfluence.managers['optimization']
results_file = optimization_manager.calibrate_model()
print("Calibration results file:", results_file)

In [None]:
# Step 5c — Post-calibration visualization

from IPython.display import Image, display

# Generate post-calibration visualizations
plot_paths = symfluence.managers['reporting'].visualize_calibration_results(
    experiment_id=config['EXPERIMENT_ID']
)

# Guard against a missing/empty result, as the other reporting cells do for
# their plot paths, instead of failing on `.items()` or finishing silently.
# NOTE(review): assumes a name -> path mapping is returned on success — confirm.
if not plot_paths:
    print("No calibration plots generated. Check logs for errors.")
else:
    # Display all generated plots
    for plot_name, plot_path in plot_paths.items():
        print(f"\n{plot_name}:")
        display(Image(filename=str(plot_path)))

print("\nPost-calibration visualization complete")