# Balance checks
This notebook calculates water and energy balances for all six laugh tests.

## Expectations
Balance errors should be small relative to the magnitudes of the states and fluxes.

The Vanderborght case requires a special approach because its outputs are only calculated for a single time step. Therefore we need to calculate the balance using the initial conditions as the start of the time window.

## Meta data

| Data  | Value  |
|:---|:---|
| Model name| Structure for Unifying Multiple Modelling Alternatives (SUMMA) |
| Model version  | See attributes in output .nc file |
| Model reference | Clark et al. (2015a,b) |
| Model runs by | R. Zolfaghari |
| Notebook code by | W. Knoben, A. Bennett |

In [359]:
# modules
from pathlib import Path
from operator import truediv 
import numpy as np
import pandas as pd
import xarray as xr # note, also needs netcdf4 library installed

In [327]:
# Output file of every laugh test, keyed by the test name that is printed and
# tabulated further down. All paths are relative to the notebook location.
file_paths = {
    # laugh test 1
    'Celia': '../lt1_celia1990/output/celia1990_output_timestep.nc',
    # laugh test 2
    'Miller clay': '../lt2_miller1998/output/millerClay_output_timestep.nc',
    'Miller loam': '../lt2_miller1998/output/millerLoam_output_timestep.nc',
    'Miller sand': '../lt2_miller1998/output/millerSand_output_timestep.nc',
    # laugh test 3
    'Vanderborght exp = 1': '../lt3_vanderborght2005/output/vanderborght2005_exp1_output_timestep.nc',
    'Vanderborght exp = 2': '../lt3_vanderborght2005/output/vanderborght2005_exp2_output_timestep.nc',
    'Vanderborght exp = 3': '../lt3_vanderborght2005/output/vanderborght2005_exp3_output_timestep.nc',
    # laugh test 4
    'Wigmosta exp = 1': '../lt4_wigmosta1994/output/syntheticHillslope-exp1_output_timestep.nc',
    'Wigmosta exp = 2': '../lt4_wigmosta1994/output/syntheticHillslope-exp2_output_timestep.nc',
    # laugh test 5
    'Colbeck exp = 1': '../lt5_colbeck1976/output/colbeck1976-exp1_output_timestep.nc',
    'Colbeck exp = 2': '../lt5_colbeck1976/output/colbeck1976-exp2_output_timestep.nc',
    'Colbeck exp = 3': '../lt5_colbeck1976/output/colbeck1976-exp3_output_timestep.nc',
    # laugh test 6
    'Mizoguchi': '../lt6_mizoguchi1990/output/mizoguchi1990_output_timestep.nc',
}

In [328]:
# Selection parameters
# Index of the GRU and the HRU to select from the output files. Selecting a
# single index removes these dimensions from the data > easier data handling
GRU = 0
HRU = 0

In [407]:
# functions to calculate the water balance components on a given time step
def _layer_count(dat, name, t):
    """Return the layer-count variable `name` at time index `t` as a Python int."""
    return int(dat[name].isel(time=t).values.astype('int'))


def _snow_storage(dat, t, iden_liq, iden_ice):
    """Return the water held in the snow layers at time index `t`, as mass divided by `iden_liq`."""

    # The snow layers are the first nSnow entries on the 'midToto' dimension.
    # Python indexing starts at 0, not 1.
    nSnow = _layer_count(dat, 'nSnow', t)
    layers = dict(time=t, midToto=slice(0, nSnow))

    depth = dat['mLayerDepth'].isel(**layers).values
    liq   = dat['mLayerVolFracLiq'].isel(**layers).values
    ice   = dat['mLayerVolFracIce'].isel(**layers).values

    mass = np.sum((liq * iden_liq + ice * iden_ice) * depth)
    return mass / iden_liq


def _snow_step(dat, t0, t1, dt, iden_liq, iden_ice):
    """Return (error, end state, state change) of the snow water balance between time indices `t0` and `t1`."""

    # Storage at the start and at the end of the step. Each is sliced with the
    # layer count of its OWN time index, so a change in the number of snow
    # layers during the step cannot cause a shape mismatch.
    snowBalance0 = _snow_storage(dat, t0, iden_liq, iden_ice)
    snowBalance1 = _snow_storage(dat, t1, iden_liq, iden_ice)

    # Fluxes between t0 and t1 are read at t1
    # Rainfall: [kg m-2 s-1] / [kg m-3] * [s] = [m]
    rainfall = (dat['scalarRainfall'].isel(time=t1).values / iden_liq) * dt

    # Snowfall: a mass flux as well, so it is converted with the density of
    # liquid water to stay consistent with the storage terms (mass / iden_liq)
    snowfall = (dat['scalarSnowfall'].isel(time=t1).values / iden_liq) * dt

    # Rain plus melt leaving the snow: [m s-1] * [s] = [m]
    rainPlusMelt = dat['scalarRainPlusMelt'].isel(time=t1).values * dt

    liqError = snowBalance1 - (snowBalance0 + rainfall + snowfall - rainPlusMelt)
    return liqError, snowBalance1, snowBalance1 - snowBalance0


def _soil_storage(dat, t):
    """Return (water held in the soil layers, soil layer depths) at time index `t`."""

    # The soil layers follow the snow layers on the 'midToto' dimension
    nSnow   = _layer_count(dat, 'nSnow', t)
    nLayers = _layer_count(dat, 'nLayers', t)
    layers  = dict(time=t, midToto=slice(nSnow, nLayers))

    depth = dat['mLayerDepth'].isel(**layers).values
    liq   = dat['mLayerVolFracLiq'].isel(**layers).values
    ice   = dat['mLayerVolFracIce'].isel(**layers).values

    return np.sum((liq + ice) * depth), depth


def _soil_step(dat, t0, t1, dt):
    """Return (error, end state, state change) of the soil water balance between time indices `t0` and `t1`."""

    soilBalance0, _      = _soil_storage(dat, t0)
    soilBalance1, depth1 = _soil_storage(dat, t1)
    nSoil = _layer_count(dat, 'nSoil', t1)

    # Net vertical flux between t0 and t1 (needs t1): difference between the
    # liquid flux at interface index nSoil and the one at interface index 0.
    # NOTE(review): presumably index 0 is the soil surface and index nSoil the
    # bottom of the soil column - confirm against the SUMMA documentation.
    liqFlux    = dat['iLayerLiqFluxSoil'].isel(time=t1)
    flux_last  = liqFlux.isel(ifcSoil=nSoil).values
    flux_first = liqFlux.isel(ifcSoil=0).values
    vertFlux   = -1 * (flux_last - flux_first) * dt

    # Transpiration and baseflow between t0 and t1 (need t1), summed over layers
    tranSink = dat['mLayerTranspire'].isel(time=t1).sum().values * dt
    baseSink = dat['mLayerBaseflow'].isel(time=t1).sum().values * dt

    # Compression between t0 and t1 (needs t1), weighted by the layer depths
    compress = dat['mLayerCompress'].isel(time=t1, midSoil=slice(0, nSoil)).values
    compSink = np.sum(compress * depth1)

    liqError = soilBalance1 - (soilBalance0 + vertFlux + tranSink - baseSink - compSink)
    return liqError, soilBalance1, soilBalance1 - soilBalance0


def calc_wb(dat,domain):
    """Calculate the water balance error for every time step in `dat`.

    Parameters
    ----------
    dat : xarray dataset with a single HRU/GRU selected, containing
        - time, nSoil, nSnow, nLayers
        - mLayerDepth, mLayerVolFracLiq, mLayerVolFracIce
        - for domain 'soil': iLayerLiqFluxSoil, mLayerTranspire,
          mLayerBaseflow, mLayerCompress
        - for domain 'snow': scalarRainfall, scalarSnowfall, scalarRainPlusMelt
    domain : 'snow' or 'soil', the part of the column to balance

    Returns
    -------
    vec_liqError, vec_state, vec_stateDiff : three lists with one entry per
        pair of consecutive time steps: the balance error, the storage at the
        end of the step and the change in storage during the step.

    Raises
    ------
    ValueError if `domain` is neither 'snow' nor 'soil'.
    """

    # Fail early instead of silently returning three empty lists
    if domain not in ('snow', 'soil'):
        raise ValueError("domain must be 'snow' or 'soil', got " + repr(domain))

    # Find the timestep size [s] from the first two time stamps
    dt = round((dat['time'][1] - dat['time'][0]).values/np.timedelta64(1, 's'))
    print('    timestep size = ' + str(dt) + ' s.')

    # Intrinsic densities (needed for snow)
    iden_liq = 1000 # [kg m-3]
    iden_ice = 917

    # Make some storage variables
    vec_liqError  = []
    vec_state     = []
    vec_stateDiff = []

    # Time loop: every step runs from index t (start) to index t+1 (end)
    for t in range(len(dat['time']) - 1):

        if domain == 'snow':
            liqError, state, stateDiff = _snow_step(dat, t, t + 1, dt, iden_liq, iden_ice)
        else:
            liqError, state, stateDiff = _soil_step(dat, t, t + 1, dt)

        vec_liqError.append(liqError)
        vec_state.append(state)
        vec_stateDiff.append(stateDiff)

    return vec_liqError, vec_state, vec_stateDiff

In [334]:
def prep_vanderborght(file,file_ic,HRU,GRU):
    """Merge the Vanderborght initial conditions and simulation output into one dataset.

    The initial conditions are given a time stamp one initial time step before
    the simulation output, so that the merged dataset holds a start and an end
    state for the simulated time step.

    Parameters
    ----------
    file    : path to the simulation output (.nc), with 'hru' and 'gru' dimensions
    file_ic : path to the initial conditions (.nc), with an 'hru' dimension only
    HRU     : index of the HRU to select
    GRU     : index of the GRU to select (simulation output only)

    Returns
    -------
    merged : xarray dataset with the layer states of both data sets, plus the
             layer counts and fluxes of the simulation output.
    """

    # load the simulations; the file is closed again once the data are in memory
    with xr.open_dataset( file ) as ds_sim:
        dat = ds_sim.isel(hru=HRU, gru=GRU).load()

    # load the initial conditions
    with xr.open_dataset( file_ic ) as ds_ic:
        ICs = ds_ic.isel(hru=HRU).load() # Has no GRU dimension

    # Find the datetime at the start of the simulation timestep
    # NOTE(review): 'dt_init' is interpreted as minutes here - confirm its units
    timeD = int( ICs['dt_init'].values[0] ) #np.int32 to int, timestep size in minutes
    timeE = dat['time'].values #timestamp at the end of the simulations
    timeS = timeE - np.timedelta64(timeD,'m') #timestamp of the start of the timestep

    # Assign a time dimension to the ICs for merging
    ICsn = ICs.expand_dims(time=timeS)

    # --- Merge initial conditions and simulations ---
    # Variables with a 'midToto' dimension; entries equal to -9999 are dropped
    # from the simulation output before merging
    var = ['mLayerDepth','mLayerVolFracLiq','mLayerVolFracIce']
    merged = xr.merge([dat[var].where(dat[var] != -9999, drop=True),
                       ICsn[var]])

    # Variables that are not part of the initial conditions and can be merged easily
    var = ['nSoil','nSnow','nLayers','iLayerLiqFluxSoil','mLayerTranspire','mLayerCompress','mLayerBaseflow']
    for v in var:
        merged[v] = dat[v]

    # Fill the "nX" variables on first time step from the initial conditions
    var = ['nSoil','nSnow']
    for v in var:
        merged[v].loc[dict(time=timeS)] = ICsn[v].isel(scalarv=0).values

    # The total number of layers is the sum of the soil and snow layer counts
    merged['nLayers'].loc[dict(time=timeS)] = (merged['nSoil'].loc[dict(time=timeS)].values
                                               + merged['nSnow'].loc[dict(time=timeS)].values)

    return merged

Processing starts here

In [428]:
# one list per metric; every laugh test appends one entry to each of them
(test_name, maxAbsErr, meanAbsErr, cumAbsErr,
 maxRelErr1, meanRelErr1, maxRelErr2, meanRelErr2) = ([] for _ in range(8))

# loop over the files
for test, file in file_paths.items():

    # progress
    print('Working on ' + test)

    # only the Colbeck tests are balanced over the snow domain
    domain = 'snow' if 'Colbeck' in test else 'soil'

    # load the data
    if 'Colbeck' in test:
        # Colbeck output has no GRU dimensions
        dat = xr.open_dataset( file ).isel(hru=HRU).load()
    elif 'Vanderborght' in test:
        # Vanderborght needs special data prep with its initial conditions
        file_IC = '../lt3_vanderborght2005/initialConditions/summa_zInitialCond_vanderborght2005.nc'
        dat = prep_vanderborght( file,file_IC,HRU,GRU )
    else:
        dat = xr.open_dataset( file ).isel(hru=HRU, gru=GRU).load()

    # Get water balance values
    wbe, states, dStates = calc_wb(dat, domain)

    # Relative error series, computed element-wise:
    # 1 = error relative to the change in state, 2 = error relative to the state
    rn  = [err / change for err, change in zip(wbe, dStates)]
    rrn = [err / state for err, state in zip(wbe, states)]

    # Remove NaNs if present
    rn  = [val for val in rn if not np.isnan(val)]
    rrn = [val for val in rrn if not np.isnan(val)]

    # Store the metrics of this test
    abs_wbe = np.abs(wbe)
    test_name.append(test)
    maxAbsErr.append(np.max(abs_wbe))
    meanAbsErr.append(np.mean(abs_wbe))
    cumAbsErr.append(np.sum(abs_wbe))
    maxRelErr1.append(np.max(rn))
    meanRelErr1.append(np.mean(rn))
    maxRelErr2.append(np.max(rrn))
    meanRelErr2.append(np.mean(rrn))

Working on Celia
    timestep size = 1800.0 s.
Working on Miller clay
    timestep size = 900.0 s.




Working on Miller loam
    timestep size = 900.0 s.
Working on Miller sand
    timestep size = 900.0 s.




Working on Vanderborght exp = 1
    timestep size = 3600.0 s.
Working on Vanderborght exp = 2
    timestep size = 3600.0 s.
Working on Vanderborght exp = 3
    timestep size = 3600.0 s.
Working on Wigmosta exp = 1
    timestep size = 3600.0 s.
Working on Wigmosta exp = 2
    timestep size = 3600.0 s.
Working on Colbeck exp = 1
    timestep size = 60.0 s.
Working on Colbeck exp = 2
    timestep size = 60.0 s.
Working on Colbeck exp = 3
    timestep size = 60.0 s.




Working on Mizoguchi
    timestep size = 60.0 s.


In [429]:
# Collect the metrics in a dataframe: one row per test, one column per metric.
# The pairs are listed in the order in which the columns should appear.
_metric_columns = [
    ('Test',               test_name),
    ('Max abs err    [m]', maxAbsErr),
    ('Mean abs err   [m]', meanAbsErr),
    ('Cumm abs err   [m]', cumAbsErr),
    ('Max rel err 1  [-]', maxRelErr1),
    ('Mean rel err 1 [-]', meanRelErr1),
    ('Max rel err 2  [-]', maxRelErr2),
    ('Mean rel err 2 [-]', meanRelErr2),
]
results = pd.DataFrame(dict(_metric_columns),
                       columns=[label for label, _ in _metric_columns])
results

Unnamed: 0,Test,Max abs err [m],Mean abs err [m],Cumm abs err [m],Max rel err 1 [-],Mean rel err 1 [-],Max rel err 2 [-],Mean rel err 2 [-]
0,Celia,2.42936e-07,4.517692e-08,5.376053e-06,-3.789115e-05,-7.3e-05,-8.187273e-08,-4.579465e-07
1,Miller clay,1.18514e-06,1.008642e-07,2.400569e-05,0.0005687777,-0.000135,2.148724e-07,-1.251693e-07
2,Miller loam,7.371279e-06,2.179302e-06,0.0005186738,-0.0005308398,-0.000726,-8.455538e-07,-1.602775e-06
3,Miller sand,8.06606e-05,4.851944e-06,0.001154763,0.002525567,-0.000519,1.911298e-07,-2.309497e-06
4,Vanderborght exp = 1,0.2870912,0.2870912,0.2870912,1.0,1.0,-0.9174916,-0.9174916
5,Vanderborght exp = 2,0.1082889,0.1082889,0.1082889,1.0,1.0,-0.2202286,-0.2202286
6,Vanderborght exp = 3,0.2510531,0.2510531,0.2510531,1.0,1.0,-0.7194592,-0.7194592
7,Wigmosta exp = 1,2.042411e-07,1.156492e-09,1.163431e-06,0.009356937,7e-06,2.936944e-08,-2.709875e-09
8,Wigmosta exp = 2,1.420161e-07,5.79014e-09,5.82488e-06,0.0007874252,-1.5e-05,5.52723e-08,-2.541616e-08
9,Colbeck exp = 1,2.739149e-08,5.305462e-10,3.177972e-07,5.713418e-07,-8e-06,8.664178e-08,1.684898e-09


## Trial code

In [15]:
# Test case used for the step-by-step trial in the cells below
test = 'Vanderborght exp = 1'

In [294]:
# Simulation output of the trial test case, relative to the notebook
file = '../lt3_vanderborght2005/output/vanderborght2005_exp1_output_timestep.nc'

In [17]:
# Display the selected path
file

'../lt3_vanderborght2005/output/vanderborght2005_exp1_output_timestep.nc'

In [295]:
# Load the simulation output into memory for the selected HRU and GRU
dat = xr.open_dataset( file ).isel(hru=HRU, gru=GRU).load()

In [298]:
# Get ICs
# Only the Vanderborght tests need the initial conditions; ICs stays undefined
# for any other test name. The initial conditions are selected by HRU only.
if 'Vanderborght' in test:
    ICs = xr.open_dataset( '../lt3_vanderborght2005/initialConditions/summa_zInitialCond_vanderborght2005.nc' ).isel(hru=HRU)

In [296]:
# Create datetime to assign to initial conditions
# NOTE(review): 'dt_init' is subtracted as minutes below - confirm its units
timeD = int( ICs['dt_init'].values[0] ) #np.int32 to int
timeE = dat['time'].values # time stamps of the simulation output
timeS = timeE - np.timedelta64(timeD,'m') # time stamps shifted back by timeD minutes
timeS

array(['1989-12-31T23:00:00.000000000'], dtype='datetime64[ns]')

In [299]:
# Add a time dimension to the IC file
# The new 'time' dimension carries the shifted time stamps in timeS, so the
# initial conditions can be merged with the simulation output along time.
ICsn = ICs.expand_dims(time=timeS)

In [300]:
# Merge data: combine the initial conditions and the simulation output

# Variables with a 'midToto' dimension are taken from both data sets;
# entries equal to -9999 are dropped from the simulation output first
var = ['mLayerDepth','mLayerVolFracLiq','mLayerVolFracIce']
sim_layers = dat[var].where(dat[var] != -9999, drop=True)
ic_layers = ICsn[var]
vdb = xr.merge([sim_layers, ic_layers])

# Variables that are not part of the initial conditions and can be merged easily
var = ['nSoil','nSnow','nLayers','iLayerLiqFluxSoil','mLayerTranspire','mLayerCompress','mLayerBaseflow']
for v in var:
    vdb[v] = dat[v]

# Fill the layer counts on the first time step from the initial conditions
var = ['nSoil','nSnow']
for v in var:
    vdb[v].loc[dict(time=timeS)] = ICsn[v].isel(scalarv=0).values

# 'nLayers' on the first time step is the sum of the soil and snow layer counts
vdb['nLayers'].loc[dict(time=timeS)] = (vdb['nSoil'].loc[dict(time=timeS)].values
                                        + vdb['nSnow'].loc[dict(time=timeS)])

In [301]:
# Display the merged dataset
vdb

In [302]:
# Soil water balance of the merged dataset. calc_wb as defined in this notebook
# returns a tuple of three lists (errors, states, state differences).
wb = calc_wb(vdb,'soil')

    timestep size = 3600.0 s.


In [303]:
# Display the water balance result
wb

[-0.2870911997891278]