In [1]:
%matplotlib inline
import xarray as xr
import pandas as pd
import numpy as np
import os
import glob
import netCDF4 
import matplotlib.pyplot as plt
import warnings

# Silence all Python warnings for the rest of the session. NOTE: this is a blanket
# filter, so it also hides any warnings (e.g. deprecation warnings) raised by later cells
warnings.simplefilter('ignore')
# Use matplotlib's 'ggplot' style sheet for all figures created below
plt.style.use('ggplot')

# Time series plots for GOTM output

This notebook compares GOTM output based on ERA-Interim data (treated here as "pseudo-observations") against GOTM output based on S4 forecasts. See the `Readme` file here:

    shared/WATExR/SimplyQ_GOTM_run_results/Readme.txt
    
for further details.

## 1. User input

In [2]:
# Calendar month numbers belonging to each season
seasons_dict = dict(
    winter=[12, 1, 2],
    spring=[3, 4, 5],
    summer=[6, 7, 8],
    fall=[9, 10, 11],
)

# "Obs" datasets: one netCDF file per basin
store_obs_nc_path = '/home/jovyan/shared/WATExR/SimplyQ_GOTM_run_results/store_full_eraInterim.nc'
vanem_obs_nc_path = '/home/jovyan/shared/WATExR/SimplyQ_GOTM_run_results/vanem_full_eraInterim.nc'

# "Mod" datasets: one folder of netCDF files per basin
store_mod_fold = '/home/jovyan/shared/WATExR/SimplyQ_GOTM_run_results/Storefjorden'
vanem_mod_fold = '/home/jovyan/shared/WATExR/SimplyQ_GOTM_run_results/Vanemfjorden'

# Path dict keyed by basin, with values as (obs, mod)
path_dict = dict(
    store=(store_obs_nc_path, store_mod_fold),
    vanem=(vanem_obs_nc_path, vanem_mod_fold),
)

## 2. Utility functions

In [3]:
def gotm_vars_to_dataframe(ds):
    """ Processes/aggregates the different variables of interest and returns a dataframe
        with one column per variable. The input dataset is left unmodified.
        
    Args:
        ds: Xarray dataset of GOTM output. Must contain the variables 'temp', 'Hice' 
            and 'mld_surf'
        
    Returns:
        Dataframe with columns 'temp', 'Hice' and 'mld_surf', averaged over 'lat', 'lon'
        and 'z', with a date-only datetime index. Any NaNs remaining after aggregation 
        are filled with 0
    """
    # Hice is stored as a 4D variable (time, lat, lon, z), but it's actually
    # 3D (time, lat, lon), with all values in the surface 'row'. Calculating
    # the means/medians therefore gives misleading values (due to all the 
    # zeros). Instead, set 0 => NaN, aggregate, then fill NaNs again.
    # The masking is done on a new dataset (rather than by overwriting 
    # ds['Hice'].values) so that the caller's dataset is not changed as a side effect
    hice_masked = ds['Hice'].where(ds['Hice'] != 0)
    ds_masked = ds.assign(Hice=hice_masked)

    # Use means (over all depths where relevant) for now
    ds_mean = ds_masked[['temp', 'Hice', 'mld_surf']].mean(dim=['lat', 'lon', 'z'])

    # NOTE(review): the fill is applied to every column, not just Hice, so a missing
    # 'temp' or 'mld_surf' value is also reported as 0 - confirm this is intended
    df = ds_mean.to_dataframe().fillna(0)

    # Discard the time-of-day part of the index, keeping just the date
    df.index = pd.to_datetime(df.index.date)
    
    return df    

## 3. Data processing

In [5]:
def _plot_obs_vs_s4(obs_df, mod_df, png_path):
    """ Plots the ERA-Interim series against the S4 ensemble, with one panel per 
        variable, and saves the figure as a PNG. For each variable, the S4 members 
        are summarised by their median and the range between the 2.5% and 97.5% 
        quantiles.
    
    Args:
        obs_df:   Dataframe with columns 'temp', 'Hice' and 'mld_surf'
        mod_df:   Dataframe with (variable, member) multi-index columns
        png_path: Str. Path for the output PNG. The parent folder is created
                  if it does not already exist
        
    Returns:
        None. The figure is saved to 'png_path' and then closed
    """
    # Maps variable => (panel title, units)
    var_dict = {'temp':    ('Temperature', '°C'),
                'Hice':    ('Ice thickness', 'm'),
                'mld_surf':('Surface mixed layer depth', 'm'),
               }

    fig, axes = plt.subplots(nrows=len(var_dict), ncols=1, figsize=(15,15))

    for ax, (variable, (title, unit)) in zip(axes, var_dict.items()):
        # Summarise across members (columns) for each date
        mod_df_var = mod_df[variable].quantile([0.025, 0.5, 0.975], axis='columns').T
        mod_df_var.columns = ['low', 'med', 'hi']

        obs_df[variable].plot(c='k', ax=ax, label='ERA-Interim') 
        mod_df_var['med'].plot(c='r', ls='--', ax=ax, label='S4 median')    
        ax.fill_between(mod_df_var.index, 
                        mod_df_var['low'], 
                        mod_df_var['hi'],
                        alpha=0.1,
                        color='r',
                        label='S4 95% CI',
                       )

        ax.legend(loc='best')
        ax.set_title(title)
        ax.set_ylabel(f'{title} ({unit})')

    fig.tight_layout()
    
    # Make sure the output folder exists, otherwise savefig fails
    png_fold = os.path.dirname(png_path)
    if png_fold:
        os.makedirs(png_fold, exist_ok=True)
        
    fig.savefig(png_path, dpi=200)
    plt.close(fig)


# Number of S4 members. Files are numbered from 0 to (n_members - 1)
n_members = 15

# Number of rows kept from the end of each S4 file before filtering by month.
# Ensures we don't get months from the previous year "warm-up" too
n_tail_rows = 100

# Loop over basins
for basin, (obs_path, mod_fold) in path_dict.items():
    print(f'Processing: {basin}fjorden')
    
    # Read obs. The context manager closes the file once the dataframe is built
    with xr.open_dataset(obs_path, drop_variables='z') as obs_ds:
        obs_df = gotm_vars_to_dataframe(obs_ds)  
    
    # Loop over seasons
    mod_list = []
    for season, months in seasons_dict.items():
        print(f'  {season}')
        print(f'    Getting S4 data')      
        
        mem_list = []
        
        # Loop over S4 members
        for mem in range(n_members):
            # Get S4 data for member (for all years in this season)
            search_path = os.path.join(mod_fold, f'{basin}_*_{season}_{mem}.nc')
            flist = sorted(glob.glob(search_path))
            
            # Fail with a clear message, instead of the "No objects to concatenate"
            # error that pd.concat would otherwise raise on an empty list
            if not flist:
                raise ValueError(f'No S4 files found matching: {search_path}')

            # Loop over files for each year
            year_list = []
            for fpath in flist:
                # Close each file as soon as it has been processed, to avoid 
                # accumulating open file handles over the loops
                with xr.open_dataset(fpath, drop_variables='z') as mem_yr_ds:
                    mem_yr_df = gotm_vars_to_dataframe(mem_yr_ds)
                
                # Get just the months of interest. A boolean mask is used, rather than
                # adding a temporary 'month' column to a slice of the dataframe
                mem_yr_df = mem_yr_df.iloc[-n_tail_rows:]
                mem_yr_df = mem_yr_df[mem_yr_df.index.month.isin(months)]
                
                year_list.append(mem_yr_df)
                
            # Build single df for this member for all years
            mem_df = pd.concat(year_list, axis='rows')
            
            # Create multi-index with member (members are labelled from 1 in the columns)
            iterables = [mem_df.columns, [mem + 1,]]
            mem_df.columns = pd.MultiIndex.from_product(iterables, 
                                                        names=['variable', 'member'])            
            mem_list.append(mem_df)
            
        # Build a dataframe of all members and add to overall model output
        mod_list.append(pd.concat(mem_list, axis='columns'))
        
    # Build full model df
    mod_df = pd.concat(mod_list, axis='rows').sort_index()
    
    # Match dates
    obs_df = obs_df[obs_df.index.isin(mod_df.index)]
    
    # Plot full series
    _plot_obs_vs_s4(obs_df, mod_df, f'./pngs/{basin}fjorden_1981_2010.png')
    
    # Just Dec 1999 to Nov 2001
    _plot_obs_vs_s4(obs_df.loc['1999-12-01':'2001-11-30'], 
                    mod_df.loc['1999-12-01':'2001-11-30'], 
                    f'./pngs/{basin}fjorden_1999_2001.png')

Processing: storefjorden
  winter
    Getting S4 data
  spring
    Getting S4 data
  summer
    Getting S4 data
  fall
    Getting S4 data
Processing: vanemfjorden
  winter
    Getting S4 data
  spring
    Getting S4 data
  summer
    Getting S4 data
  fall
    Getting S4 data
